Exemplo n.º 1
0
/*@
  DLOOP_Dataloop_create_named - create a dataloop for a "named" type
  if necessary.

  "named" types are ones for which MPI_Type_get_envelope() returns a
  combiner of MPI_COMBINER_NAMED. some types that fit this category,
  such as MPI_SHORT_INT, have multiple elements with potential gaps
  and padding. these types need dataloops for correct processing.
@*/
static void DLOOP_Dataloop_create_named(MPI_Datatype type,
					DLOOP_Dataloop **dlp_p,
					int *dlsz_p,
					int *dldepth_p,
					int flag)
{
    DLOOP_Dataloop *dlp;

    /* special case: pairtypes need dataloops too.
     *
     * note: not dealing with MPI_2INT because size == extent
     *       in all cases for that type.
     *
     * note: MPICH always precreates these, so we will never call
     *       Dataloop_create_pairtype() from here in the MPICH
     *       case.
     */
    if (type == MPI_FLOAT_INT || type == MPI_DOUBLE_INT ||
	type == MPI_LONG_INT || type == MPI_SHORT_INT ||
	type == MPI_LONG_DOUBLE_INT)
    {
	DLOOP_Handle_get_loopptr_macro(type, dlp, flag);
	if (dlp != NULL) {
	    /* dataloop already created; just return it. */
	    *dlp_p = dlp;
	    DLOOP_Handle_get_loopsize_macro(type, *dlsz_p, flag);
	    DLOOP_Handle_get_loopdepth_macro(type, *dldepth_p, flag);
	}
	else {
	    PREPEND_PREFIX(Dataloop_create_pairtype)(type,
						     dlp_p,
						     dlsz_p,
						     dldepth_p,
						     flag);
	}
	return;
    }
    /* no other combiners need dataloops; exit. */
    else {
	*dlp_p = NULL;
	*dlsz_p = 0;
	*dldepth_p = 0;
	return;
    }
}
Exemplo n.º 2
0
/*@
   Dataloop_contiguous - create the dataloop representation for a
   contiguous datatype

   Input Parameters:
+  int icount,
.  DLOOP_Type oldtype
-  int flag

   Output Parameters:
+  DLOOP_Dataloop **dlp_p,
.  DLOOP_Size *dlsz_p,
-  int *dldepth_p,


.N Errors
.N Returns 0 on success, -1 on failure.
@*/
int PREPEND_PREFIX(Dataloop_create_contiguous)(DLOOP_Count icount,
					       DLOOP_Type oldtype,
					       DLOOP_Dataloop **dlp_p,
					       DLOOP_Size *dlsz_p,
					       int *dldepth_p,
					       int flag)
{
    DLOOP_Count count;
    int is_builtin, apply_contig_coalescing = 0;
    int new_loop_depth;
    DLOOP_Size new_loop_sz;

    DLOOP_Dataloop *new_dlp;

    count = icount;

    is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1;

    if (is_builtin)
    {
	new_loop_depth = 1;
    }
    else
    {
	int old_loop_depth = 0;
	DLOOP_Offset old_size = 0, old_extent = 0;
	DLOOP_Dataloop *old_loop_ptr;

	DLOOP_Handle_get_loopdepth_macro(oldtype, old_loop_depth, flag);
	DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr, flag);
	DLOOP_Handle_get_size_macro(oldtype, old_size);
	DLOOP_Handle_get_extent_macro(oldtype, old_extent);

	/* if we have a simple combination of contigs, coalesce */
	if (((old_loop_ptr->kind & DLOOP_KIND_MASK) == DLOOP_KIND_CONTIG)
	    && (old_size == old_extent))
	{
	    /* will just copy contig and multiply count */
	    apply_contig_coalescing = 1;
	    new_loop_depth          = old_loop_depth;
	}
	else
	{
	    new_loop_depth = old_loop_depth + 1;
	}
    }

    if (is_builtin)
    {
	DLOOP_Offset basic_sz = 0;

	PREPEND_PREFIX(Dataloop_alloc)(DLOOP_KIND_CONTIG,
				       count,
				       &new_dlp,
				       &new_loop_sz);
	/* --BEGIN ERROR HANDLING-- */
	if (!new_dlp) return -1;
	/* --END ERROR HANDLING-- */

	DLOOP_Handle_get_size_macro(oldtype, basic_sz);
	new_dlp->kind = DLOOP_KIND_CONTIG | DLOOP_FINAL_MASK;

	if (flag == DLOOP_DATALOOP_ALL_BYTES)
	{
	    count             *= basic_sz;
	    new_dlp->el_size   = 1;
	    new_dlp->el_extent = 1;
	    new_dlp->el_type   = MPI_BYTE;
	}
	else
	{
	    new_dlp->el_size   = basic_sz;
	    new_dlp->el_extent = new_dlp->el_size;
	    new_dlp->el_type   = oldtype;
	}

	new_dlp->loop_params.c_t.count = count;
    }
    else
    {
	/* user-defined base type (oldtype) */
	DLOOP_Dataloop *old_loop_ptr;
	MPI_Aint old_loop_sz = 0;

	DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr, flag);
	DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz, flag);

	if (apply_contig_coalescing)
	{
	    /* make a copy of the old loop and multiply the count */
	    PREPEND_PREFIX(Dataloop_dup)(old_loop_ptr,
					 old_loop_sz,
					 &new_dlp);
	    /* --BEGIN ERROR HANDLING-- */
	    if (!new_dlp) return -1;
	    /* --END ERROR HANDLING-- */

	    new_dlp->loop_params.c_t.count *= count;

	    new_loop_sz = old_loop_sz;
	    DLOOP_Handle_get_loopdepth_macro(oldtype, new_loop_depth, flag);
	}
	else
	{
	    /* allocate space for new loop including copy of old */
	    PREPEND_PREFIX(Dataloop_alloc_and_copy)(DLOOP_KIND_CONTIG,
						    count,
						    old_loop_ptr,
						    old_loop_sz,
						    &new_dlp,
						    &new_loop_sz);
	    /* --BEGIN ERROR HANDLING-- */
	    if (!new_dlp) return -1;
	    /* --END ERROR HANDLING-- */

	    new_dlp->kind = DLOOP_KIND_CONTIG;
	    DLOOP_Handle_get_size_macro(oldtype, new_dlp->el_size);
	    DLOOP_Handle_get_extent_macro(oldtype, new_dlp->el_extent);
	    DLOOP_Handle_get_basic_type_macro(oldtype, new_dlp->el_type);
	    
	    new_dlp->loop_params.c_t.count = count;
	}
    }

    *dlp_p     = new_dlp;
    *dlsz_p    = new_loop_sz;
    *dldepth_p = new_loop_depth;

    return 0;
}
/*@
   Dataloop_create_blockindexed - create blockindexed dataloop

   Arguments:
+  DLOOP_Count count
.  void *displacement_array (array of either MPI_Aints or ints)
.  int displacement_in_bytes (boolean)
.  MPI_Datatype old_type
.  DLOOP_Dataloop **output_dataloop_ptr
.  int output_dataloop_size
.  int output_dataloop_depth
-  int flag

.N Errors
.N Returns 0 on success, -1 on failure.
@*/
int MPIR_Dataloop_create_blockindexed(DLOOP_Count icount,
                                      DLOOP_Count iblklen,
                                      const void *disp_array,
                                      int dispinbytes,
                                      DLOOP_Type oldtype,
                                      DLOOP_Dataloop ** dlp_p,
                                      DLOOP_Size * dlsz_p, int *dldepth_p, int flag)
{
    int err, is_builtin, is_vectorizable = 1;
    int i, old_loop_depth;
    DLOOP_Size new_loop_sz;

    DLOOP_Count contig_count, count, blklen;
    DLOOP_Offset old_extent, eff_disp0, eff_disp1, last_stride;
    DLOOP_Dataloop *new_dlp;

    count = (DLOOP_Count) icount;       /* avoid subsequent casting */
    blklen = (DLOOP_Count) iblklen;

    /* if count or blklen are zero, handle with contig code, call it a int */
    if (count == 0 || blklen == 0) {
        err = MPIR_Dataloop_create_contiguous(0, MPI_INT, dlp_p, dlsz_p, dldepth_p, flag);
        return err;
    }

    is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1;

    if (is_builtin) {
        DLOOP_Handle_get_size_macro(oldtype, old_extent);
        old_loop_depth = 0;
    } else {
        DLOOP_Handle_get_extent_macro(oldtype, old_extent);
        DLOOP_Handle_get_loopdepth_macro(oldtype, old_loop_depth);
    }

    contig_count = MPIR_Type_blockindexed_count_contig(count,
                                                       blklen, disp_array, dispinbytes, old_extent);

    /* optimization:
     *
     * if contig_count == 1 and block starts at displacement 0,
     * store it as a contiguous rather than a blockindexed dataloop.
     */
    if ((contig_count == 1) &&
        ((!dispinbytes && ((int *) disp_array)[0] == 0) ||
         (dispinbytes && ((MPI_Aint *) disp_array)[0] == 0))) {
        err = MPIR_Dataloop_create_contiguous(icount * iblklen,
                                              oldtype, dlp_p, dlsz_p, dldepth_p, flag);
        return err;
    }

    /* optimization:
     *
     * if contig_count == 1 store it as a blockindexed with one
     * element rather than as a lot of individual blocks.
     */
    if (contig_count == 1) {
        /* adjust count and blklen and drop through */
        blklen *= count;
        count = 1;
        iblklen *= icount;
        icount = 1;
    }

    /* optimization:
     *
     * if displacements start at zero and result in a fixed stride,
     * store it as a vector rather than a blockindexed dataloop.
     */
    eff_disp0 = (dispinbytes) ? ((DLOOP_Offset) ((MPI_Aint *) disp_array)[0]) :
        (((DLOOP_Offset) ((int *) disp_array)[0]) * old_extent);

    if (count > 1 && eff_disp0 == (DLOOP_Offset) 0) {
        eff_disp1 = (dispinbytes) ?
            ((DLOOP_Offset) ((MPI_Aint *) disp_array)[1]) :
            (((DLOOP_Offset) ((int *) disp_array)[1]) * old_extent);
        last_stride = eff_disp1 - eff_disp0;

        for (i = 2; i < count; i++) {
            eff_disp0 = eff_disp1;
            eff_disp1 = (dispinbytes) ?
                ((DLOOP_Offset) ((MPI_Aint *) disp_array)[i]) :
                (((DLOOP_Offset) ((int *) disp_array)[i]) * old_extent);
            if (eff_disp1 - eff_disp0 != last_stride) {
                is_vectorizable = 0;
                break;
            }
        }
        if (is_vectorizable) {
            err = MPIR_Dataloop_create_vector(count, blklen, last_stride, 1,    /* strideinbytes */
                                              oldtype, dlp_p, dlsz_p, dldepth_p, flag);
            return err;
        }
    }

    /* TODO: optimization:
     *
     * if displacements result in a fixed stride, but first displacement
     * is not zero, store it as a blockindexed (blklen == 1) of a vector.
     */

    /* TODO: optimization:
     *
     * if a blockindexed of a contig, absorb the contig into the blocklen
     * parameter and keep the same overall depth
     */

    /* otherwise storing as a blockindexed dataloop */

    /* Q: HOW CAN WE TELL IF IT IS WORTH IT TO STORE AS AN
     * INDEXED WITH FEWER CONTIG BLOCKS (IF CONTIG_COUNT IS SMALL)?
     */

    if (is_builtin) {
        MPIR_Dataloop_alloc(DLOOP_KIND_BLOCKINDEXED, count, &new_dlp, &new_loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!new_dlp)
            return -1;
        /* --END ERROR HANDLING-- */

        new_dlp->kind = DLOOP_KIND_BLOCKINDEXED | DLOOP_FINAL_MASK;

        if (flag == DLOOP_DATALOOP_ALL_BYTES) {
            blklen *= old_extent;
            new_dlp->el_size = 1;
            new_dlp->el_extent = 1;
            new_dlp->el_type = MPI_BYTE;
        } else {
            new_dlp->el_size = old_extent;
            new_dlp->el_extent = old_extent;
            new_dlp->el_type = oldtype;
        }
    } else {
        DLOOP_Dataloop *old_loop_ptr = NULL;
        MPI_Aint old_loop_sz = 0;

        DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr);
        DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz);

        MPIR_Dataloop_alloc_and_copy(DLOOP_KIND_BLOCKINDEXED,
                                     count, old_loop_ptr, old_loop_sz, &new_dlp, &new_loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!new_dlp)
            return -1;
        /* --END ERROR HANDLING-- */

        new_dlp->kind = DLOOP_KIND_BLOCKINDEXED;

        DLOOP_Handle_get_size_macro(oldtype, new_dlp->el_size);
        DLOOP_Handle_get_extent_macro(oldtype, new_dlp->el_extent);
        DLOOP_Handle_get_basic_type_macro(oldtype, new_dlp->el_type);
    }

    new_dlp->loop_params.bi_t.count = count;
    new_dlp->loop_params.bi_t.blocksize = blklen;

    /* copy in displacement parameters
     *
     * regardless of dispinbytes, we store displacements in bytes in loop.
     */
    DLOOP_Type_blockindexed_array_copy(count,
                                       disp_array,
                                       new_dlp->loop_params.bi_t.offset_array,
                                       dispinbytes, old_extent);

    *dlp_p = new_dlp;
    *dlsz_p = new_loop_sz;
    *dldepth_p = old_loop_depth + 1;

    return 0;
}
Exemplo n.º 4
0
int PREPEND_PREFIX(Dataloop_create_indexed)(DLOOP_Count icount,
					    const DLOOP_Size *blocklength_array,
					    const void *displacement_array,
					    int dispinbytes,
					    MPI_Datatype oldtype,
					    DLOOP_Dataloop **dlp_p,
					    DLOOP_Size *dlsz_p,
					    int *dldepth_p,
					    int flag)
{
    int err, is_builtin;
    int old_loop_depth;
    MPI_Aint i;
    DLOOP_Size new_loop_sz, blksz;
    DLOOP_Count first;

    DLOOP_Count old_type_count = 0, contig_count, count;
    DLOOP_Offset old_extent;
    struct DLOOP_Dataloop *new_dlp;

    count = (DLOOP_Count) icount; /* avoid subsequent casting */


    /* if count is zero, handle with contig code, call it an int */
    if (count == 0)
    {
	err = PREPEND_PREFIX(Dataloop_create_contiguous)(0,
							 MPI_INT,
							 dlp_p,
							 dlsz_p,
							 dldepth_p,
							 flag);
	return err;
    }

    /* Skip any initial zero-length blocks */
    for (first = 0; first < count; first++)
        if ((DLOOP_Count) blocklength_array[first])
            break;
    

    is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1;

    if (is_builtin)
    {
	DLOOP_Handle_get_extent_macro(oldtype, old_extent);
	old_loop_depth = 0;
    }
    else
    {
	DLOOP_Handle_get_extent_macro(oldtype, old_extent);
	DLOOP_Handle_get_loopdepth_macro(oldtype, old_loop_depth, flag);
    }

    for (i=first; i < count; i++)
    {
	old_type_count += (DLOOP_Count) blocklength_array[i];
    }

    contig_count = PREPEND_PREFIX(Type_indexed_count_contig)(count,
                                                             blocklength_array,
                                                             displacement_array,
                                                             dispinbytes,
                                                             old_extent);

    /* if contig_count is zero (no data), handle with contig code */
    if (contig_count == 0)
    {
	err = PREPEND_PREFIX(Dataloop_create_contiguous)(0,
							 MPI_INT,
							 dlp_p,
							 dlsz_p,
							 dldepth_p,
							 flag);
	return err;
    }

    /* optimization:
     *
     * if contig_count == 1 and block starts at displacement 0,
     * store it as a contiguous rather than an indexed dataloop.
     */    
    if ((contig_count == 1) &&
	((!dispinbytes && ((int *) displacement_array)[first] == 0) ||
	 (dispinbytes && ((MPI_Aint *) displacement_array)[first] == 0)))
    {
	err = PREPEND_PREFIX(Dataloop_create_contiguous)(old_type_count,
							 oldtype,
							 dlp_p,
							 dlsz_p,
							 dldepth_p,
							 flag);
	return err;
    }

    /* optimization:
     *
     * if contig_count == 1 (and displacement != 0), store this as
     * a single element blockindexed rather than a lot of individual
     * blocks.
     */
    if (contig_count == 1)
    {
        const void *disp_arr_tmp; /* no ternary assignment to avoid clang warnings */
        if (dispinbytes)
            disp_arr_tmp = &(((const MPI_Aint *)displacement_array)[first]);
        else
            disp_arr_tmp = &(((const int *)displacement_array)[first]);
	err = PREPEND_PREFIX(Dataloop_create_blockindexed)(1,
							   old_type_count,
							   disp_arr_tmp,
							   dispinbytes,
							   oldtype,
							   dlp_p,
							   dlsz_p,
							   dldepth_p,
							   flag);

	return err;
    }

    /* optimization:
     *
     * if block length is the same for all blocks, store it as a
     * blockindexed rather than an indexed dataloop.
     */
    blksz = blocklength_array[first];
    for (i = first+1; i < count; i++)
    {
	if (blocklength_array[i] != blksz)
	{
	    blksz--;
	    break;
	}
    }
    if (blksz == blocklength_array[first])
    {
        const void *disp_arr_tmp; /* no ternary assignment to avoid clang warnings */
        if (dispinbytes)
            disp_arr_tmp = &(((const MPI_Aint *)displacement_array)[first]);
        else
            disp_arr_tmp = &(((const int *)displacement_array)[first]);
	err = PREPEND_PREFIX(Dataloop_create_blockindexed)(icount-first,
							   blksz,
							   disp_arr_tmp,
							   dispinbytes,
							   oldtype,
							   dlp_p,
							   dlsz_p,
							   dldepth_p,
							   flag);

	return err;
    }

    /* note: blockindexed looks for the vector optimization */

    /* TODO: optimization:
     *
     * if an indexed of a contig, absorb the contig into the blocklen array
     * and keep the same overall depth
     */

    /* otherwise storing as an indexed dataloop */

    if (is_builtin)
    {
	PREPEND_PREFIX(Dataloop_alloc)(DLOOP_KIND_INDEXED,
				       count,
				       &new_dlp,
				       &new_loop_sz);
	/* --BEGIN ERROR HANDLING-- */
	if (!new_dlp) return -1;
	/* --END ERROR HANDLING-- */

	new_dlp->kind = DLOOP_KIND_INDEXED | DLOOP_FINAL_MASK;

	if (flag == DLOOP_DATALOOP_ALL_BYTES)
	{
	    /* blocklengths are modified below */
	    new_dlp->el_size   = 1;
	    new_dlp->el_extent = 1;
	    new_dlp->el_type   = MPI_BYTE;
	}
	else
	{
	    new_dlp->el_size   = old_extent;
	    new_dlp->el_extent = old_extent;
	    new_dlp->el_type   = oldtype;
	}
    }
    else
    {
	DLOOP_Dataloop *old_loop_ptr = NULL;
	MPI_Aint old_loop_sz = 0;

	DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr, flag);
	DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz, flag);

	PREPEND_PREFIX(Dataloop_alloc_and_copy)(DLOOP_KIND_INDEXED,
						contig_count,
						old_loop_ptr,
						old_loop_sz,
						&new_dlp,
						&new_loop_sz);
	/* --BEGIN ERROR HANDLING-- */
	if (!new_dlp) return -1;
	/* --END ERROR HANDLING-- */

	new_dlp->kind = DLOOP_KIND_INDEXED;

	DLOOP_Handle_get_size_macro(oldtype, new_dlp->el_size);
	DLOOP_Handle_get_extent_macro(oldtype, new_dlp->el_extent);
	DLOOP_Handle_get_basic_type_macro(oldtype, new_dlp->el_type);
    }

    new_dlp->loop_params.i_t.count        = contig_count;
    new_dlp->loop_params.i_t.total_blocks = old_type_count;

    /* copy in blocklength and displacement parameters (in that order)
     *
     * regardless of dispinbytes, we store displacements in bytes in loop.
     */
    DLOOP_Type_indexed_array_copy(count,
				  contig_count,
				  blocklength_array,
				  displacement_array,
				  new_dlp->loop_params.i_t.blocksize_array,
				  new_dlp->loop_params.i_t.offset_array,
				  dispinbytes,
				  old_extent);

    if (is_builtin && (flag == DLOOP_DATALOOP_ALL_BYTES))
    {
	DLOOP_Count *tmp_blklen_array =
	    new_dlp->loop_params.i_t.blocksize_array;

	for (i=0; i < contig_count; i++)
	{
	    /* increase block lengths so they are in bytes */
	    tmp_blklen_array[i] *= old_extent;
	}

        new_dlp->loop_params.i_t.total_blocks *= old_extent;
    }

    *dlp_p     = new_dlp;
    *dlsz_p    = new_loop_sz;
    *dldepth_p = old_loop_depth + 1;

    return MPI_SUCCESS;
}
Exemplo n.º 5
0
/*@
  Dataloop_create_struct - create the dataloop representation for a
  struct datatype

Input Parameters:
+ count - number of blocks in vector
. blklens - number of elements in each block
. disps - offsets of blocks from start of type in bytes
- oldtypes - types (using handle) of datatypes on which vector is based

Output Parameters:
+ dlp_p - pointer to address in which to place pointer to new dataloop
- dlsz_p - pointer to address in which to place size of new dataloop

  Return Value:
  0 on success, -1 on failure.

  Notes:
  This function relies on others, like Dataloop_create_indexed, to create
  types in some cases. This call (like all the rest) takes int blklens
  and MPI_Aint displacements, so it's possible to overflow when working
  with a particularly large struct type in some cases. This isn't detected
  or corrected in this code at this time.

@*/
int MPIR_Dataloop_create_struct(DLOOP_Count count,
					   const int *blklens,
					   const MPI_Aint *disps,
					   const DLOOP_Type *oldtypes,
					   DLOOP_Dataloop **dlp_p,
					   MPI_Aint *dlsz_p,
					   int *dldepth_p,
					   int flag)
{
    int err, i, nr_basics = 0, nr_derived = 0, type_pos = 0;

    DLOOP_Type first_basic = MPI_DATATYPE_NULL,
	first_derived = MPI_DATATYPE_NULL;

    /* variables used in general case only */
    int loop_idx, new_loop_depth;
    int old_loop_depth = 0;
    MPI_Aint new_loop_sz, old_loop_sz = 0;

    DLOOP_Dataloop *new_dlp, *curpos;

    /* if count is zero, handle with contig code, call it a int */
    if (count == 0)
    {
	err = MPIR_Dataloop_create_contiguous(0,
							 MPI_INT,
							 dlp_p,
							 dlsz_p,
							 dldepth_p,
							 flag);
	return err;
    }

    /* browse the old types and characterize */
    for (i=0; i < count; i++)
    {
	/* ignore type elements with a zero blklen */
	if (blklens[i] == 0) continue;

	if (oldtypes[i] != MPI_LB && oldtypes[i] != MPI_UB)
	{
	    int is_builtin;

	    is_builtin =
		(DLOOP_Handle_hasloop_macro(oldtypes[i])) ? 0 : 1;

	    if (is_builtin)
	    {
		if (nr_basics == 0)
		{
		    first_basic = oldtypes[i];
		    type_pos = i;
		}
		else if (oldtypes[i] != first_basic)
		{
		    first_basic = MPI_DATATYPE_NULL;
		}
		nr_basics++;
	    }
	    else /* derived type */
	    {
		if (nr_derived == 0)
		{
		    first_derived = oldtypes[i];
		    type_pos = i;
		}
		else if (oldtypes[i] != first_derived)
		{
		    first_derived = MPI_DATATYPE_NULL;
		}
		nr_derived++;
	    }
	}
    }

    /* note on optimizations:
     *
     * because LB, UB, and extent calculations are handled as part of
     * the Datatype, we can safely ignore them in all our calculations
     * here.
     */

    /* optimization:
     *
     * if there were only MPI_LBs and MPI_UBs in the struct type,
     * treat it as a zero-element contiguous (just as count == 0).
     */
    if (nr_basics == 0 && nr_derived == 0)
    {
	err = MPIR_Dataloop_create_contiguous(0,
							 MPI_INT,
							 dlp_p,
							 dlsz_p,
							 dldepth_p,
							 flag);
	return err;
    }

    /* optimization:
     *
     * if there is only one unique instance of a type in the struct, treat it
     * as a blockindexed type.
     *
     * notes:
     *
     * if the displacement happens to be zero, the blockindexed code will
     * optimize this into a contig.
     */
    if (nr_basics + nr_derived == 1)
    {
	/* type_pos is index to only real type in array */
	err = MPIR_Dataloop_create_blockindexed
	    (1, /* count */
	     blklens[type_pos],
	     &disps[type_pos],
	     1, /* displacement in bytes */
	     oldtypes[type_pos],
	     dlp_p,
	     dlsz_p,
	     dldepth_p,
	     flag);

	return err;
    }

    /* optimization:
     *
     * if there only one unique type (more than one instance) in the
     * struct, treat it as an indexed type.
     *
     * notes:
     *
     * this will apply to a single type with an LB/UB, as those
     * are handled elsewhere.
     *
     */
    if (((nr_derived == 0) && (first_basic != MPI_DATATYPE_NULL)) ||
	((nr_basics == 0) && (first_derived != MPI_DATATYPE_NULL)))
    {
	return DLOOP_Dataloop_create_unique_type_struct(count,
							blklens,
							disps,
							oldtypes,
							type_pos,
							dlp_p,
							dlsz_p,
							dldepth_p,
							flag);
    }

    /* optimization:
     *
     * if there are no derived types and caller indicated either a
     * homogeneous system or the "all bytes" conversion, convert
     * everything to bytes and use an indexed type.
     */
    if (nr_derived == 0 && ((flag == DLOOP_DATALOOP_HOMOGENEOUS) ||
			    (flag == DLOOP_DATALOOP_ALL_BYTES)))
    {
	return DLOOP_Dataloop_create_basic_all_bytes_struct(count,
							    blklens,
							    disps,
							    oldtypes,
							    dlp_p,
							    dlsz_p,
							    dldepth_p,
							    flag);
    }

    /* optimization:
     *
     * if caller asked for homogeneous or all bytes representation,
     * flatten the type and store it as an indexed type so that
     * there are no branches in the dataloop tree.
     */
    if ((flag == DLOOP_DATALOOP_HOMOGENEOUS) ||
	     (flag == DLOOP_DATALOOP_ALL_BYTES))
    {
	return DLOOP_Dataloop_create_flattened_struct(count,
						      blklens,
						      disps,
						      oldtypes,
						      dlp_p,
						      dlsz_p,
						      dldepth_p,
						      flag);
    }

    /* scan through types and gather derived type info */
    for (i=0; i < count; i++)
    {
	/* ignore type elements with a zero blklen */
	if (blklens[i] == 0) continue;

	if (DLOOP_Handle_hasloop_macro(oldtypes[i]))
	{
	    int tmp_loop_depth;
	    MPI_Aint tmp_loop_sz;

	    DLOOP_Handle_get_loopdepth_macro(oldtypes[i], tmp_loop_depth, flag);
	    DLOOP_Handle_get_loopsize_macro(oldtypes[i], tmp_loop_sz, flag);

	    if (tmp_loop_depth > old_loop_depth)
	    {
		old_loop_depth = tmp_loop_depth;
	    }
	    old_loop_sz += tmp_loop_sz;
	}
    }

    /* general case below: 2 or more distinct types that are either
     * basics or derived, and for which we want to preserve the types
     * themselves.
     */

    if (nr_basics > 0)
    {
	/* basics introduce an extra level of depth, so if our new depth
	 * must be at least 2 if there are basics.
	 */
	new_loop_depth = ((old_loop_depth+1) > 2) ? (old_loop_depth+1) : 2;
    }
    else
    {
	new_loop_depth = old_loop_depth + 1;
    }

    MPIR_Dataloop_struct_alloc((DLOOP_Count) nr_basics + nr_derived,
					  old_loop_sz,
					  nr_basics,
					  &curpos,
					  &new_dlp,
					  &new_loop_sz);
    /* --BEGIN ERROR HANDLING-- */
    if (!new_dlp)
    {
	return DLOOP_Dataloop_create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */


    new_dlp->kind = DLOOP_KIND_STRUCT;
    new_dlp->el_size = -1; /* not valid for struct */
    new_dlp->el_extent = -1; /* not valid for struct; see el_extent_array */
    new_dlp->el_type = MPI_DATATYPE_NULL; /* not valid for struct */

    new_dlp->loop_params.s_t.count = (DLOOP_Count) nr_basics + nr_derived;

    /* note: curpos points to first byte in "old dataloop" region of
     * newly allocated space.
     */

    for (i=0, loop_idx = 0; i < count; i++)
    {
	int is_builtin;

	/* ignore type elements with a zero blklen */
	if (blklens[i] == 0) continue;

	is_builtin = (DLOOP_Handle_hasloop_macro(oldtypes[i])) ? 0 : 1;

	if (is_builtin)
	{
	    DLOOP_Dataloop *dummy_dlp;
	    int dummy_depth;
	    MPI_Aint dummy_sz;

	    /* LBs and UBs already taken care of -- skip them */
	    if (oldtypes[i] == MPI_LB || oldtypes[i] == MPI_UB)
	    {
		continue;
	    }

	    /* build a contig dataloop for this basic and point to that
	     *
	     * optimization:
	     *
	     * push the count (blklen) from the struct down into the
	     * contig so we can process more at the leaf.
	     */
	    err = MPIR_Dataloop_create_contiguous(blklens[i],
							     oldtypes[i],
							     &dummy_dlp,
							     &dummy_sz,
							     &dummy_depth,
							     flag);

	    /* --BEGIN ERROR HANDLING-- */
	    if (err) {
		/* TODO: FREE ALLOCATED RESOURCES */
		return -1;
	    }
	    /* --END ERROR HANDLING-- */

	    /* copy the new contig loop into place in the struct memory
	     * region
	     */
	    MPIR_Dataloop_copy(curpos, dummy_dlp, dummy_sz);
	    new_dlp->loop_params.s_t.dataloop_array[loop_idx] = curpos;
	    curpos = (DLOOP_Dataloop *) ((char *) curpos + dummy_sz);

	    /* we stored the block size in the contig -- use 1 here */
	    new_dlp->loop_params.s_t.blocksize_array[loop_idx] = 1;
	    new_dlp->loop_params.s_t.el_extent_array[loop_idx] =
		((DLOOP_Offset) blklens[i]) * dummy_dlp->el_extent;
	    MPIR_Dataloop_free(&dummy_dlp);
	}
	else
	{
	    DLOOP_Dataloop *old_loop_ptr;
	    DLOOP_Offset old_extent;

	    DLOOP_Handle_get_loopptr_macro(oldtypes[i], old_loop_ptr, flag);
	    DLOOP_Handle_get_loopsize_macro(oldtypes[i], old_loop_sz, flag);
	    DLOOP_Handle_get_extent_macro(oldtypes[i], old_extent);

	    MPIR_Dataloop_copy(curpos, old_loop_ptr, old_loop_sz);
	    new_dlp->loop_params.s_t.dataloop_array[loop_idx] = curpos;
	    curpos = (DLOOP_Dataloop *) ((char *) curpos + old_loop_sz);

	    new_dlp->loop_params.s_t.blocksize_array[loop_idx] =
		(DLOOP_Count) blklens[i];
	    new_dlp->loop_params.s_t.el_extent_array[loop_idx] =
		old_extent;
	}
	new_dlp->loop_params.s_t.offset_array[loop_idx] =
	    (DLOOP_Offset) disps[i];
	loop_idx++;
    }

    *dlp_p     = new_dlp;
    *dlsz_p    = new_loop_sz;
    *dldepth_p = new_loop_depth;

    return 0;
}
Exemplo n.º 6
0
/*@
   Dataloop_create_vector

   Arguments:
+  int icount
.  int iblocklength
.  MPI_Aint astride
.  int strideinbytes
.  MPI_Datatype oldtype
.  DLOOP_Dataloop **dlp_p
.  int *dlsz_p
.  int *dldepth_p
-  int flag

   Returns 0 on success, -1 on failure.

@*/
int PREPEND_PREFIX(Dataloop_create_vector)(int icount,
        int iblocklength,
        MPI_Aint astride,
        int strideinbytes,
        DLOOP_Type oldtype,
        DLOOP_Dataloop **dlp_p,
        int *dlsz_p,
        int *dldepth_p,
        int flag)
{
    int err, is_builtin;
    int new_loop_sz, new_loop_depth;

    DLOOP_Count count, blocklength;
    DLOOP_Offset stride;
    DLOOP_Dataloop *new_dlp;

    count       = (DLOOP_Count) icount; /* avoid subsequent casting */
    blocklength = (DLOOP_Count) iblocklength;
    stride      = (DLOOP_Offset) astride;

    /* if count or blocklength are zero, handle with contig code,
     * call it a int
     */
    if (count == 0 || blocklength == 0)
    {

        err = PREPEND_PREFIX(Dataloop_create_contiguous)(0,
                MPI_INT,
                dlp_p,
                dlsz_p,
                dldepth_p,
                flag);
        return err;
    }

    /* optimization:
     *
     * if count == 1, store as a contiguous rather than a vector dataloop.
     */
    if (count == 1) {
        err = PREPEND_PREFIX(Dataloop_create_contiguous)(iblocklength,
                oldtype,
                dlp_p,
                dlsz_p,
                dldepth_p,
                flag);
        return err;
    }

    is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1;

    if (is_builtin) {
        new_loop_sz = sizeof(DLOOP_Dataloop);
        new_loop_depth = 1;
    }
    else {
        int old_loop_sz = 0, old_loop_depth = 0;

        DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz, flag);
        DLOOP_Handle_get_loopdepth_macro(oldtype, old_loop_depth, flag);

        /* TODO: ACCOUNT FOR PADDING IN LOOP_SZ HERE */
        new_loop_sz = sizeof(DLOOP_Dataloop) + old_loop_sz;
        new_loop_depth = old_loop_depth + 1;
    }


    if (is_builtin) {
        DLOOP_Offset basic_sz = 0;

        PREPEND_PREFIX(Dataloop_alloc)(DLOOP_KIND_VECTOR,
                                       count,
                                       &new_dlp,
                                       &new_loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!new_dlp) return -1;
        /* --END ERROR HANDLING-- */

        DLOOP_Handle_get_size_macro(oldtype, basic_sz);
        new_dlp->kind = DLOOP_KIND_VECTOR | DLOOP_FINAL_MASK;

        if (flag == DLOOP_DATALOOP_ALL_BYTES)
        {

            blocklength       *= basic_sz;
            new_dlp->el_size   = 1;
            new_dlp->el_extent = 1;
            new_dlp->el_type   = MPI_BYTE;

            if(!strideinbytes)
                /* the stride was specified in units of oldtype, now
                   that we're using bytes, rather than oldtype, we
                   need to update stride. */
                stride *= basic_sz;
        }
        else
        {
            new_dlp->el_size   = basic_sz;
            new_dlp->el_extent = new_dlp->el_size;
            new_dlp->el_type   = oldtype;
        }
    }
    else { /* user-defined base type (oldtype) */
        DLOOP_Dataloop *old_loop_ptr;
        int old_loop_sz = 0;

        DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr, flag);
        DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz, flag);

        PREPEND_PREFIX(Dataloop_alloc_and_copy)(DLOOP_KIND_VECTOR,
                                                count,
                                                old_loop_ptr,
                                                old_loop_sz,
                                                &new_dlp,
                                                &new_loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!new_dlp) return -1;
        /* --END ERROR HANDLING-- */

        new_dlp->kind = DLOOP_KIND_VECTOR;
        DLOOP_Handle_get_size_macro(oldtype, new_dlp->el_size);
        DLOOP_Handle_get_extent_macro(oldtype, new_dlp->el_extent);
        DLOOP_Handle_get_basic_type_macro(oldtype, new_dlp->el_type);
    }

    /* vector-specific members
     *
     * stride stored in dataloop is always in bytes for local rep of type
     */
    new_dlp->loop_params.v_t.count     = count;
    new_dlp->loop_params.v_t.blocksize = blocklength;
    new_dlp->loop_params.v_t.stride    = (strideinbytes) ? stride :
                                         stride * new_dlp->el_extent;

    *dlp_p     = new_dlp;
    *dlsz_p    = new_loop_sz;
    *dldepth_p = new_loop_depth;

    return 0;
}
Exemplo n.º 7
0
void PREPEND_PREFIX(Dataloop_create)(MPI_Datatype type,
				     DLOOP_Dataloop **dlp_p,
				     int *dlsz_p,
				     int *dldepth_p,
				     int flag)
{
    int i;
    int err;

    int nr_ints, nr_aints, nr_types, combiner;
    MPI_Datatype *types;
    int *ints;
    MPI_Aint *aints;

    DLOOP_Dataloop *old_dlp;
    int old_dlsz, old_dldepth;

    int dummy1, dummy2, dummy3, type0_combiner, ndims;
    MPI_Datatype tmptype;

    MPI_Aint stride;
    MPI_Aint *disps;

    MPIR_Type_get_envelope_impl(type, &nr_ints, &nr_aints, &nr_types, &combiner);

    /* some named types do need dataloops; handle separately. */
    if (combiner == MPI_COMBINER_NAMED) {
	DLOOP_Dataloop_create_named(type, dlp_p, dlsz_p, dldepth_p, flag);
	return;
    }
    else if (combiner == MPI_COMBINER_F90_REAL ||
             combiner == MPI_COMBINER_F90_COMPLEX ||
             combiner == MPI_COMBINER_F90_INTEGER)
    {
        MPI_Datatype f90basetype;
        DLOOP_Handle_get_basic_type_macro(type, f90basetype);
        PREPEND_PREFIX(Dataloop_create_contiguous)(1 /* count */,
                                                   f90basetype,
                                                   dlp_p, dlsz_p,
                                                   dldepth_p,
                                                   flag);
        return;
    }

    /* Q: should we also check for "hasloop", or is the COMBINER
     *    check above enough to weed out everything that wouldn't
     *    have a loop?
     */
    DLOOP_Handle_get_loopptr_macro(type, old_dlp, flag);
    if (old_dlp != NULL) {
	/* dataloop already created; just return it. */
	*dlp_p = old_dlp;
	DLOOP_Handle_get_loopsize_macro(type, *dlsz_p, flag);
	DLOOP_Handle_get_loopdepth_macro(type, *dldepth_p, flag);
	return;
    }

    PREPEND_PREFIX(Type_access_contents)(type, &ints, &aints, &types);

    /* first check for zero count on types where that makes sense */
    switch(combiner) {
	case MPI_COMBINER_CONTIGUOUS:
	case MPI_COMBINER_VECTOR:
	case MPI_COMBINER_HVECTOR_INTEGER:
	case MPI_COMBINER_HVECTOR:
	case MPI_COMBINER_INDEXED_BLOCK:
	case MPI_COMBINER_HINDEXED_BLOCK:
	case MPI_COMBINER_INDEXED:
	case MPI_COMBINER_HINDEXED_INTEGER:
	case MPI_COMBINER_HINDEXED:
	case MPI_COMBINER_STRUCT_INTEGER:
	case MPI_COMBINER_STRUCT:
	    if (ints[0] == 0) {
		PREPEND_PREFIX(Dataloop_create_contiguous)(0,
							   MPI_INT,
							   dlp_p,
							   dlsz_p,
							   dldepth_p,
							   flag);
		goto clean_exit;
	    }
	    break;
	default:
	    break;
    }

    /* recurse, processing types "below" this one before processing
     * this one, if those type don't already have dataloops.
     *
     * note: in the struct case below we'll handle any additional
     *       types "below" the current one.
     */
    MPIR_Type_get_envelope_impl(types[0], &dummy1, &dummy2, &dummy3, &type0_combiner);
    if (type0_combiner != MPI_COMBINER_NAMED)
    {
	DLOOP_Handle_get_loopptr_macro(types[0], old_dlp, flag);
	if (old_dlp == NULL)
	{
	    /* no dataloop already present; create and store one */
	    PREPEND_PREFIX(Dataloop_create)(types[0],
					    &old_dlp,
					    &old_dlsz,
					    &old_dldepth,
					    flag);

	    DLOOP_Handle_set_loopptr_macro(types[0], old_dlp, flag);
	    DLOOP_Handle_set_loopsize_macro(types[0], old_dlsz, flag);
	    DLOOP_Handle_set_loopdepth_macro(types[0], old_dldepth, flag);
	}
	else {
	    DLOOP_Handle_get_loopsize_macro(types[0], old_dlsz, flag);
	    DLOOP_Handle_get_loopdepth_macro(types[0], old_dldepth, flag);
	}
    }
       
    switch(combiner)
    {
	case MPI_COMBINER_DUP:
	    if (type0_combiner != MPI_COMBINER_NAMED) {
		PREPEND_PREFIX(Dataloop_dup)(old_dlp, old_dlsz, dlp_p);
		*dlsz_p    = old_dlsz;
		*dldepth_p = old_dldepth;
	    }
	    else {
		PREPEND_PREFIX(Dataloop_create_contiguous)(1,
							   types[0], 
							   dlp_p, dlsz_p,
							   dldepth_p,
							   flag);
	    }
	    break;
	case MPI_COMBINER_RESIZED:
	    if (type0_combiner != MPI_COMBINER_NAMED) {
		PREPEND_PREFIX(Dataloop_dup)(old_dlp, old_dlsz, dlp_p);
		*dlsz_p    = old_dlsz;
		*dldepth_p = old_dldepth;
	    }
	    else {
		PREPEND_PREFIX(Dataloop_create_contiguous)(1,
							   types[0], 
							   dlp_p, dlsz_p,
							   dldepth_p,
							   flag);

		(*dlp_p)->el_extent = aints[1]; /* extent */
	    }
	    break;
	case MPI_COMBINER_CONTIGUOUS:
	    PREPEND_PREFIX(Dataloop_create_contiguous)(ints[0] /* count */,
						       types[0] /* oldtype */,
						       dlp_p, dlsz_p,
						       dldepth_p,
						       flag);
	    break;
	case MPI_COMBINER_VECTOR:
	    PREPEND_PREFIX(Dataloop_create_vector)(ints[0] /* count */,
						   ints[1] /* blklen */,
						   ints[2] /* stride */,
						   0 /* stride not bytes */,
						   types[0] /* oldtype */,
						   dlp_p, dlsz_p, dldepth_p,
						   flag);
	    break;
	case MPI_COMBINER_HVECTOR_INTEGER:
	case MPI_COMBINER_HVECTOR:
	    /* fortran hvector has integer stride in bytes */
	    if (combiner == MPI_COMBINER_HVECTOR_INTEGER) {
		stride = (MPI_Aint) ints[2];
	    }
	    else {
		stride = aints[0];
	    }

	    PREPEND_PREFIX(Dataloop_create_vector)(ints[0] /* count */,
						   ints[1] /* blklen */,
						   stride,
						   1 /* stride in bytes */,
						   types[0] /* oldtype */,
						   dlp_p, dlsz_p, dldepth_p,
						   flag);
	    break;
	case MPI_COMBINER_INDEXED_BLOCK:
	    PREPEND_PREFIX(Dataloop_create_blockindexed)(ints[0] /* count */,
							 ints[1] /* blklen */,
							 &ints[2] /* disps */,
							 0 /* disp not bytes */,
							 types[0] /* oldtype */,
							 dlp_p, dlsz_p,
							 dldepth_p,
							 flag);
	    break;
	case MPI_COMBINER_HINDEXED_BLOCK:
            disps = (MPI_Aint *) DLOOP_Malloc(ints[0] * sizeof(MPI_Aint));
            for (i = 0; i < ints[0]; i++)
                disps[i] = aints[i];
	    PREPEND_PREFIX(Dataloop_create_blockindexed)(ints[0] /* count */,
							 ints[1] /* blklen */,
							 disps /* disps */,
							 1 /* disp is bytes */,
							 types[0] /* oldtype */,
							 dlp_p, dlsz_p,
							 dldepth_p,
							 flag);
            DLOOP_Free(disps);
	    break;
	case MPI_COMBINER_INDEXED:
	    PREPEND_PREFIX(Dataloop_create_indexed)(ints[0] /* count */,
						    &ints[1] /* blklens */,
						    &ints[ints[0]+1] /* disp */,
						    0 /* disp not in bytes */,
						    types[0] /* oldtype */,
						    dlp_p, dlsz_p, dldepth_p,
						    flag);
	    break;
	case MPI_COMBINER_HINDEXED_INTEGER:
	case MPI_COMBINER_HINDEXED:
	    if (combiner == MPI_COMBINER_HINDEXED_INTEGER) {
		disps = (MPI_Aint *) DLOOP_Malloc(ints[0] * sizeof(MPI_Aint));

		for (i=0; i < ints[0]; i++) {
		    disps[i] = (MPI_Aint) ints[ints[0] + 1 + i];
		}
	    }
	    else {
		disps = aints;
	    }

	    PREPEND_PREFIX(Dataloop_create_indexed)(ints[0] /* count */,
						    &ints[1] /* blklens */,
						    disps,
						    1 /* disp in bytes */,
						    types[0] /* oldtype */,
						    dlp_p, dlsz_p, dldepth_p,
						    flag);

	    if (combiner == MPI_COMBINER_HINDEXED_INTEGER) {
		DLOOP_Free(disps);
	    }

	    break;
	case MPI_COMBINER_STRUCT_INTEGER:
	case MPI_COMBINER_STRUCT:
	    for (i = 1; i < ints[0]; i++) {
		int type_combiner;
		MPIR_Type_get_envelope_impl(types[i], &dummy1, &dummy2, &dummy3, &type_combiner);

		if (type_combiner != MPI_COMBINER_NAMED) {
		    DLOOP_Handle_get_loopptr_macro(types[i], old_dlp, flag);
		    if (old_dlp == NULL)
		    {
			PREPEND_PREFIX(Dataloop_create)(types[i],
							&old_dlp,
							&old_dlsz,
							&old_dldepth,
							flag);
			
			DLOOP_Handle_set_loopptr_macro(types[i], old_dlp,
						       flag);
			DLOOP_Handle_set_loopsize_macro(types[i], old_dlsz,
							flag);
			DLOOP_Handle_set_loopdepth_macro(types[i], old_dldepth,
							 flag);
		    }
		}
	    }
	    if (combiner == MPI_COMBINER_STRUCT_INTEGER) {
		disps = (MPI_Aint *) DLOOP_Malloc(ints[0] * sizeof(MPI_Aint));

		for (i=0; i < ints[0]; i++) {
		    disps[i] = (MPI_Aint) ints[ints[0] + 1 + i];
		}
	    }
	    else {
		disps = aints;
	    }

            err = PREPEND_PREFIX(Dataloop_create_struct)(ints[0] /* count */,
                                                         &ints[1] /* blklens */,
                                                         disps,
                                                         types /* oldtype array */,
                                                         dlp_p, dlsz_p, dldepth_p,
                                                         flag);
            /* TODO if/when this function returns error codes, propagate this failure instead */
            DLOOP_Assert(0 == err);
            /* if (err) return err; */

	    if (combiner == MPI_COMBINER_STRUCT_INTEGER) {
		DLOOP_Free(disps);
	    }
	    break;
	case MPI_COMBINER_SUBARRAY:
	    ndims = ints[0];
	    PREPEND_PREFIX(Type_convert_subarray)(ndims,
						  &ints[1] /* sizes */,
						  &ints[1+ndims] /* subsizes */,
						  &ints[1+2*ndims] /* starts */,
						  ints[1+3*ndims] /* order */,
						  types[0],
						  &tmptype);

	    PREPEND_PREFIX(Dataloop_create)(tmptype,
					    dlp_p,
					    dlsz_p,
					    dldepth_p,
					    flag);
	    
	    MPIR_Type_free_impl(&tmptype);
	    break;
	case MPI_COMBINER_DARRAY:
	    ndims = ints[2];
	    PREPEND_PREFIX(Type_convert_darray)(ints[0] /* size */,
						ints[1] /* rank */,
						ndims,
						&ints[3] /* gsizes */,
						&ints[3+ndims] /*distribs */,
						&ints[3+2*ndims] /* dargs */,
						&ints[3+3*ndims] /* psizes */,
						ints[3+4*ndims] /* order */,
						types[0],
						&tmptype);

	    PREPEND_PREFIX(Dataloop_create)(tmptype,
					    dlp_p,
					    dlsz_p,
					    dldepth_p,
					    flag);

	    MPIR_Type_free_impl(&tmptype);
	    break;
	default:
	    DLOOP_Assert(0);
	    break;
    }

 clean_exit:

    PREPEND_PREFIX(Type_release_contents)(type, &ints, &aints, &types);

    /* for now we just leave the intermediate dataloops in place.
     * could remove them to save space if we wanted.
     */

    return;
}