Ejemplo n.º 1
0
/* Segment_unpack - run the memory-to-memory leaf functions over a segment
 *                  with the direction set to DLOOP_M2M_TO_USERBUF.
 *
 * Parameters:
 * segp      - segment to process; its ptr field is used as the user buffer
 * first     - start position, handed unchanged to Segment_manipulate
 * lastp     - in/out end position, handed unchanged to Segment_manipulate
 * streambuf - stream buffer recorded in the m2m parameter block
 */
void PREPEND_PREFIX(Segment_unpack)(DLOOP_Segment *segp,
				    DLOOP_Offset   first,
				    DLOOP_Offset  *lastp,
				    void *streambuf)
{
    struct PREPEND_PREFIX(m2m_params) m2m;

    DBG_SEGMENT(printf( "Segment_unpack...\n" ));

    /* The buffer pointers travel in this per-call structure instead of
     * being taken from the segment by the leaf functions (an experiment
     * noted by the original authors; finishing it would mean moving the
     * parameters around a bit).
     */
    m2m.direction = DLOOP_M2M_TO_USERBUF;
    m2m.streambuf = streambuf;
    m2m.userbuf   = segp->ptr;

    PREPEND_PREFIX(Segment_manipulate)(segp,
                                       first,
                                       lastp,
                                       PREPEND_PREFIX(Segment_contig_m2m),
                                       PREPEND_PREFIX(Segment_vector_m2m),
                                       PREPEND_PREFIX(Segment_blkidx_m2m),
                                       PREPEND_PREFIX(Segment_index_m2m),
                                       NULL, /* size fn */
                                       &m2m);
}
Ejemplo n.º 2
0
/*@
  Dataloop_copy - Copy an arbitrary dataloop structure, updating
  pointers as necessary

Input Parameters:
+ dest   - pointer to destination region
. src    - pointer to original dataloop structure
- size   - size of dataloop structure, in bytes

  The region is copied in one block and then Dataloop_update() is called
  on the copy with the byte distance between dest and src, so that
  internal pointers can be adjusted by that amount.

  Notes:
  It assumes that the source dataloop was allocated in our usual way;
  this means that the entire dataloop is in a contiguous region and that
  the root of the tree is first in the array.

  This has some implications:
+ we can use a contiguous copy mechanism to copy the majority of the
  structure
. all pointers in the region are relative to the start of the data region
- the first dataloop in the array is the root of the tree
@*/
void PREPEND_PREFIX(Dataloop_copy)(void *dest,
				   void *src,
				   DLOOP_Size size)
{
    DLOOP_Offset ptrdiff;

#ifdef DLOOP_DEBUG_MEMORY
    /* Pointers are printed with %p (casting them to int can truncate) and
     * the byte count with %zu ("%z" alone is not a complete conversion).
     */
    DLOOP_dbg_printf("DLOOP_Dataloop_copy: copying from %p to %p (%zu bytes).\n",
		     src, dest, (size_t) size);
#endif

    /* copy region first */
    DLOOP_Memcpy(dest, src, size);

    /* Calculate difference in starting locations. DLOOP_Dataloop_update()
     * then traverses the new structure and updates internal pointers by
     * adding this difference to them. This way we can just copy the
     * structure, including pointers, in one big block.
     */
    ptrdiff = (DLOOP_Offset) ((char *) dest - (char *) src);

    /* traverse structure updating pointers */
    PREPEND_PREFIX(Dataloop_update)(dest, ptrdiff);

    return;
}
Ejemplo n.º 3
0
/* Segment_mpi_flatten - flatten a type into block-length / displacement
 *                       arrays of the form expected by an hindexed
 *                       create call.
 *
 * NOTE: block lengths are in units of bytes on return.
 *
 * WARNING: values of several different types are converted into byte
 *          indices along the way, so overflow is possible.
 *
 * Parameters:
 * segp    - segment to flatten
 * first   - first byte in the segment to process
 * lastp   - in/out: on entry the last byte to process; on return the index
 *           of the byte _after_ the last one actually processed
 * blklens - output array of block lengths
 * disps   - output array of displacements
 * lengthp - in/out: on entry the capacity of the two arrays (must be > 0);
 *           on return the number of entries that hold data
 */
void PREPEND_PREFIX(Segment_mpi_flatten)(DLOOP_Segment *segp,
					 DLOOP_Offset first,
					 DLOOP_Offset *lastp,
					 DLOOP_Size *blklens,
					 MPI_Aint *disps,
					 DLOOP_Size *lengthp)
{
    struct PREPEND_PREFIX(mpi_flatten_params) flat;

    /* the caller must supply room for at least one entry */
    DLOOP_Assert(*lengthp > 0);

    flat.disps   = disps;
    flat.blklens = blklens;
    flat.length  = *lengthp;
    flat.index   = 0;

    PREPEND_PREFIX(Segment_manipulate)(segp, first, lastp,
                                       DLOOP_Leaf_contig_mpi_flatten,
                                       DLOOP_Leaf_vector_mpi_flatten,
                                       DLOOP_Leaf_blkidx_mpi_flatten,
                                       DLOOP_Leaf_index_mpi_flatten,
                                       NULL,
                                       &flat);

    /* *lastp has already been updated by Segment_manipulate; only the
     * number of entries filled in remains to be reported.
     */
    *lengthp = flat.index;
}
Ejemplo n.º 4
0
/* Segment_count_contig_blocks - count the contiguous regions of a segment
 *                               between first and *lastp.
 *
 * Parameters:
 * segp   - segment to examine
 * first  - start position, handed unchanged to Segment_manipulate
 * lastp  - in/out end position, handed unchanged to Segment_manipulate
 * countp - output: the count accumulated by the leaf functions
 */
void PREPEND_PREFIX(Segment_count_contig_blocks)(DLOOP_Segment *segp,
						 DLOOP_Offset first,
						 DLOOP_Offset *lastp,
						 DLOOP_Count *countp)
{
    struct PREPEND_PREFIX(contig_blocks_params) tally;

    /* start from an empty tally */
    tally.last_loc = 0;
    tally.count    = 0;

    /* FIXME (from the original authors): The blkidx and index functions
     * are not used since they optimize the count by coalescing contiguous
     * segments, while functions using the count do not optimize in the
     * same way (e.g., flatten code).
     *
     * NOTE(review): the call below does pass the blkidx and index leaf
     * functions, which does not match the FIXME text -- confirm which of
     * the two is intended.
     */
    PREPEND_PREFIX(Segment_manipulate)(segp, first, lastp,
                                       DLOOP_Leaf_contig_count_block,
                                       DLOOP_Leaf_vector_count_block,
                                       DLOOP_Leaf_blkidx_count_block,
                                       DLOOP_Leaf_index_count_block,
                                       NULL, /* size fn */
                                       (void *) &tally);

    *countp = tally.count;
}
Ejemplo n.º 5
0
/*@
  Dataloop_alloc - allocate storage for a dataloop that has no old
                   (child) loop to embed.

Input Parameters:
+ kind          - kind of dataloop to allocate
. count         - number of elements in dataloop (kind dependent)
. new_loop_p    - address at which to store new dataloop pointer
- new_loop_sz_p - pointer to integer in which to store new loop size

  Notes:
  This is Dataloop_alloc_and_copy() called with a NULL old loop and an
  old-loop size of zero.

  The count parameter passed into this function will often be different
  from the count passed in at the MPI layer due to optimizations.
@*/
void PREPEND_PREFIX(Dataloop_alloc)(int kind,
				    DLOOP_Count count,
				    DLOOP_Dataloop **new_loop_p,
				    MPI_Aint *new_loop_sz_p)
{
    /* nothing to copy in: no old loop, zero bytes */
    PREPEND_PREFIX(Dataloop_alloc_and_copy)(kind, count,
                                            NULL, 0,
                                            new_loop_p, new_loop_sz_p);
}
Ejemplo n.º 6
0
/*@
  DLOOP_Dataloop_create_named - create a dataloop for a "named" type
  if necessary.

  "named" types are ones for which MPI_Type_get_envelope() returns a
  combiner of MPI_COMBINER_NAMED. some types that fit this category,
  such as MPI_SHORT_INT, have multiple elements with potential gaps
  and padding. these types need dataloops for correct processing.

  Outputs (dlp_p, dlsz_p, dldepth_p) are set to NULL/0/0 for every type
  other than the five pair types tested below. For a pair type they are
  either read from the type handle (when a loop already exists) or
  filled in by Dataloop_create_pairtype(), whose return value is not
  examined here.
@*/
static void DLOOP_Dataloop_create_named(MPI_Datatype type,
					DLOOP_Dataloop **dlp_p,
					int *dlsz_p,
					int *dldepth_p,
					int flag)
{
    DLOOP_Dataloop *existing;
    int needs_loop;

    /* special case: pairtypes need dataloops too.
     *
     * note: MPI_2INT is deliberately left out because size == extent
     *       in all cases for that type.
     */
    needs_loop = (type == MPI_FLOAT_INT || type == MPI_DOUBLE_INT ||
                  type == MPI_LONG_INT || type == MPI_SHORT_INT ||
                  type == MPI_LONG_DOUBLE_INT);

    if (!needs_loop) {
        /* no other named types need dataloops; report "none". */
        *dlp_p     = NULL;
        *dlsz_p    = 0;
        *dldepth_p = 0;
        return;
    }

    DLOOP_Handle_get_loopptr_macro(type, existing, flag);

    if (existing == NULL) {
        /* note: MPICH always precreates these, so this call is never
         *       reached in the MPICH case.
         */
        PREPEND_PREFIX(Dataloop_create_pairtype)(type,
                                                 dlp_p,
                                                 dlsz_p,
                                                 dldepth_p,
                                                 flag);
        return;
    }

    /* dataloop already created; just hand it back. */
    *dlp_p = existing;
    DLOOP_Handle_get_loopsize_macro(type, *dlsz_p, flag);
    DLOOP_Handle_get_loopdepth_macro(type, *dldepth_p, flag);
}
/*@
   Dataloop_create_pairtype - create dataloop for a pairtype

   Arguments:
+  MPI_Datatype type - the pairtype
.  DLOOP_Dataloop **dlp_p - where to store the new dataloop pointer
.  int *dlsz_p - where to store the new dataloop size
.  int *dldepth_p - where to store the new dataloop depth
-  int flag - passed through to Dataloop_create_struct

.N Errors
.N Returns the result of Dataloop_create_struct, or -1 if type is not
   one of the supported pair types.

   Note:
   This function simply creates the appropriate input parameters for
   use with Dataloop_create_struct and then calls that function.

   This same function could be used to create dataloops for any type
   that actually consists of two distinct elements.
@*/
int PREPEND_PREFIX(Dataloop_create_pairtype)(MPI_Datatype type,
        DLOOP_Dataloop **dlp_p,
        int *dlsz_p,
        int *dldepth_p,
        int flag)
{
    int blocks[2] = { 1, 1 };
    MPI_Aint disps[2];
    MPI_Datatype types[2];

    DLOOP_Assert(type == MPI_FLOAT_INT || type == MPI_DOUBLE_INT ||
                 type == MPI_LONG_INT || type == MPI_SHORT_INT ||
                 type == MPI_LONG_DOUBLE_INT || type == MPI_2INT);

    /* PAIRTYPE_CONTENTS is defined elsewhere; presumably it fills in
     * types[] and disps[] for the given element pair -- TODO confirm.
     */
    switch(type) {
    case MPI_FLOAT_INT:
        PAIRTYPE_CONTENTS(MPI_FLOAT, float, MPI_INT, int);
        break;
    case MPI_DOUBLE_INT:
        PAIRTYPE_CONTENTS(MPI_DOUBLE, double, MPI_INT, int);
        break;
    case MPI_LONG_INT:
        PAIRTYPE_CONTENTS(MPI_LONG, long, MPI_INT, int);
        break;
    case MPI_SHORT_INT:
        PAIRTYPE_CONTENTS(MPI_SHORT, short, MPI_INT, int);
        break;
    case MPI_LONG_DOUBLE_INT:
        PAIRTYPE_CONTENTS(MPI_LONG_DOUBLE, long double, MPI_INT, int);
        break;
    case MPI_2INT:
        PAIRTYPE_CONTENTS(MPI_INT, int, MPI_INT, int);
        break;
    default:
        /* Only reachable when the assertion above is compiled out.
         * Without this, disps[] and types[] would be handed to
         * Dataloop_create_struct uninitialized.
         */
        return -1;
    }

    return PREPEND_PREFIX(Dataloop_create_struct)(2,
            blocks,
            disps,
            types,
            dlp_p,
            dlsz_p,
            dldepth_p,
            flag);
}
Ejemplo n.º 8
0
/*@
  Dataloop_dup - make a copy of a dataloop

Input Parameters:
+ old_loop    - dataloop to duplicate (must not be NULL)
. old_loop_sz - size of old_loop as passed to the allocator (must be > 0)
- new_loop_p  - address at which to store the pointer to the copy

  Notes:
  The function itself returns nothing. Failure to allocate is reported
  by storing NULL in *new_loop_p.
@*/
void PREPEND_PREFIX(Dataloop_dup)(DLOOP_Dataloop *old_loop,
				  DLOOP_Count old_loop_sz,
				  DLOOP_Dataloop **new_loop_p)
{
    DLOOP_Dataloop *copy;

    DLOOP_Assert(old_loop != NULL);
    DLOOP_Assert(old_loop_sz > 0);

    copy = (DLOOP_Dataloop *) DLOOP_Malloc(old_loop_sz);
    if (copy != NULL) {
        /* copies the block and fixes up its internal pointers */
        PREPEND_PREFIX(Dataloop_copy)(copy, old_loop, old_loop_sz);
    }

    /* NULL here tells the caller the allocation failed */
    *new_loop_p = copy;
}
Ejemplo n.º 9
0
/*@
   Dataloop_create_contiguous - create the dataloop representation for a
   contiguous datatype

   Input Parameters:
+  DLOOP_Count icount - number of oldtype elements
.  DLOOP_Type oldtype - element type
-  int flag - dataloop flavor; DLOOP_DATALOOP_ALL_BYTES makes a loop over
   a builtin type count bytes instead of elements

   Output Parameters:
+  DLOOP_Dataloop **dlp_p - the new dataloop
.  DLOOP_Size *dlsz_p - size of the new dataloop
-  int *dldepth_p - depth of the new dataloop

   Notes:
   Three shapes are produced. A type without a loop of its own gets a
   single final CONTIG loop. A type whose loop is itself a CONTIG with
   size equal to extent is coalesced: its loop is duplicated and the
   count multiplied. Anything else gets a new CONTIG loop with a copy of
   the old loop attached.

.N Errors
.N Returns 0 on success, -1 on failure.
@*/
int PREPEND_PREFIX(Dataloop_create_contiguous)(DLOOP_Count icount,
					       DLOOP_Type oldtype,
					       DLOOP_Dataloop **dlp_p,
					       DLOOP_Size *dlsz_p,
					       int *dldepth_p,
					       int flag)
{
    DLOOP_Count nelems = icount;
    DLOOP_Dataloop *loop;
    DLOOP_Size loop_sz;
    int loop_depth;
    int is_builtin;

    is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1;

    if (is_builtin)
    {
        DLOOP_Offset basic_sz = 0;

        loop_depth = 1;

        PREPEND_PREFIX(Dataloop_alloc)(DLOOP_KIND_CONTIG,
                                       nelems,
                                       &loop,
                                       &loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!loop) return -1;
        /* --END ERROR HANDLING-- */

        DLOOP_Handle_get_size_macro(oldtype, basic_sz);
        loop->kind = DLOOP_KIND_CONTIG | DLOOP_FINAL_MASK;

        if (flag != DLOOP_DATALOOP_ALL_BYTES)
        {
            loop->el_size   = basic_sz;
            loop->el_extent = loop->el_size;
            loop->el_type   = oldtype;
        }
        else
        {
            /* describe the data as a run of bytes */
            nelems         *= basic_sz;
            loop->el_size   = 1;
            loop->el_extent = 1;
            loop->el_type   = MPI_BYTE;
        }

        loop->loop_params.c_t.count = nelems;
    }
    else
    {
        /* user-defined base type (oldtype) */
        int child_depth = 0;
        int coalesce;
        DLOOP_Offset child_size = 0, child_extent = 0;
        DLOOP_Dataloop *child;
        MPI_Aint child_sz = 0;

        DLOOP_Handle_get_loopdepth_macro(oldtype, child_depth, flag);
        DLOOP_Handle_get_loopptr_macro(oldtype, child, flag);
        DLOOP_Handle_get_size_macro(oldtype, child_size);
        DLOOP_Handle_get_extent_macro(oldtype, child_extent);

        /* a contig of a gap-free contig can be folded into one loop */
        coalesce = (((child->kind & DLOOP_KIND_MASK) == DLOOP_KIND_CONTIG)
                    && (child_size == child_extent));

        if (coalesce)
        {
            loop_depth = child_depth;
        }
        else
        {
            loop_depth = child_depth + 1;
        }

        DLOOP_Handle_get_loopptr_macro(oldtype, child, flag);
        DLOOP_Handle_get_loopsize_macro(oldtype, child_sz, flag);

        if (!coalesce)
        {
            /* allocate space for new loop including copy of old */
            PREPEND_PREFIX(Dataloop_alloc_and_copy)(DLOOP_KIND_CONTIG,
                                                    nelems,
                                                    child,
                                                    child_sz,
                                                    &loop,
                                                    &loop_sz);
            /* --BEGIN ERROR HANDLING-- */
            if (!loop) return -1;
            /* --END ERROR HANDLING-- */

            loop->kind = DLOOP_KIND_CONTIG;
            DLOOP_Handle_get_size_macro(oldtype, loop->el_size);
            DLOOP_Handle_get_extent_macro(oldtype, loop->el_extent);
            DLOOP_Handle_get_basic_type_macro(oldtype, loop->el_type);

            loop->loop_params.c_t.count = nelems;
        }
        else
        {
            /* make a copy of the old loop and multiply the count */
            PREPEND_PREFIX(Dataloop_dup)(child,
                                         child_sz,
                                         &loop);
            /* --BEGIN ERROR HANDLING-- */
            if (!loop) return -1;
            /* --END ERROR HANDLING-- */

            loop->loop_params.c_t.count *= nelems;

            loop_sz = child_sz;
            DLOOP_Handle_get_loopdepth_macro(oldtype, loop_depth, flag);
        }
    }

    *dlp_p     = loop;
    *dlsz_p    = loop_sz;
    *dldepth_p = loop_depth;

    return 0;
}
Ejemplo n.º 10
0
/* Dataloop_create_indexed - create the dataloop representation for an
 *                           indexed datatype.
 *
 * Parameters:
 * icount             - number of entries in the two input arrays
 * blocklength_array  - length of each block, counted in oldtype elements
 * displacement_array - displacement of each block; read as MPI_Aint[] when
 *                      dispinbytes is nonzero, otherwise as int[]
 * dispinbytes        - selects how displacement_array is interpreted
 * oldtype            - element type of the blocks
 * dlp_p              - output: the new dataloop
 * dlsz_p             - output: size of the new dataloop
 * dldepth_p          - output: depth of the new dataloop
 * flag               - dataloop flavor; with DLOOP_DATALOOP_ALL_BYTES a loop
 *                      over a builtin type is expressed in bytes
 *
 * Simpler representations are tried first (empty, contiguous, single-block
 * blockindexed, uniform-block blockindexed); only if none applies is a true
 * indexed dataloop built.
 *
 * Returns MPI_SUCCESS when an indexed loop is built, -1 if its allocation
 * fails, and otherwise the value returned by the create routine the work
 * was delegated to.
 */
int PREPEND_PREFIX(Dataloop_create_indexed)(DLOOP_Count icount,
					    const DLOOP_Size *blocklength_array,
					    const void *displacement_array,
					    int dispinbytes,
					    MPI_Datatype oldtype,
					    DLOOP_Dataloop **dlp_p,
					    DLOOP_Size *dlsz_p,
					    int *dldepth_p,
					    int flag)
{
    int err, is_builtin;
    int old_loop_depth;
    MPI_Aint i;
    DLOOP_Size new_loop_sz, blksz;
    DLOOP_Count first;

    DLOOP_Count old_type_count = 0, contig_count, count;
    DLOOP_Offset old_extent;
    struct DLOOP_Dataloop *new_dlp;

    count = (DLOOP_Count) icount; /* avoid subsequent casting */


    /* if count is zero, handle with contig code, call it an int */
    if (count == 0)
    {
	err = PREPEND_PREFIX(Dataloop_create_contiguous)(0,
							 MPI_INT,
							 dlp_p,
							 dlsz_p,
							 dldepth_p,
							 flag);
	return err;
    }

    /* Skip any initial zero-length blocks; "first" ends up as the index of
     * the first non-empty block, or equal to count if every block is empty.
     */
    for (first = 0; first < count; first++)
        if ((DLOOP_Count) blocklength_array[first])
            break;
    

    is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1;

    if (is_builtin)
    {
	DLOOP_Handle_get_extent_macro(oldtype, old_extent);
	old_loop_depth = 0;
    }
    else
    {
	DLOOP_Handle_get_extent_macro(oldtype, old_extent);
	DLOOP_Handle_get_loopdepth_macro(oldtype, old_loop_depth, flag);
    }

    /* total number of oldtype elements over all blocks */
    for (i=first; i < count; i++)
    {
	old_type_count += (DLOOP_Count) blocklength_array[i];
    }

    contig_count = PREPEND_PREFIX(Type_indexed_count_contig)(count,
                                                             blocklength_array,
                                                             displacement_array,
                                                             dispinbytes,
                                                             old_extent);

    /* if contig_count is zero (no data), handle with contig code */
    if (contig_count == 0)
    {
	err = PREPEND_PREFIX(Dataloop_create_contiguous)(0,
							 MPI_INT,
							 dlp_p,
							 dlsz_p,
							 dldepth_p,
							 flag);
	return err;
    }

    /* optimization:
     *
     * if contig_count == 1 and block starts at displacement 0,
     * store it as a contiguous rather than an indexed dataloop.
     */    
    if ((contig_count == 1) &&
	((!dispinbytes && ((int *) displacement_array)[first] == 0) ||
	 (dispinbytes && ((MPI_Aint *) displacement_array)[first] == 0)))
    {
	err = PREPEND_PREFIX(Dataloop_create_contiguous)(old_type_count,
							 oldtype,
							 dlp_p,
							 dlsz_p,
							 dldepth_p,
							 flag);
	return err;
    }

    /* optimization:
     *
     * if contig_count == 1 (and displacement != 0), store this as
     * a single element blockindexed rather than a lot of individual
     * blocks.
     */
    if (contig_count == 1)
    {
        const void *disp_arr_tmp; /* no ternary assignment to avoid clang warnings */
        if (dispinbytes)
            disp_arr_tmp = &(((const MPI_Aint *)displacement_array)[first]);
        else
            disp_arr_tmp = &(((const int *)displacement_array)[first]);
	err = PREPEND_PREFIX(Dataloop_create_blockindexed)(1,
							   old_type_count,
							   disp_arr_tmp,
							   dispinbytes,
							   oldtype,
							   dlp_p,
							   dlsz_p,
							   dldepth_p,
							   flag);

	return err;
    }

    /* optimization:
     *
     * if block length is the same for all blocks, store it as a
     * blockindexed rather than an indexed dataloop.
     */
    blksz = blocklength_array[first];
    for (i = first+1; i < count; i++)
    {
	if (blocklength_array[i] != blksz)
	{
	    /* a block of a different length was found: knock blksz off the
	     * reference value so the equality test after the loop fails
	     */
	    blksz--;
	    break;
	}
    }
    if (blksz == blocklength_array[first])
    {
        const void *disp_arr_tmp; /* no ternary assignment to avoid clang warnings */
        if (dispinbytes)
            disp_arr_tmp = &(((const MPI_Aint *)displacement_array)[first]);
        else
            disp_arr_tmp = &(((const int *)displacement_array)[first]);
	/* the skipped leading empty blocks are left out of the new type */
	err = PREPEND_PREFIX(Dataloop_create_blockindexed)(icount-first,
							   blksz,
							   disp_arr_tmp,
							   dispinbytes,
							   oldtype,
							   dlp_p,
							   dlsz_p,
							   dldepth_p,
							   flag);

	return err;
    }

    /* note: blockindexed looks for the vector optimization */

    /* TODO: optimization:
     *
     * if an indexed of a contig, absorb the contig into the blocklen array
     * and keep the same overall depth
     */

    /* otherwise storing as an indexed dataloop */

    if (is_builtin)
    {
	/* NOTE(review): this allocation is sized by count, while the
	 * non-builtin branch below uses contig_count; only contig_count
	 * entries are filled in later. Presumably contig_count <= count so
	 * this merely over-allocates -- confirm.
	 */
	PREPEND_PREFIX(Dataloop_alloc)(DLOOP_KIND_INDEXED,
				       count,
				       &new_dlp,
				       &new_loop_sz);
	/* --BEGIN ERROR HANDLING-- */
	if (!new_dlp) return -1;
	/* --END ERROR HANDLING-- */

	new_dlp->kind = DLOOP_KIND_INDEXED | DLOOP_FINAL_MASK;

	if (flag == DLOOP_DATALOOP_ALL_BYTES)
	{
	    /* blocklengths are modified below */
	    new_dlp->el_size   = 1;
	    new_dlp->el_extent = 1;
	    new_dlp->el_type   = MPI_BYTE;
	}
	else
	{
	    new_dlp->el_size   = old_extent;
	    new_dlp->el_extent = old_extent;
	    new_dlp->el_type   = oldtype;
	}
    }
    else
    {
	DLOOP_Dataloop *old_loop_ptr = NULL;
	MPI_Aint old_loop_sz = 0;

	DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr, flag);
	DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz, flag);

	/* new loop carries a copy of the old type's loop */
	PREPEND_PREFIX(Dataloop_alloc_and_copy)(DLOOP_KIND_INDEXED,
						contig_count,
						old_loop_ptr,
						old_loop_sz,
						&new_dlp,
						&new_loop_sz);
	/* --BEGIN ERROR HANDLING-- */
	if (!new_dlp) return -1;
	/* --END ERROR HANDLING-- */

	new_dlp->kind = DLOOP_KIND_INDEXED;

	DLOOP_Handle_get_size_macro(oldtype, new_dlp->el_size);
	DLOOP_Handle_get_extent_macro(oldtype, new_dlp->el_extent);
	DLOOP_Handle_get_basic_type_macro(oldtype, new_dlp->el_type);
    }

    new_dlp->loop_params.i_t.count        = contig_count;
    new_dlp->loop_params.i_t.total_blocks = old_type_count;

    /* copy in blocklength and displacement parameters (in that order)
     *
     * regardless of dispinbytes, we store displacements in bytes in loop.
     */
    DLOOP_Type_indexed_array_copy(count,
				  contig_count,
				  blocklength_array,
				  displacement_array,
				  new_dlp->loop_params.i_t.blocksize_array,
				  new_dlp->loop_params.i_t.offset_array,
				  dispinbytes,
				  old_extent);

    if (is_builtin && (flag == DLOOP_DATALOOP_ALL_BYTES))
    {
	DLOOP_Count *tmp_blklen_array =
	    new_dlp->loop_params.i_t.blocksize_array;

	for (i=0; i < contig_count; i++)
	{
	    /* increase block lengths so they are in bytes */
	    tmp_blklen_array[i] *= old_extent;
	}

	/* the element total is converted to bytes as well */
        new_dlp->loop_params.i_t.total_blocks *= old_extent;
    }

    *dlp_p     = new_dlp;
    *dlsz_p    = new_loop_sz;
    *dldepth_p = old_loop_depth + 1;

    return MPI_SUCCESS;
}
Ejemplo n.º 11
0
/*@
  Dataloop_print - dump a dataloop tree to stdout for debugging
  purposes

Input Parameters:
+ dataloop - root of tree to dump
- depth - starting depth; used to help keep up with where we are in the tree
@*/
void PREPEND_PREFIX(Dataloop_print)(struct DLOOP_Dataloop *dataloop,
				    int depth)
{
    int i;

    if (dataloop == NULL)
    {
        DLOOP_dbg_printf("dataloop is NULL (probably basic type)\n");
        return;
    }

    DLOOP_dbg_printf("loc=%p, treedepth=%d, kind=%d, el_extent=" DLOOP_OFFSET_FMT_DEC_SPEC "\n",
		     dataloop, (int) depth, (int) dataloop->kind, (DLOOP_Offset) dataloop->el_extent);
    switch(dataloop->kind & DLOOP_KIND_MASK) {
	case DLOOP_KIND_CONTIG:
	    DLOOP_dbg_printf("\tCONTIG: count=%d, datatype=%p\n",
			     (int) dataloop->loop_params.c_t.count,
			     dataloop->loop_params.c_t.dataloop);
	    if (!(dataloop->kind & DLOOP_FINAL_MASK))
		PREPEND_PREFIX(Dataloop_print)(dataloop->loop_params.c_t.dataloop, depth+1);
	    break;
	case DLOOP_KIND_VECTOR:
	    DLOOP_dbg_printf("\tVECTOR: count=%d, blksz=%d, stride=" DLOOP_OFFSET_FMT_DEC_SPEC ", datatype=%p\n",
			     (int) dataloop->loop_params.v_t.count,
			     (int) dataloop->loop_params.v_t.blocksize,
			     (DLOOP_Offset) dataloop->loop_params.v_t.stride,
			     dataloop->loop_params.v_t.dataloop);
	    if (!(dataloop->kind & DLOOP_FINAL_MASK))
		PREPEND_PREFIX(Dataloop_print)(dataloop->loop_params.v_t.dataloop, depth+1);
	    break;
	case DLOOP_KIND_BLOCKINDEXED:
	    DLOOP_dbg_printf("\tBLOCKINDEXED: count=%d, blksz=%d, datatype=%p\n",
			     (int) dataloop->loop_params.bi_t.count,
			     (int) dataloop->loop_params.bi_t.blocksize,
			     dataloop->loop_params.bi_t.dataloop);
	    /* print out offsets later */
	    if (!(dataloop->kind & DLOOP_FINAL_MASK))
		PREPEND_PREFIX(Dataloop_print)(dataloop->loop_params.bi_t.dataloop, depth+1);
	    break;
	case DLOOP_KIND_INDEXED:
	    DLOOP_dbg_printf("\tINDEXED: count=%d, datatype=%p\n",
			     (int) dataloop->loop_params.i_t.count,
			     dataloop->loop_params.i_t.dataloop);
	    /* print out blocksizes and offsets later */
	    if (!(dataloop->kind & DLOOP_FINAL_MASK))
		PREPEND_PREFIX(Dataloop_print)(dataloop->loop_params.i_t.dataloop, depth+1);
	    break;
	case DLOOP_KIND_STRUCT:
	    DLOOP_dbg_printf("\tSTRUCT: count=%d\n", (int) dataloop->loop_params.s_t.count);
	    DLOOP_dbg_printf("\tblocksizes:\n");
	    for (i=0; i < dataloop->loop_params.s_t.count; i++)
		DLOOP_dbg_printf("\t\t%d\n", (int) dataloop->loop_params.s_t.blocksize_array[i]);
	    DLOOP_dbg_printf("\toffsets:\n");
	    for (i=0; i < dataloop->loop_params.s_t.count; i++)
		DLOOP_dbg_printf("\t\t" DLOOP_OFFSET_FMT_DEC_SPEC "\n", (DLOOP_Offset) dataloop->loop_params.s_t.offset_array[i]);
	    DLOOP_dbg_printf("\tdatatypes:\n");
	    for (i=0; i < dataloop->loop_params.s_t.count; i++)
		DLOOP_dbg_printf("\t\t%p\n", dataloop->loop_params.s_t.dataloop_array[i]);
	    if (dataloop->kind & DLOOP_FINAL_MASK) break;

	    for (i=0; i < dataloop->loop_params.s_t.count; i++) {
		PREPEND_PREFIX(Dataloop_print)(dataloop->loop_params.s_t.dataloop_array[i],depth+1);
	    }
	    break;
	default:
	    DLOOP_Assert(0);
	    break;
    }
    return;
}
Ejemplo n.º 12
0
/*@
  Dataloop_stream_size - return the size of the data described by the dataloop

Input Parameters:
+ dl_p   - pointer to dataloop for which we will return the size
- sizefn - function for determining size of types in the corresponding stream
           (passing NULL will instead result in el_size values being used)

  Notes:
  The tree is walked from dl_p downwards, multiplying together the element
  counts of each level until a loop with DLOOP_FINAL_MASK is reached; the
  product is then multiplied by the size of that loop's element type.
  A STRUCT loop ends the walk early: each of its children is sized
  recursively, weighted by its block size, and the sum is scaled by the
  count accumulated so far.
@*/
DLOOP_Offset
PREPEND_PREFIX(Dataloop_stream_size)(struct DLOOP_Dataloop *dl_p,
				     DLOOP_Offset (*sizefn)(DLOOP_Type el_type))
{
    DLOOP_Offset tmp_sz, tmp_ct = 1;

    for (;;)
    {
        if ((dl_p->kind & DLOOP_KIND_MASK) == DLOOP_KIND_STRUCT)
        {
            /* same type as the s_t.count it is compared against */
            DLOOP_Count i;

            tmp_sz = 0;
            for (i = 0; i < dl_p->loop_params.s_t.count; i++)
            {
                tmp_sz += (DLOOP_Offset)(dl_p->loop_params.s_t.blocksize_array[i]) *
                    PREPEND_PREFIX(Dataloop_stream_size)(dl_p->loop_params.s_t.dataloop_array[i], sizefn);
            }
            return tmp_sz * tmp_ct;
        }

        switch (dl_p->kind & DLOOP_KIND_MASK) {
        case DLOOP_KIND_CONTIG:
            tmp_ct *= (DLOOP_Offset)(dl_p->loop_params.c_t.count);
#ifdef DLOOP_DEBUG_SIZE
            DLOOP_dbg_printf("stream_size: contig: ct = %d; new tot_ct = " DLOOP_OFFSET_FMT_DEC_SPEC "\n",
                             (int) dl_p->loop_params.c_t.count, (DLOOP_Offset) tmp_ct);
#endif
            break;
        case DLOOP_KIND_VECTOR:
            tmp_ct *= (DLOOP_Offset)(dl_p->loop_params.v_t.count) *
		      (DLOOP_Offset)(dl_p->loop_params.v_t.blocksize);
#ifdef DLOOP_DEBUG_SIZE
            DLOOP_dbg_printf("stream_size: vector: ct = %d; blk = %d; new tot_ct = " DLOOP_OFFSET_FMT_DEC_SPEC "\n",
                             (int) dl_p->loop_params.v_t.count,
                             (int) dl_p->loop_params.v_t.blocksize,
                             (DLOOP_Offset) tmp_ct);
#endif
            break;
        case DLOOP_KIND_BLOCKINDEXED:
            tmp_ct *= (DLOOP_Offset)(dl_p->loop_params.bi_t.count) *
		      (DLOOP_Offset)(dl_p->loop_params.bi_t.blocksize);
#ifdef DLOOP_DEBUG_SIZE
            DLOOP_dbg_printf("stream_size: blkindexed: blks = %d; new tot_ct = " DLOOP_OFFSET_FMT_DEC_SPEC "\n",
                             (int) dl_p->loop_params.bi_t.count *
                             (int) dl_p->loop_params.bi_t.blocksize,
                             (DLOOP_Offset) tmp_ct);
#endif
            break;
        case DLOOP_KIND_INDEXED:
            /* total_blocks already sums the lengths of all blocks */
            tmp_ct *= (DLOOP_Offset)(dl_p->loop_params.i_t.total_blocks);
#ifdef DLOOP_DEBUG_SIZE
            /* label says "indexed" so this trace line is not mistaken for
             * the contig case above
             */
            DLOOP_dbg_printf("stream_size: indexed: blks = %d; new tot_ct = " DLOOP_OFFSET_FMT_DEC_SPEC "\n",
                             (int) dl_p->loop_params.i_t.total_blocks,
                             (DLOOP_Offset) tmp_ct);
#endif
            break;
        default:
            /* --BEGIN ERROR HANDLING-- */
            DLOOP_Assert(0);
            break;
            /* --END ERROR HANDLING-- */
        }

        if (dl_p->kind & DLOOP_FINAL_MASK) break;
        else {
            /* descend to the child loop via the common member view */
            DLOOP_Assert(dl_p->loop_params.cm_t.dataloop != NULL);
            dl_p = dl_p->loop_params.cm_t.dataloop;
        }
    }

    /* call fn for size using bottom type, or use size if fnptr is NULL */
    tmp_sz = ((sizefn) ? sizefn(dl_p->el_type) : dl_p->el_size);

    return tmp_sz * tmp_ct;
}
Ejemplo n.º 13
0
/*@
  Dataloop_alloc_and_copy - allocate the resources used to store a
                            dataloop and copy in old dataloop as
			    appropriate

Input Parameters:
+ kind          - kind of dataloop to allocate
. count         - number of elements in dataloop (kind dependent)
. old_loop      - pointer to old dataloop (or NULL for none)
. old_loop_sz   - size of old dataloop (should be zero if old_loop is NULL)
. new_loop_p    - address at which to store new dataloop pointer
- new_loop_sz_p - pointer to integer in which to store new loop size

  Notes:
  One block is allocated holding, in order, the dataloop structure, the
  kind-specific arrays, and a copy of the old loop at the very end. Each
  piece is padded to the alignment size.

  On allocation failure NULL is stored in *new_loop_p and *new_loop_sz_p
  is left untouched.

  The count parameter passed into this function will often be different
  from the count passed in at the MPI layer.
@*/
void PREPEND_PREFIX(Dataloop_alloc_and_copy)(int kind,
					     DLOOP_Count count,
					     DLOOP_Dataloop *old_loop,
					     DLOOP_Size old_loop_sz,
					     DLOOP_Dataloop **new_loop_p,
					     DLOOP_Size *new_loop_sz_p)
{
    DLOOP_Size new_loop_sz = 0;
    int align_sz = 8; /* default aligns everything to 8-byte boundaries */
    int epsilon;
    DLOOP_Size loop_sz = sizeof(DLOOP_Dataloop);
    DLOOP_Size off_sz = 0, blk_sz = 0, ptr_sz = 0, extent_sz = 0;

    char *pos;
    DLOOP_Dataloop *new_loop;

#ifdef HAVE_MAX_STRUCT_ALIGNMENT
    if (align_sz > HAVE_MAX_STRUCT_ALIGNMENT) {
	align_sz = HAVE_MAX_STRUCT_ALIGNMENT;
    }
#endif

    if (old_loop != NULL) {
	DLOOP_Assert((old_loop_sz % align_sz) == 0);
    }

    /* calculate the space that we actually need for everything; the
     * cases deliberately cascade so each kind also gets the arrays of
     * the kinds listed after it
     */
    switch (kind) {
	case DLOOP_KIND_STRUCT:
	    /* need space for dataloop pointers and extents */
	    ptr_sz = count * sizeof(DLOOP_Dataloop *);
	    extent_sz = count * sizeof(DLOOP_Offset);
	    /* fall through */
	case DLOOP_KIND_INDEXED:
	    /* need space for block sizes */
	    blk_sz = count * sizeof(DLOOP_Count);
	    /* fall through */
	case DLOOP_KIND_BLOCKINDEXED:
	    /* need space for block offsets */
	    off_sz = count * sizeof(DLOOP_Offset);
	    /* fall through */
	case DLOOP_KIND_CONTIG:
	case DLOOP_KIND_VECTOR:
	    break;
	default:
	    DLOOP_Assert(0);
    }

    /* pad everything that we're going to allocate */
    epsilon = loop_sz % align_sz;
    if (epsilon) loop_sz += align_sz - epsilon;

    epsilon = off_sz % align_sz;
    if (epsilon) off_sz += align_sz - epsilon;

    epsilon = blk_sz % align_sz;
    if (epsilon) blk_sz += align_sz - epsilon;

    epsilon = ptr_sz % align_sz;
    if (epsilon) ptr_sz += align_sz - epsilon;

    epsilon = extent_sz % align_sz;
    if (epsilon) extent_sz += align_sz - epsilon;

    new_loop_sz += loop_sz + off_sz + blk_sz + ptr_sz +
	extent_sz + old_loop_sz;

    /* allocate space */
    new_loop = (DLOOP_Dataloop *) DLOOP_Malloc(new_loop_sz);
    if (new_loop == NULL) {
	*new_loop_p = NULL;
	return;
    }

#ifdef DLOOP_DEBUG_MEMORY
    /* The pointer is printed with %p (casting it to int can truncate) and
     * the sizes with %zu ("%z" alone is not a complete conversion).
     */
    DLOOP_dbg_printf("DLOOP_Dataloop_alloc_and_copy: new loop @ %p (tot sz = %zu, loop = %zu, off = %zu, blk = %zu, ptr = %zu, extent = %zu, old = %zu)\n",
		     (void *) new_loop,
		     (size_t) new_loop_sz,
		     (size_t) loop_sz,
		     (size_t) off_sz,
		     (size_t) blk_sz,
		     (size_t) ptr_sz,
		     (size_t) extent_sz,
		     (size_t) old_loop_sz);
#endif

    /* set all the pointers in the new dataloop structure */
    switch (kind) {
	case DLOOP_KIND_STRUCT:
	    /* order is:
	     * - pointers
	     * - blocks
	     * - offsets
	     * - extents
	     */
	    new_loop->loop_params.s_t.dataloop_array =
		(DLOOP_Dataloop **) (((char *) new_loop) + loop_sz);
	    new_loop->loop_params.s_t.blocksize_array =
		(DLOOP_Count *) (((char *) new_loop) + loop_sz + ptr_sz);
	    new_loop->loop_params.s_t.offset_array =
		(DLOOP_Offset *) (((char *) new_loop) + loop_sz +
				  ptr_sz + blk_sz);
	    new_loop->loop_params.s_t.el_extent_array =
		(DLOOP_Offset *) (((char *) new_loop) + loop_sz +
				  ptr_sz + blk_sz + off_sz);
	    break;
	case DLOOP_KIND_INDEXED:
	    /* order is:
	     * - blocks
	     * - offsets
	     */
	    new_loop->loop_params.i_t.blocksize_array =
		(DLOOP_Count *) (((char *) new_loop) + loop_sz);
	    new_loop->loop_params.i_t.offset_array =
		(DLOOP_Offset *) (((char *) new_loop) + loop_sz + blk_sz);
	    if (old_loop == NULL) {
		new_loop->loop_params.i_t.dataloop = NULL;
	    }
	    else {
		/* the old loop's copy sits at the tail of the allocation */
		new_loop->loop_params.i_t.dataloop =
		    (DLOOP_Dataloop *) (((char *) new_loop) +
					(new_loop_sz - old_loop_sz));
	    }
	    break;
	case DLOOP_KIND_BLOCKINDEXED:
	    new_loop->loop_params.bi_t.offset_array =
		(DLOOP_Offset *) (((char *) new_loop) + loop_sz);
	    if (old_loop == NULL) {
		new_loop->loop_params.bi_t.dataloop = NULL;
	    }
	    else {
		new_loop->loop_params.bi_t.dataloop =
		    (DLOOP_Dataloop *) (((char *) new_loop) +
					(new_loop_sz - old_loop_sz));
	    }
	    break;
	case DLOOP_KIND_CONTIG:
	    if (old_loop == NULL) {
		new_loop->loop_params.c_t.dataloop = NULL;
	    }
	    else {
		new_loop->loop_params.c_t.dataloop =
		    (DLOOP_Dataloop *) (((char *) new_loop) +
					(new_loop_sz - old_loop_sz));
	    }
	    break;
	case DLOOP_KIND_VECTOR:
	    if (old_loop == NULL) {
		new_loop->loop_params.v_t.dataloop = NULL;
	    }
	    else {
		new_loop->loop_params.v_t.dataloop =
		    (DLOOP_Dataloop *) (((char *) new_loop) +
					(new_loop_sz - old_loop_sz));
	    }
	    break;
	default:
	    DLOOP_Assert(0);
    }

    /* copy the old loop into the tail of the new allocation, fixing up
     * its internal pointers for the new location
     */
    pos = ((char *) new_loop) + (new_loop_sz - old_loop_sz);
    if (old_loop != NULL) {
	PREPEND_PREFIX(Dataloop_copy)(pos, old_loop, old_loop_sz);
    }

    *new_loop_p    = new_loop;
    *new_loop_sz_p = new_loop_sz;
    return;
}
Ejemplo n.º 14
0
/*
 * Dataloop_create - build the dataloop describing an MPI datatype
 *
 * Input Parameters:
 *   type  - datatype to build a dataloop for
 *   flag  - passed unchanged to the handle accessor macros and to the
 *           Dataloop_create_* helpers
 *
 * Output Parameters:
 *   dlp_p     - receives the dataloop pointer
 *   dlsz_p    - receives the dataloop size
 *   dldepth_p - receives the dataloop depth
 *
 * The function dispatches on the combiner reported by the type's envelope:
 *  - named types go to DLOOP_Dataloop_create_named();
 *  - F90 parameterized types become a 1-element contiguous loop of their
 *    basic type;
 *  - a type that already has a dataloop attached simply gets that loop
 *    returned (no copy is made);
 *  - count-based combiners with a zero count become an empty contiguous
 *    loop of MPI_INT;
 *  - everything else first makes sure types[0] (and, for structs, every
 *    other constituent type) has a dataloop stored on its handle, then
 *    calls the matching Dataloop_create_* helper.
 *
 * The function returns no status; failures are reported through
 * DLOOP_Assert, matching the existing handling of Dataloop_create_struct.
 */
void PREPEND_PREFIX(Dataloop_create)(MPI_Datatype type,
                                     DLOOP_Dataloop **dlp_p,
                                     int *dlsz_p,
                                     int *dldepth_p,
                                     int flag)
{
    int i;
    int err;

    int nr_ints, nr_aints, nr_types, combiner;
    MPI_Datatype *types;
    int *ints;
    MPI_Aint *aints;

    DLOOP_Dataloop *old_dlp;
    int old_dlsz, old_dldepth;

    int dummy1, dummy2, dummy3, type0_combiner, ndims;
    MPI_Datatype tmptype;

    MPI_Aint stride;
    MPI_Aint *disps;

    MPIR_Type_get_envelope_impl(type, &nr_ints, &nr_aints, &nr_types, &combiner);

    /* some named types do need dataloops; handle separately. */
    if (combiner == MPI_COMBINER_NAMED) {
        DLOOP_Dataloop_create_named(type, dlp_p, dlsz_p, dldepth_p, flag);
        return;
    }
    else if (combiner == MPI_COMBINER_F90_REAL ||
             combiner == MPI_COMBINER_F90_COMPLEX ||
             combiner == MPI_COMBINER_F90_INTEGER)
    {
        MPI_Datatype f90basetype;
        DLOOP_Handle_get_basic_type_macro(type, f90basetype);
        PREPEND_PREFIX(Dataloop_create_contiguous)(1 /* count */,
                                                   f90basetype,
                                                   dlp_p, dlsz_p,
                                                   dldepth_p,
                                                   flag);
        return;
    }

    /* Q: should we also check for "hasloop", or is the COMBINER
     *    check above enough to weed out everything that wouldn't
     *    have a loop?
     */
    DLOOP_Handle_get_loopptr_macro(type, old_dlp, flag);
    if (old_dlp != NULL) {
        /* dataloop already created; just return it. */
        *dlp_p = old_dlp;
        DLOOP_Handle_get_loopsize_macro(type, *dlsz_p, flag);
        DLOOP_Handle_get_loopdepth_macro(type, *dldepth_p, flag);
        return;
    }

    /* from here on every exit must go through clean_exit so the
     * contents obtained here are released again */
    PREPEND_PREFIX(Type_access_contents)(type, &ints, &aints, &types);

    /* first check for zero count on types where that makes sense */
    switch(combiner) {
        case MPI_COMBINER_CONTIGUOUS:
        case MPI_COMBINER_VECTOR:
        case MPI_COMBINER_HVECTOR_INTEGER:
        case MPI_COMBINER_HVECTOR:
        case MPI_COMBINER_INDEXED_BLOCK:
        case MPI_COMBINER_HINDEXED_BLOCK:
        case MPI_COMBINER_INDEXED:
        case MPI_COMBINER_HINDEXED_INTEGER:
        case MPI_COMBINER_HINDEXED:
        case MPI_COMBINER_STRUCT_INTEGER:
        case MPI_COMBINER_STRUCT:
            if (ints[0] == 0) {
                PREPEND_PREFIX(Dataloop_create_contiguous)(0,
                                                           MPI_INT,
                                                           dlp_p,
                                                           dlsz_p,
                                                           dldepth_p,
                                                           flag);
                goto clean_exit;
            }
            break;
        default:
            break;
    }

    /* recurse, processing types "below" this one before processing
     * this one, if those type don't already have dataloops.
     *
     * note: in the struct case below we'll handle any additional
     *       types "below" the current one.
     */
    MPIR_Type_get_envelope_impl(types[0], &dummy1, &dummy2, &dummy3, &type0_combiner);
    if (type0_combiner != MPI_COMBINER_NAMED)
    {
        DLOOP_Handle_get_loopptr_macro(types[0], old_dlp, flag);
        if (old_dlp == NULL)
        {
            /* no dataloop already present; create and store one */
            PREPEND_PREFIX(Dataloop_create)(types[0],
                                            &old_dlp,
                                            &old_dlsz,
                                            &old_dldepth,
                                            flag);

            DLOOP_Handle_set_loopptr_macro(types[0], old_dlp, flag);
            DLOOP_Handle_set_loopsize_macro(types[0], old_dlsz, flag);
            DLOOP_Handle_set_loopdepth_macro(types[0], old_dldepth, flag);
        }
        else {
            DLOOP_Handle_get_loopsize_macro(types[0], old_dlsz, flag);
            DLOOP_Handle_get_loopdepth_macro(types[0], old_dldepth, flag);
        }
    }

    switch(combiner)
    {
        case MPI_COMBINER_DUP:
            if (type0_combiner != MPI_COMBINER_NAMED) {
                PREPEND_PREFIX(Dataloop_dup)(old_dlp, old_dlsz, dlp_p);
                *dlsz_p    = old_dlsz;
                *dldepth_p = old_dldepth;
            }
            else {
                PREPEND_PREFIX(Dataloop_create_contiguous)(1,
                                                           types[0],
                                                           dlp_p, dlsz_p,
                                                           dldepth_p,
                                                           flag);
            }
            break;
        case MPI_COMBINER_RESIZED:
            if (type0_combiner != MPI_COMBINER_NAMED) {
                PREPEND_PREFIX(Dataloop_dup)(old_dlp, old_dlsz, dlp_p);
                *dlsz_p    = old_dlsz;
                *dldepth_p = old_dldepth;
            }
            else {
                PREPEND_PREFIX(Dataloop_create_contiguous)(1,
                                                           types[0],
                                                           dlp_p, dlsz_p,
                                                           dldepth_p,
                                                           flag);

                (*dlp_p)->el_extent = aints[1]; /* extent */
            }
            break;
        case MPI_COMBINER_CONTIGUOUS:
            PREPEND_PREFIX(Dataloop_create_contiguous)(ints[0] /* count */,
                                                       types[0] /* oldtype */,
                                                       dlp_p, dlsz_p,
                                                       dldepth_p,
                                                       flag);
            break;
        case MPI_COMBINER_VECTOR:
            PREPEND_PREFIX(Dataloop_create_vector)(ints[0] /* count */,
                                                   ints[1] /* blklen */,
                                                   ints[2] /* stride */,
                                                   0 /* stride not bytes */,
                                                   types[0] /* oldtype */,
                                                   dlp_p, dlsz_p, dldepth_p,
                                                   flag);
            break;
        case MPI_COMBINER_HVECTOR_INTEGER:
        case MPI_COMBINER_HVECTOR:
            /* fortran hvector has integer stride in bytes */
            if (combiner == MPI_COMBINER_HVECTOR_INTEGER) {
                stride = (MPI_Aint) ints[2];
            }
            else {
                stride = aints[0];
            }

            PREPEND_PREFIX(Dataloop_create_vector)(ints[0] /* count */,
                                                   ints[1] /* blklen */,
                                                   stride,
                                                   1 /* stride in bytes */,
                                                   types[0] /* oldtype */,
                                                   dlp_p, dlsz_p, dldepth_p,
                                                   flag);
            break;
        case MPI_COMBINER_INDEXED_BLOCK:
            PREPEND_PREFIX(Dataloop_create_blockindexed)(ints[0] /* count */,
                                                         ints[1] /* blklen */,
                                                         &ints[2] /* disps */,
                                                         0 /* disp not bytes */,
                                                         types[0] /* oldtype */,
                                                         dlp_p, dlsz_p,
                                                         dldepth_p,
                                                         flag);
            break;
        case MPI_COMBINER_HINDEXED_BLOCK:
            /* the byte displacements are already an MPI_Aint array, so
             * hand them over directly (as the HINDEXED case does) rather
             * than through an unchecked heap copy */
            PREPEND_PREFIX(Dataloop_create_blockindexed)(ints[0] /* count */,
                                                         ints[1] /* blklen */,
                                                         aints /* disps */,
                                                         1 /* disp is bytes */,
                                                         types[0] /* oldtype */,
                                                         dlp_p, dlsz_p,
                                                         dldepth_p,
                                                         flag);
            break;
        case MPI_COMBINER_INDEXED:
            PREPEND_PREFIX(Dataloop_create_indexed)(ints[0] /* count */,
                                                    &ints[1] /* blklens */,
                                                    &ints[ints[0]+1] /* disp */,
                                                    0 /* disp not in bytes */,
                                                    types[0] /* oldtype */,
                                                    dlp_p, dlsz_p, dldepth_p,
                                                    flag);
            break;
        case MPI_COMBINER_HINDEXED_INTEGER:
        case MPI_COMBINER_HINDEXED:
            if (combiner == MPI_COMBINER_HINDEXED_INTEGER) {
                /* integer displacements must be widened to MPI_Aint */
                disps = (MPI_Aint *) DLOOP_Malloc(ints[0] * sizeof(MPI_Aint));
                /* count is nonzero here, so NULL means allocation failure;
                 * no error code can be returned from this function */
                DLOOP_Assert(disps != NULL);

                for (i=0; i < ints[0]; i++) {
                    disps[i] = (MPI_Aint) ints[ints[0] + 1 + i];
                }
            }
            else {
                disps = aints;
            }

            PREPEND_PREFIX(Dataloop_create_indexed)(ints[0] /* count */,
                                                    &ints[1] /* blklens */,
                                                    disps,
                                                    1 /* disp in bytes */,
                                                    types[0] /* oldtype */,
                                                    dlp_p, dlsz_p, dldepth_p,
                                                    flag);

            if (combiner == MPI_COMBINER_HINDEXED_INTEGER) {
                DLOOP_Free(disps);
            }

            break;
        case MPI_COMBINER_STRUCT_INTEGER:
        case MPI_COMBINER_STRUCT:
            /* types[0] was handled above; make sure the remaining
             * constituent types have dataloops as well */
            for (i = 1; i < ints[0]; i++) {
                int type_combiner;
                MPIR_Type_get_envelope_impl(types[i], &dummy1, &dummy2, &dummy3, &type_combiner);

                if (type_combiner != MPI_COMBINER_NAMED) {
                    DLOOP_Handle_get_loopptr_macro(types[i], old_dlp, flag);
                    if (old_dlp == NULL)
                    {
                        PREPEND_PREFIX(Dataloop_create)(types[i],
                                                        &old_dlp,
                                                        &old_dlsz,
                                                        &old_dldepth,
                                                        flag);

                        DLOOP_Handle_set_loopptr_macro(types[i], old_dlp,
                                                       flag);
                        DLOOP_Handle_set_loopsize_macro(types[i], old_dlsz,
                                                        flag);
                        DLOOP_Handle_set_loopdepth_macro(types[i], old_dldepth,
                                                         flag);
                    }
                }
            }
            if (combiner == MPI_COMBINER_STRUCT_INTEGER) {
                /* integer displacements must be widened to MPI_Aint */
                disps = (MPI_Aint *) DLOOP_Malloc(ints[0] * sizeof(MPI_Aint));
                /* count is nonzero here, so NULL means allocation failure */
                DLOOP_Assert(disps != NULL);

                for (i=0; i < ints[0]; i++) {
                    disps[i] = (MPI_Aint) ints[ints[0] + 1 + i];
                }
            }
            else {
                disps = aints;
            }

            err = PREPEND_PREFIX(Dataloop_create_struct)(ints[0] /* count */,
                                                         &ints[1] /* blklens */,
                                                         disps,
                                                         types /* oldtype array */,
                                                         dlp_p, dlsz_p, dldepth_p,
                                                         flag);
            /* TODO if/when this function returns error codes, propagate this failure instead */
            DLOOP_Assert(0 == err);
            /* if (err) return err; */

            if (combiner == MPI_COMBINER_STRUCT_INTEGER) {
                DLOOP_Free(disps);
            }
            break;
        case MPI_COMBINER_SUBARRAY:
            /* convert to an equivalent temporary type, build its
             * dataloop, then free the temporary */
            ndims = ints[0];
            PREPEND_PREFIX(Type_convert_subarray)(ndims,
                                                  &ints[1] /* sizes */,
                                                  &ints[1+ndims] /* subsizes */,
                                                  &ints[1+2*ndims] /* starts */,
                                                  ints[1+3*ndims] /* order */,
                                                  types[0],
                                                  &tmptype);

            PREPEND_PREFIX(Dataloop_create)(tmptype,
                                            dlp_p,
                                            dlsz_p,
                                            dldepth_p,
                                            flag);

            MPIR_Type_free_impl(&tmptype);
            break;
        case MPI_COMBINER_DARRAY:
            /* same approach as SUBARRAY: go through a temporary type */
            ndims = ints[2];
            PREPEND_PREFIX(Type_convert_darray)(ints[0] /* size */,
                                                ints[1] /* rank */,
                                                ndims,
                                                &ints[3] /* gsizes */,
                                                &ints[3+ndims] /*distribs */,
                                                &ints[3+2*ndims] /* dargs */,
                                                &ints[3+3*ndims] /* psizes */,
                                                ints[3+4*ndims] /* order */,
                                                types[0],
                                                &tmptype);

            PREPEND_PREFIX(Dataloop_create)(tmptype,
                                            dlp_p,
                                            dlsz_p,
                                            dldepth_p,
                                            flag);

            MPIR_Type_free_impl(&tmptype);
            break;
        default:
            DLOOP_Assert(0);
            break;
    }

 clean_exit:

    PREPEND_PREFIX(Type_release_contents)(type, &ints, &aints, &types);

    /* for now we just leave the intermediate dataloops in place.
     * could remove them to save space if we wanted.
     */

    return;
}
Ejemplo n.º 15
0
/*@
  Dataloop_update - update pointers after a copy operation

Input Parameters:
+ dataloop - pointer to loop to update
- ptrdiff - value indicating offset between old and new pointer values

  This function is used to recursively update all the pointers in a
  dataloop tree.

  Notes:
  Every pointer stored in the loop is shifted by adding ptrdiff to it;
  child loops are shifted first and then visited through the shifted
  pointer.  Loops flagged with DLOOP_FINAL_MASK have no child loop, so
  only their array pointers (if any) are shifted.

  NOTE(review): the struct case shifts blocksize_array, offset_array and
  dataloop_array but not s_t.el_extent_array, although the allocation code
  earlier in this file points el_extent_array into the loop's own
  allocation as well -- confirm whether it should be shifted here too.
@*/
void PREPEND_PREFIX(Dataloop_update)(DLOOP_Dataloop *dataloop,
				     DLOOP_Offset ptrdiff)
{
    /* OPT: only declare these variables down in the Struct case */
    int i;
    DLOOP_Dataloop **looparray;

    /* dispatch on the loop kind with the flag bits masked off */
    switch(dataloop->kind & DLOOP_KIND_MASK) {
	case DLOOP_KIND_CONTIG:
	case DLOOP_KIND_VECTOR:
	    /*
	     * All these really ugly assignments are really of the form:
	     *
	     * ((char *) dataloop->loop_params.c_t.loop) += ptrdiff;
	     *
	     * However, some compilers spit out warnings about casting on the
	     * LHS, so we get this much nastier form instead (using common
	     * struct for contig and vector):
	     */

	    /* contig and vector hold no arrays; only a child pointer,
	     * and only when the loop is not final */
	    if (!(dataloop->kind & DLOOP_FINAL_MASK)) {
		DLOOP_Assert(dataloop->loop_params.cm_t.dataloop);

		DLOOP_Ensure_Offset_fits_in_pointer(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.cm_t.dataloop + ptrdiff);
	    
		dataloop->loop_params.cm_t.dataloop =
		    (DLOOP_Dataloop *) DLOOP_OFFSET_CAST_TO_VOID_PTR
		    (DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.cm_t.dataloop + ptrdiff);

		/* recurse through the already-shifted child pointer */
		PREPEND_PREFIX(Dataloop_update)(dataloop->loop_params.cm_t.dataloop, ptrdiff);
	    }
	    break;

	case DLOOP_KIND_BLOCKINDEXED:
	    /* the offset array is shifted whether or not the loop is final */
	    DLOOP_Assert(dataloop->loop_params.bi_t.offset_array);

	    DLOOP_Ensure_Offset_fits_in_pointer(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.bi_t.offset_array + ptrdiff);

	    dataloop->loop_params.bi_t.offset_array =
		(DLOOP_Offset *) DLOOP_OFFSET_CAST_TO_VOID_PTR
		(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.bi_t.offset_array + ptrdiff);

	    if (!(dataloop->kind & DLOOP_FINAL_MASK)) {
		DLOOP_Assert(dataloop->loop_params.bi_t.dataloop);

		DLOOP_Ensure_Offset_fits_in_pointer(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.bi_t.dataloop + ptrdiff);

		dataloop->loop_params.bi_t.dataloop =
		    (DLOOP_Dataloop *) DLOOP_OFFSET_CAST_TO_VOID_PTR
		    (DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.bi_t.dataloop + ptrdiff);

		PREPEND_PREFIX(Dataloop_update)(dataloop->loop_params.bi_t.dataloop, ptrdiff);
	    }
	    break;

	case DLOOP_KIND_INDEXED:
	    /* two arrays to shift: block sizes, then offsets */
	    DLOOP_Assert(dataloop->loop_params.i_t.blocksize_array);

	    DLOOP_Ensure_Offset_fits_in_pointer(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.i_t.blocksize_array + ptrdiff);

	    dataloop->loop_params.i_t.blocksize_array =
		(DLOOP_Count *) DLOOP_OFFSET_CAST_TO_VOID_PTR
		(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.i_t.blocksize_array + ptrdiff);

	    DLOOP_Assert(dataloop->loop_params.i_t.offset_array);

	    DLOOP_Ensure_Offset_fits_in_pointer(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.i_t.offset_array + ptrdiff);

	    dataloop->loop_params.i_t.offset_array =
		(DLOOP_Offset *) DLOOP_OFFSET_CAST_TO_VOID_PTR
		(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.i_t.offset_array + ptrdiff);

	    if (!(dataloop->kind & DLOOP_FINAL_MASK)) {
		DLOOP_Assert(dataloop->loop_params.i_t.dataloop);

		DLOOP_Ensure_Offset_fits_in_pointer(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.i_t.dataloop + ptrdiff);

		dataloop->loop_params.i_t.dataloop =
		    (DLOOP_Dataloop *) DLOOP_OFFSET_CAST_TO_VOID_PTR
		    (DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.i_t.dataloop + ptrdiff);

		PREPEND_PREFIX(Dataloop_update)(dataloop->loop_params.i_t.dataloop, ptrdiff);
	    }
	    break;

	case DLOOP_KIND_STRUCT:
	    /* block sizes and offsets are shifted for every struct loop */
	    DLOOP_Assert(dataloop->loop_params.s_t.blocksize_array);

	    DLOOP_Ensure_Offset_fits_in_pointer(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.s_t.blocksize_array + ptrdiff);

	    dataloop->loop_params.s_t.blocksize_array =
		(DLOOP_Count *) DLOOP_OFFSET_CAST_TO_VOID_PTR
		(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.s_t.blocksize_array + ptrdiff);

	    DLOOP_Assert(dataloop->loop_params.s_t.offset_array);

	    DLOOP_Ensure_Offset_fits_in_pointer(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.s_t.offset_array + ptrdiff);

	    dataloop->loop_params.s_t.offset_array =
		(DLOOP_Offset *) DLOOP_OFFSET_CAST_TO_VOID_PTR
		(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.s_t.offset_array + ptrdiff);

	    /* a final struct loop has no child loops to fix up */
	    if (dataloop->kind & DLOOP_FINAL_MASK) break;

	    DLOOP_Assert(dataloop->loop_params.s_t.dataloop_array);

	    DLOOP_Ensure_Offset_fits_in_pointer(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.s_t.dataloop_array + ptrdiff);

	    dataloop->loop_params.s_t.dataloop_array =
		(DLOOP_Dataloop **) DLOOP_OFFSET_CAST_TO_VOID_PTR
		(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.s_t.dataloop_array + ptrdiff);

	    /* fix the N dataloop pointers too */
	    looparray = dataloop->loop_params.s_t.dataloop_array;
	    for (i=0; i < dataloop->loop_params.s_t.count; i++) {
		DLOOP_Assert(looparray[i]);

		DLOOP_Ensure_Offset_fits_in_pointer(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) looparray[i] + ptrdiff);

		looparray[i] = (DLOOP_Dataloop *) DLOOP_OFFSET_CAST_TO_VOID_PTR
		    (DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) looparray[i] + ptrdiff);
	    }

	    /* second pass: recurse into each child only after every entry
	     * of the pointer array has been shifted */
	    for (i=0; i < dataloop->loop_params.s_t.count; i++) {
		PREPEND_PREFIX(Dataloop_update)(looparray[i], ptrdiff);
	    }
	    break;
	default:
	    /* --BEGIN ERROR HANDLING-- */
	    /* unknown loop kind */
	    DLOOP_Assert(0);
	    break;
	    /* --END ERROR HANDLING-- */
    }
    return;
}
Ejemplo n.º 16
0
/*@
   Dataloop_create_vector - build the dataloop for a vector-style type

   Arguments:
+  int icount              - number of blocks
.  int iblocklength        - number of oldtype elements per block
.  MPI_Aint astride        - distance between the starts of two blocks
.  int strideinbytes       - nonzero: astride is in bytes;
                             zero: astride is in units of the element extent
.  MPI_Datatype oldtype    - element type
.  DLOOP_Dataloop **dlp_p  - receives the new dataloop
.  int *dlsz_p             - receives the size of the new dataloop
.  int *dldepth_p          - receives the depth of the new dataloop
-  int flag                - passed to the handle macros; when it equals
                             DLOOP_DATALOOP_ALL_BYTES a builtin oldtype is
                             described as a run of MPI_BYTE

   A zero count or zero blocklength yields an empty contiguous loop, and a
   count of one yields a plain contiguous loop of iblocklength elements;
   in both cases the result of Dataloop_create_contiguous is returned.

   Returns 0 on success, -1 on failure.

@*/
int PREPEND_PREFIX(Dataloop_create_vector)(int icount,
        int iblocklength,
        MPI_Aint astride,
        int strideinbytes,
        DLOOP_Type oldtype,
        DLOOP_Dataloop **dlp_p,
        int *dlsz_p,
        int *dldepth_p,
        int flag)
{
    /* widen the arguments once so later arithmetic needs no casts */
    DLOOP_Count  nblocks   = (DLOOP_Count) icount;
    DLOOP_Count  blk_elems = (DLOOP_Count) iblocklength;
    DLOOP_Offset step      = (DLOOP_Offset) astride;

    DLOOP_Dataloop *vec_dlp;
    int vec_sz, vec_depth;
    int is_builtin;

    /* nothing to describe: represent as an empty contig of ints */
    if (nblocks == 0 || blk_elems == 0) {
        return PREPEND_PREFIX(Dataloop_create_contiguous)(0,
                MPI_INT,
                dlp_p,
                dlsz_p,
                dldepth_p,
                flag);
    }

    /* a single block has no stride; a contig loop is enough */
    if (nblocks == 1) {
        return PREPEND_PREFIX(Dataloop_create_contiguous)(iblocklength,
                oldtype,
                dlp_p,
                dlsz_p,
                dldepth_p,
                flag);
    }

    is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1;

    if (!is_builtin) {
        /* derived element type: the new loop sits on top of a copy of
         * the element type's own dataloop */
        DLOOP_Dataloop *inner_dlp;
        int inner_sz = 0, inner_depth = 0;

        DLOOP_Handle_get_loopptr_macro(oldtype, inner_dlp, flag);
        DLOOP_Handle_get_loopsize_macro(oldtype, inner_sz, flag);
        DLOOP_Handle_get_loopdepth_macro(oldtype, inner_depth, flag);

        /* TODO: ACCOUNT FOR PADDING IN LOOP_SZ HERE */
        vec_sz    = sizeof(DLOOP_Dataloop) + inner_sz;
        vec_depth = inner_depth + 1;

        PREPEND_PREFIX(Dataloop_alloc_and_copy)(DLOOP_KIND_VECTOR,
                                                nblocks,
                                                inner_dlp,
                                                inner_sz,
                                                &vec_dlp,
                                                &vec_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!vec_dlp) return -1;
        /* --END ERROR HANDLING-- */

        vec_dlp->kind = DLOOP_KIND_VECTOR;
        DLOOP_Handle_get_size_macro(oldtype, vec_dlp->el_size);
        DLOOP_Handle_get_extent_macro(oldtype, vec_dlp->el_extent);
        DLOOP_Handle_get_basic_type_macro(oldtype, vec_dlp->el_type);
    }
    else {
        /* builtin element type: a single leaf loop */
        DLOOP_Offset basic_sz = 0;

        vec_sz    = sizeof(DLOOP_Dataloop);
        vec_depth = 1;

        PREPEND_PREFIX(Dataloop_alloc)(DLOOP_KIND_VECTOR,
                                       nblocks,
                                       &vec_dlp,
                                       &vec_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!vec_dlp) return -1;
        /* --END ERROR HANDLING-- */

        DLOOP_Handle_get_size_macro(oldtype, basic_sz);
        vec_dlp->kind = DLOOP_KIND_VECTOR | DLOOP_FINAL_MASK;

        if (flag != DLOOP_DATALOOP_ALL_BYTES) {
            vec_dlp->el_size   = basic_sz;
            vec_dlp->el_extent = vec_dlp->el_size;
            vec_dlp->el_type   = oldtype;
        }
        else {
            /* describe the data as bytes: scale the block length, and
             * the stride too when it was given in units of oldtype */
            blk_elems         *= basic_sz;
            vec_dlp->el_size   = 1;
            vec_dlp->el_extent = 1;
            vec_dlp->el_type   = MPI_BYTE;

            if (!strideinbytes) {
                step *= basic_sz;
            }
        }
    }

    /* vector-specific members; the stored stride is always in bytes */
    vec_dlp->loop_params.v_t.count     = nblocks;
    vec_dlp->loop_params.v_t.blocksize = blk_elems;
    if (strideinbytes) {
        vec_dlp->loop_params.v_t.stride = step;
    }
    else {
        vec_dlp->loop_params.v_t.stride = step * vec_dlp->el_extent;
    }

    *dlp_p     = vec_dlp;
    *dlsz_p    = vec_sz;
    *dldepth_p = vec_depth;

    return 0;
}
Ejemplo n.º 17
0
/*
  DLOOP_Type_calc_footprint_struct - calculate size, lb, ub, true_lb,
                                     true_ub, extent, and alignsize for a
                                     struct type

  Input Parameters:
    type            - the struct type (not used by this routine)
    struct_combiner - combiner of the struct type (not used by this routine)
    ints            - integer contents: ints[0] is the block count and
                      ints[1..count] are the block lengths
    aints           - byte displacement of each block
    types           - element type of each block

  Output Parameters:
    tfp - footprint of the struct type

  Blocks with a zero block length are ignored.  If every block has zero
  length, all footprint fields are set to zero.
*/
static void DLOOP_Type_calc_footprint_struct(MPI_Datatype type,
                                             int struct_combiner,
                                             int *ints,
                                             MPI_Aint *aints,
                                             MPI_Datatype *types,
                                             DLOOP_Type_footprint *tfp)
{
    int i, found_sticky_lb = 0, found_sticky_ub = 0, first_iter = 1;
    DLOOP_Offset tmp_lb, tmp_ub, tmp_extent, tmp_true_lb, tmp_true_ub;
    DLOOP_Offset max_alignsz = 0, tmp_size = 0, min_lb = 0, max_ub = 0;
    DLOOP_Offset min_true_lb = 0, max_true_ub = 0;

    int nr_ints, nr_aints, nr_types, combiner;

    /* used to store parameters for constituent types */
    DLOOP_Type_footprint cfp;
    DLOOP_Offset size, lb, ub, true_lb, true_ub, extent, alignsz;
    int sticky_lb, sticky_ub;

    /* find first non-zero blocklength element */
    for (i=0; i < ints[0] && ints[i+1] == 0; i++);

    if (i == ints[0]) /* all zero-length blocks */ {
        tfp->size = tfp->lb = tfp->ub = tfp->extent = tfp->alignsz = 0;
        /* the true bounds are part of the footprint too; do not leave
         * them unset for the caller */
        tfp->true_lb = tfp->true_ub = 0;
        tfp->has_sticky_lb = tfp->has_sticky_ub = 0;
        return;
    }

    for (; i < ints[0]; i++) {
        /* skip zero blocklength elements */
        if (ints[i+1] == 0) continue;

        MPIR_Type_get_envelope_impl(types[i], &nr_ints, &nr_aints, &nr_types, &combiner);

        /* opt: could just inline assignments for combiner == NAMED case */

        PREPEND_PREFIX(Type_calc_footprint)(types[i], &cfp);
        size      = cfp.size;
        lb        = cfp.lb;
        ub        = cfp.ub;
        true_lb   = cfp.true_lb;
        true_ub   = cfp.true_ub;
        extent    = cfp.extent;
        alignsz   = cfp.alignsz;
        sticky_lb = cfp.has_sticky_lb;
        sticky_ub = cfp.has_sticky_ub;

        /* bounds of this whole block, placed at its displacement */
        DLOOP_DATATYPE_BLOCK_LB_UB(ints[i+1] /* blklen */,
                                   aints[i] /* disp */,
                                   lb, ub, extent,
                                   tmp_lb, tmp_ub);

        /* true bounds keep the same distance from lb/ub as in the
         * element type */
        tmp_true_lb = tmp_lb + (true_lb - lb);
        tmp_true_ub = tmp_ub + (true_ub - ub);
        tmp_size += size * (DLOOP_Offset) ints[i+1];

        if (combiner == MPI_COMBINER_NAMED) {
            /* NOTE: This is a special case. If a user creates a struct
             *       with a named type at a non-zero displacement, the
             *       alignment may be different than expected due to
             *       special compiler rules for this case. Thus we must
             *       over-ride the value that we obtained from
             *       Type_calc_footprint() above.
             */
            alignsz = DLOOP_Named_type_alignsize(types[i], aints[i]);
        }

        if (max_alignsz < alignsz) max_alignsz = alignsz;

        /* We save this LB if:
         * (1) this is our first iteration where we saw a nonzero blklen,
         * (2) we haven't found a sticky LB and this LB is lower than
         *     any we have previously seen,
         * (3) we haven't found a sticky LB and this one is sticky, or
         * (4) this sticky LB is lower than any we have previously seen.
         */
        if ((first_iter) ||
            (!found_sticky_lb && min_lb > tmp_lb) ||
            (!found_sticky_lb && sticky_lb) ||
            (sticky_lb && min_lb > tmp_lb))
        {
            min_lb = tmp_lb;
            if (sticky_lb) found_sticky_lb = 1;
        }

        /* same four rules, mirrored, for the UB */
        if ((first_iter) ||
            (!found_sticky_ub && max_ub < tmp_ub) ||
            (!found_sticky_ub && sticky_ub) ||
            (sticky_ub && max_ub < tmp_ub))
        {
            max_ub = tmp_ub;
            if (sticky_ub) found_sticky_ub = 1;
        }

        /* true lb is the LOWEST true lb over all blocks ... */
        if ((first_iter) ||
            (tmp_true_lb < min_true_lb))
        {
            min_true_lb = tmp_true_lb;
        }

        /* ... and true ub the HIGHEST true ub over all blocks */
        if ((first_iter) ||
            (tmp_true_ub > max_true_ub))
        {
            max_true_ub = tmp_true_ub;
        }

        first_iter = 0;
    }

    /* calculate extent, not including potential padding */
    tmp_extent = max_ub - min_lb;

    /* account for padding if no sticky LB/UB is found */
    if ((!found_sticky_lb) && (!found_sticky_ub)) {
        DLOOP_Offset epsilon;

        /* round the extent up to the next multiple of the alignment */
        epsilon = (max_alignsz > 0) ? tmp_extent % max_alignsz : 0;

        if (epsilon) {
            max_ub += (max_alignsz - epsilon);
            tmp_extent = max_ub - min_lb;
        }
    }

    tfp->size    = tmp_size;
    tfp->lb      = min_lb;
    tfp->ub      = max_ub;
    tfp->true_lb = min_true_lb;
    tfp->true_ub = max_true_ub;
    tfp->extent  = tmp_extent;
    tfp->alignsz = max_alignsz;
    tfp->has_sticky_lb = found_sticky_lb;
    tfp->has_sticky_ub = found_sticky_ub;
    return;
}
Ejemplo n.º 18
0
/*
 * Type_calc_footprint - compute size, bounds, extent, alignment and
 *                       sticky-bound flags for a datatype
 *
 * Input Parameters:
 *   type - datatype to examine
 *
 * Output Parameters:
 *   tfp  - footprint of the type
 *
 * Named types are answered directly from their size and extent.  For
 * every other combiner the footprint of types[0] is computed recursively
 * and then combined according to the type's contents; struct types are
 * handled by DLOOP_Type_calc_footprint_struct(), and subarray/darray
 * types are converted to a temporary type whose footprint is used.
 */
void PREPEND_PREFIX(Type_calc_footprint)(MPI_Datatype type,
                                         DLOOP_Type_footprint *tfp)
{
    int nr_ints, nr_aints, nr_types, combiner;
    int *ints;
    MPI_Aint *aints;
    MPI_Datatype *types;

    /* used to store parameters for constituent types */
    DLOOP_Offset size = 0, lb = 0, ub = 0, true_lb = 0, true_ub = 0;
    DLOOP_Offset extent = 0, alignsz;
    int has_sticky_lb, has_sticky_ub;

    /* used for vector/hvector/hvector_integer calculations */
    DLOOP_Offset stride;

    /* used for indexed/hindexed calculations */
    DLOOP_Offset disp;

    /* used for calculations on types with more than one block of data */
    DLOOP_Offset i, min_lb, max_ub, ntypes, tmp_lb, tmp_ub;

    /* used for processing subarray and darray types */
    int ndims;
    MPI_Datatype tmptype;

    MPIR_Type_get_envelope_impl(type, &nr_ints, &nr_aints, &nr_types, &combiner);

    if (combiner == MPI_COMBINER_NAMED) {
        int mpisize;
        MPI_Aint mpiextent;

        MPIR_Type_size_impl(type, &mpisize);
        MPIR_Type_extent_impl(type, &mpiextent);
        tfp->size    = (DLOOP_Offset) mpisize;
        tfp->lb      = 0;
        tfp->ub      = (DLOOP_Offset) mpiextent;
        tfp->true_lb = 0;
        tfp->true_ub = (DLOOP_Offset) mpiextent;
        tfp->extent  = (DLOOP_Offset) mpiextent;
        tfp->alignsz = DLOOP_Named_type_alignsize(type, (MPI_Aint) 0);
        tfp->has_sticky_lb = (type == MPI_LB) ? 1 : 0;
        tfp->has_sticky_ub = (type == MPI_UB) ? 1 : 0;

        /* the contents were never accessed on this path, so there is
         * nothing to release; ints/aints/types are still uninitialized */
        return;
    }

    /* get access to contents; need it immediately to check for zero count */
    PREPEND_PREFIX(Type_access_contents)(type, &ints, &aints, &types);

    /* knock out all the zero count cases */
    if ((combiner == MPI_COMBINER_CONTIGUOUS ||
         combiner == MPI_COMBINER_VECTOR ||
         combiner == MPI_COMBINER_HVECTOR_INTEGER ||
         combiner == MPI_COMBINER_HVECTOR ||
         combiner == MPI_COMBINER_INDEXED_BLOCK ||
         combiner == MPI_COMBINER_HINDEXED_BLOCK ||
         combiner == MPI_COMBINER_INDEXED ||
         combiner == MPI_COMBINER_HINDEXED_INTEGER ||
         combiner == MPI_COMBINER_HINDEXED ||
         combiner == MPI_COMBINER_STRUCT_INTEGER ||
         combiner == MPI_COMBINER_STRUCT) && ints[0] == 0)
    {
        tfp->size = tfp->lb = tfp->ub = tfp->extent = tfp->alignsz = 0;
        tfp->true_lb = tfp->true_ub = 0;
        tfp->has_sticky_lb = tfp->has_sticky_ub = 0;
        goto clean_exit;
    }

    if (combiner != MPI_COMBINER_STRUCT &&
        combiner != MPI_COMBINER_STRUCT_INTEGER)
    {
        DLOOP_Type_footprint cfp;

        /* all non-struct combiners have a single element type */
        PREPEND_PREFIX(Type_calc_footprint)(types[0], &cfp);
        size    = cfp.size;
        lb      = cfp.lb;
        ub      = cfp.ub;
        true_lb = cfp.true_lb;
        true_ub = cfp.true_ub;
        extent  = cfp.extent;
        alignsz = cfp.alignsz;
        has_sticky_lb = cfp.has_sticky_lb;
        has_sticky_ub = cfp.has_sticky_ub;

        /* initialize some common values so we don't have to assign
         * them in every case below.
         */
        tfp->alignsz = alignsz;
        tfp->has_sticky_lb = has_sticky_lb;
        tfp->has_sticky_ub = has_sticky_ub;

    }

    switch(combiner)
    {
        case MPI_COMBINER_DUP:
            tfp->size    = size;
            tfp->lb      = lb;
            tfp->ub      = ub;
            tfp->true_lb = true_lb;
            tfp->true_ub = true_ub;
            tfp->extent  = extent;
            break;
        case MPI_COMBINER_RESIZED:
            tfp->size    = size;
            tfp->lb      = aints[0]; /* lb */
            tfp->ub      = aints[0] + aints[1];
            tfp->true_lb = true_lb;
            tfp->true_ub = true_ub;
            tfp->extent  = aints[1]; /* extent */
            tfp->has_sticky_lb = 1;
            tfp->has_sticky_ub = 1;
            break;
        case MPI_COMBINER_CONTIGUOUS:
            DLOOP_DATATYPE_CONTIG_LB_UB(ints[0] /* count */,
                                        lb, ub, extent,
                                        tfp->lb, tfp->ub);
            tfp->true_lb = tfp->lb + (true_lb - lb);
            tfp->true_ub = tfp->ub + (true_ub - ub);
            tfp->size    = (DLOOP_Offset) ints[0] * size;
            tfp->extent  = tfp->ub - tfp->lb;
            break;
        case MPI_COMBINER_VECTOR:
        case MPI_COMBINER_HVECTOR:
        case MPI_COMBINER_HVECTOR_INTEGER:
            /* normalize the stride to bytes */
            if (combiner == MPI_COMBINER_VECTOR) stride = (DLOOP_Offset) ints[2] * extent;
            else if (combiner == MPI_COMBINER_HVECTOR) stride = aints[0];
            else /* HVECTOR_INTEGER */ stride = (DLOOP_Offset) ints[2];

            DLOOP_DATATYPE_VECTOR_LB_UB(ints[0] /* count */,
                                        stride /* stride in bytes */,
                                        ints[1] /* blklen */,
                                        lb, ub, extent,
                                        tfp->lb, tfp->ub);
            tfp->true_lb = tfp->lb + (true_lb - lb);
            tfp->true_ub = tfp->ub + (true_ub - ub);
            tfp->size    = (DLOOP_Offset) ints[0] * (DLOOP_Offset) ints[1] * size;
            tfp->extent  = tfp->ub - tfp->lb;
            break;
        case MPI_COMBINER_INDEXED_BLOCK:
            /* prime min_lb and max_ub */
            DLOOP_DATATYPE_BLOCK_LB_UB(ints[1] /* blklen */,
                                       (DLOOP_Offset) ints[2] * extent /* disp */,
                                       lb, ub, extent,
                                       min_lb, max_ub);

            for (i=1; i < ints[0]; i++) {
                DLOOP_DATATYPE_BLOCK_LB_UB(ints[1] /* blklen */,
                                           (DLOOP_Offset) ints[i+2] * extent /* disp */,
                                           lb, ub, extent,
                                           tmp_lb, tmp_ub);
                if (tmp_lb < min_lb) min_lb = tmp_lb;
                if (tmp_ub > max_ub) max_ub = tmp_ub;
            }
            tfp->size    = (DLOOP_Offset) ints[0] * (DLOOP_Offset) ints[1] * size;
            tfp->lb      = min_lb;
            tfp->ub      = max_ub;
            tfp->true_lb = min_lb + (true_lb - lb);
            tfp->true_ub = max_ub + (true_ub - ub);
            tfp->extent  = tfp->ub - tfp->lb;
            break;
        case MPI_COMBINER_HINDEXED_BLOCK:
            /* byte displacements of an hindexed_block type are stored in
             * aints[]; ints[] only holds the count and the block length */

            /* prime min_lb and max_ub */
            DLOOP_DATATYPE_BLOCK_LB_UB(ints[1] /* blklen */,
                                       (DLOOP_Offset) aints[0] /* disp */,
                                       lb, ub, extent,
                                       min_lb, max_ub);

            for (i=1; i < ints[0]; i++) {
                DLOOP_DATATYPE_BLOCK_LB_UB(ints[1] /* blklen */,
                                           (DLOOP_Offset) aints[i] /* disp */,
                                           lb, ub, extent,
                                           tmp_lb, tmp_ub);
                if (tmp_lb < min_lb) min_lb = tmp_lb;
                if (tmp_ub > max_ub) max_ub = tmp_ub;
            }
            tfp->size    = (DLOOP_Offset) ints[0] * (DLOOP_Offset) ints[1] * size;
            tfp->lb      = min_lb;
            tfp->ub      = max_ub;
            tfp->true_lb = min_lb + (true_lb - lb);
            tfp->true_ub = max_ub + (true_ub - ub);
            tfp->extent  = tfp->ub - tfp->lb;
            break;
        case MPI_COMBINER_INDEXED:
        case MPI_COMBINER_HINDEXED_INTEGER:
        case MPI_COMBINER_HINDEXED:
            /* find first non-zero blocklength element */
            for (i=0; i < ints[0] && ints[i+1] == 0; i++);
            if (i == ints[0]) {
                /* all zero blocklengths */
                tfp->size = tfp->lb = tfp->ub = tfp->extent = tfp->alignsz = 0;
                tfp->true_lb = tfp->true_ub = 0;
                tfp->has_sticky_lb = tfp->has_sticky_ub = 0;
            }
            else {
                /* prime min_lb, max_ub, count */
                ntypes = ints[i+1];
                if (combiner == MPI_COMBINER_INDEXED)
                    disp = (DLOOP_Offset) ints[ints[0]+i+1] * extent;
                else if (combiner == MPI_COMBINER_HINDEXED_INTEGER)
                    disp = (DLOOP_Offset) ints[ints[0]+i+1];
                else /* MPI_COMBINER_HINDEXED */
                    disp = aints[i];

                DLOOP_DATATYPE_BLOCK_LB_UB(ints[i+1] /* blklen */,
                                           disp,
                                           lb, ub, extent,
                                           min_lb, max_ub);

                for (i++; i < ints[0]; i++) {
                    /* skip zero blocklength elements */
                    if (ints[i+1] == 0) continue;

                    ntypes += ints[i+1];
                    if (combiner == MPI_COMBINER_INDEXED)
                        disp = (DLOOP_Offset) ints[ints[0]+i+1] * extent;
                    else if (combiner == MPI_COMBINER_HINDEXED_INTEGER)
                        disp = (DLOOP_Offset) ints[ints[0]+i+1];
                    else /* MPI_COMBINER_HINDEXED */
                        disp = aints[i];

                    DLOOP_DATATYPE_BLOCK_LB_UB(ints[i+1],
                                               disp,
                                               lb, ub, extent,
                                               tmp_lb, tmp_ub);
                    if (tmp_lb < min_lb) min_lb = tmp_lb;
                    if (tmp_ub > max_ub) max_ub = tmp_ub;
                }
                tfp->size    = ntypes * size;
                tfp->lb      = min_lb;
                tfp->ub      = max_ub;
                tfp->true_lb = min_lb + (true_lb - lb);
                tfp->true_ub = max_ub + (true_ub - ub);
                tfp->extent  = tfp->ub - tfp->lb;
            }
            break;
        case MPI_COMBINER_STRUCT_INTEGER:
            /* not supported here: this assertion always fails */
            DLOOP_Assert(combiner != MPI_COMBINER_STRUCT_INTEGER);
            break;
        case MPI_COMBINER_STRUCT:
            /* sufficiently complicated to pull out into separate fn */
            DLOOP_Type_calc_footprint_struct(type,
                                             combiner, ints, aints, types,
                                             tfp);
            break;
        case MPI_COMBINER_SUBARRAY:
            /* go through an equivalent temporary type */
            ndims = ints[0];
            PREPEND_PREFIX(Type_convert_subarray)(ndims,
                                                  &ints[1] /* sizes */,
                                                  &ints[1+ndims] /* subsz */,
                                                  &ints[1+2*ndims] /* strts */,
                                                  ints[1+3*ndims] /* order */,
                                                  types[0],
                                                  &tmptype);
            PREPEND_PREFIX(Type_calc_footprint)(tmptype, tfp);
            MPIR_Type_free_impl(&tmptype);
            break;
        case MPI_COMBINER_DARRAY:
            /* go through an equivalent temporary type */
            ndims = ints[2];

            PREPEND_PREFIX(Type_convert_darray)(ints[0] /* size */,
                                                ints[1] /* rank */,
                                                ndims,
                                                &ints[3] /* gsizes */,
                                                &ints[3+ndims] /*distribs */,
                                                &ints[3+2*ndims] /* dargs */,
                                                &ints[3+3*ndims] /* psizes */,
                                                ints[3+4*ndims] /* order */,
                                                types[0],
                                                &tmptype);

            PREPEND_PREFIX(Type_calc_footprint)(tmptype, tfp);
            MPIR_Type_free_impl(&tmptype);
            break;
        case MPI_COMBINER_F90_REAL:
        case MPI_COMBINER_F90_COMPLEX:
        case MPI_COMBINER_F90_INTEGER:
        default:
            DLOOP_Assert(0);
            break;
    }

 clean_exit:
    PREPEND_PREFIX(Type_release_contents)(type, &ints, &aints, &types);
    return;
}