Exemple #1
0
/* MPID_Segment_mpi_flatten - flatten a type into a representation
 *                            appropriate for passing to hindexed create.
 *
 * NOTE: blocks will be in units of bytes when returned.
 *
 * WARNING: there's potential for overflow here as we convert from
 *          various types into an index of bytes.
 *
 * Parameters:
 * segp    - pointer to segment structure
 * first   - first byte in segment to pack
 * lastp   - in/out parameter describing last byte to pack (and afterwards
 *           the last byte _actually_ packed)
 *           NOTE: actually returns index of byte _after_ last one packed
 * blklens, disps - the usual blocklength and displacement arrays for MPI
 * lengthp - in/out parameter describing length of array (and afterwards
 *           the amount of the array that has actual data)
 */
void PREPEND_PREFIX(Segment_mpi_flatten)(DLOOP_Segment *segp,
					 DLOOP_Offset first,
					 DLOOP_Offset *lastp,
					 DLOOP_Size *blklens,
					 MPI_Aint *disps,
					 DLOOP_Size *lengthp)
{
    struct PREPEND_PREFIX(mpi_flatten_params) params;

    DLOOP_Assert(*lengthp > 0);

    params.index   = 0;
    params.length  = *lengthp;
    params.blklens = blklens;
    params.disps   = disps;

    PREPEND_PREFIX(Segment_manipulate)(segp,
				       first,
				       lastp,
				       DLOOP_Leaf_contig_mpi_flatten,
				       DLOOP_Leaf_vector_mpi_flatten,
				       DLOOP_Leaf_blkidx_mpi_flatten,
				       DLOOP_Leaf_index_mpi_flatten,
				       NULL,
				       &params);

    /* last value already handled by MPID_Segment_manipulate */
    *lengthp = params.index;
    return;
}
/* MPID_Leaf_contig_count_block
 *
 * Note: because bufp is just an offset, we can ignore it in our
 *       calculations of # of contig regions.
 */
static int DLOOP_Leaf_contig_count_block(DLOOP_Offset *blocks_p,
					 DLOOP_Type el_type,
					 DLOOP_Offset rel_off,
					 DLOOP_Buffer bufp ATTRIBUTE((unused)),
					 void *v_paramp)
{
    DLOOP_Offset size, el_size;
    struct PREPEND_PREFIX(contig_blocks_params) *paramp = v_paramp;

    DLOOP_Assert(*blocks_p > 0);

    DLOOP_Handle_get_size_macro(el_type, el_size);
    size = *blocks_p * el_size;

#ifdef MPID_SP_VERBOSE
    MPIU_dbg_printf("contig count block: count = %d, buf+off = %d, lastloc = " DLOOP_OFFSET_FMT_DEC_SPEC "\n",
		    (int) paramp->count,
		    (int) ((char *) bufp + rel_off),
		    paramp->last_loc);
#endif

    if (paramp->count > 0 && rel_off == paramp->last_loc)
    {
	/* this region is adjacent to the last */
	paramp->last_loc += size;
    }
    else {
	/* new region */
	paramp->last_loc = rel_off + size;
	paramp->count++;
    }
    return 0;
}
Exemple #3
0
/* from MPICH PAC_C_MAX_DOUBLE_FP_ALIGN test:
 *
 * Determines maximum struct alignment with floats and doubles.
 *
 * Return value is 1, 2, 4, or 8.
 */
static int DLOOP_Structalign_double_max()
{
    int is_packed  = 1;
    int is_two     = 1;
    int is_four    = 1;
    int is_eight   = 1;
    struct { char a; double b; } char_double;
    struct { double b; char a; } double_char;
    int size, extent1, extent2;

    size = sizeof(char) + sizeof(double);
    extent1 = sizeof(char_double);
    extent2 = sizeof(double_char);
    if (size != extent1) is_packed = 0;
    if ((extent1 % 2) != 0 && (extent2 % 2) != 0) is_two = 0;
    if ((extent1 % 4) != 0 && (extent2 % 4) != 0) is_four = 0;
    if (sizeof(double) == 8 && (extent1 % 8) != 0 && (extent2 % 8) != 0)
	is_eight = 0;

    if (is_eight) { is_four = 0; is_two = 0; }
    if (is_four) is_two = 0;

    DLOOP_Assert(is_packed + is_two + is_four + is_eight == 1);

    if (is_packed) return 1;
    if (is_two)    return 2;
    if (is_four)   return 4;
    return 8;
}
Exemple #4
0
/* from MPICH PAC_C_MAX_FP_ALIGN test:
 *
 * Checks for max C struct floating point alignment. Note that
 * in this test we are *only* testing float types, whereas in
 * the original test we were testing double and long double also.
 *
 * Return value is 1, 2, 4, 8, or 16.
 */
static int DLOOP_Structalign_float_max()
{
    int is_packed  = 1;
    int is_two     = 1;
    int is_four    = 1;
    int is_eight   = 1;
    int is_sixteen = 1;
    struct { char a; float b; } char_float;
    struct { float b; char a; } float_char;
    int size, extent1, extent2;

    size = sizeof(char) + sizeof(float);
    extent1 = sizeof(char_float);
    extent2 = sizeof(float_char);
    if (size != extent1) is_packed = 0;
    if ((extent1 % 2) != 0 && (extent2 % 2) != 0) is_two = 0;
    if ((extent1 % 4) != 0 && (extent2 % 4) != 0) is_four = 0;
    if (sizeof(float) == 8 && (extent1 % 8) != 0 && (extent2 % 8) != 0)
	is_eight = 0;

    if (is_sixteen) { is_eight = 0; is_four = 0; is_two = 0; }
    if (is_eight) { is_four = 0; is_two = 0; }
    if (is_four) is_two = 0;

    DLOOP_Assert(is_packed + is_two + is_four + is_eight + is_sixteen == 1);

    if (is_packed)  return 1;
    if (is_two)     return 2;
    if (is_four)    return 4;
    if (is_eight)   return 8;
    return 16;
}
Exemple #5
0
/*@
  Dataloop_dup - make a copy of a dataloop

  Returns 0 on success, -1 on failure.
@*/
void PREPEND_PREFIX(Dataloop_dup)(DLOOP_Dataloop *old_loop,
				  DLOOP_Count old_loop_sz,
				  DLOOP_Dataloop **new_loop_p)
{
    DLOOP_Dataloop *new_loop;

    DLOOP_Assert(old_loop != NULL);
    DLOOP_Assert(old_loop_sz > 0);

    new_loop = (DLOOP_Dataloop *) DLOOP_Malloc(old_loop_sz);
    if (new_loop == NULL) {
	*new_loop_p = NULL;
	return;
    }

    PREPEND_PREFIX(Dataloop_copy)(new_loop, old_loop, old_loop_sz);
    *new_loop_p = new_loop;
    return;
}
/*@
   Dataloop_create_pairtype - create dataloop for a pairtype

   Arguments:
+  MPI_Datatype type - the pairtype
.  DLOOP_Dataloop **output_dataloop_ptr
.  int output_dataloop_size
.  int output_dataloop_depth
-  int flag

.N Errors
.N Returns 0 on success, -1 on failure.

   Note:
   This function simply creates the appropriate input parameters for
   use with Dataloop_create_struct and then calls that function.

   This same function could be used to create dataloops for any type
   that actually consists of two distinct elements.
@*/
int PREPEND_PREFIX(Dataloop_create_pairtype)(MPI_Datatype type,
        DLOOP_Dataloop **dlp_p,
        int *dlsz_p,
        int *dldepth_p,
        int flag)
{
    int blocks[2] = { 1, 1 };
    MPI_Aint disps[2];
    MPI_Datatype types[2];

    DLOOP_Assert(type == MPI_FLOAT_INT || type == MPI_DOUBLE_INT ||
                 type == MPI_LONG_INT || type == MPI_SHORT_INT ||
                 type == MPI_LONG_DOUBLE_INT || type == MPI_2INT);

    switch(type) {
    case MPI_FLOAT_INT:
        PAIRTYPE_CONTENTS(MPI_FLOAT, float, MPI_INT, int);
        break;
    case MPI_DOUBLE_INT:
        PAIRTYPE_CONTENTS(MPI_DOUBLE, double, MPI_INT, int);
        break;
    case MPI_LONG_INT:
        PAIRTYPE_CONTENTS(MPI_LONG, long, MPI_INT, int);
        break;
    case MPI_SHORT_INT:
        PAIRTYPE_CONTENTS(MPI_SHORT, short, MPI_INT, int);
        break;
    case MPI_LONG_DOUBLE_INT:
        PAIRTYPE_CONTENTS(MPI_LONG_DOUBLE, long double, MPI_INT, int);
        break;
    case MPI_2INT:
        PAIRTYPE_CONTENTS(MPI_INT, int, MPI_INT, int);
        break;
    }

    return PREPEND_PREFIX(Dataloop_create_struct)(2,
            blocks,
            disps,
            types,
            dlp_p,
            dlsz_p,
            dldepth_p,
            flag);
}
Exemple #7
0
/* from MPICH PAC_C_MAX_LONGDOUBLE_FP_ALIGN test:
 *
 * Determines maximum alignment of structs with long doubles.
 *
 * Return value is 1, 2, 4, 8, or 16.
 */
static int DLOOP_Structalign_long_double_max()
{
    int is_packed  = 1;
    int is_two     = 1;
    int is_four    = 1;
    int is_eight   = 1;
    int is_sixteen = 1;
    struct { char a; long double b; } char_long_double;
    struct { long double b; char a; } long_double_char;
    struct { long double a; int b; char c; } long_double_int_char;
    int size, extent1, extent2;

    size = sizeof(char) + sizeof(long double);
    extent1 = sizeof(char_long_double);
    extent2 = sizeof(long_double_char);
    if (size != extent1) is_packed = 0;
    if ((extent1 % 2) != 0 && (extent2 % 2) != 0) is_two = 0;
    if ((extent1 % 4) != 0 && (extent2 % 4) != 0) is_four = 0;
    if (sizeof(long double) >= 8 && (extent1 % 8) != 0 && (extent2 % 8) != 0)
	is_eight = 0;
    if (sizeof(long double) > 8 && (extent1 % 16) != 0
	&& (extent2 % 16) != 0) is_sixteen = 0;

    extent1 = sizeof(long_double_int_char);
    if ((extent1 % 2) != 0) is_two = 0;
    if ((extent1 % 4) != 0) is_four = 0;
    if (sizeof(long double) >= 8 && (extent1 % 8) != 0)	is_eight = 0;
    if (sizeof(long double) > 8 && (extent1 % 16) != 0) is_sixteen = 0;

    if (is_sixteen) { is_eight = 0; is_four = 0; is_two = 0; }
    if (is_eight) { is_four = 0; is_two = 0; }
    if (is_four) is_two = 0;

    DLOOP_Assert(is_packed + is_two + is_four + is_eight + is_sixteen == 1);

    if (is_packed)  return 1;
    if (is_two)     return 2;
    if (is_four)    return 4;
    if (is_eight)   return 8;
    return 16;
}
int MPIDU_Type_indexed(int count,
		      const int *blocklength_array,
		      const void *displacement_array,
		      int dispinbytes,
		      MPI_Datatype oldtype,
		      MPI_Datatype *newtype)
{
    int mpi_errno = MPI_SUCCESS;
    int is_builtin, old_is_contig;
    int i;
    MPI_Aint contig_count;
    MPI_Aint el_sz, el_ct, old_ct, old_sz;
    MPI_Aint old_lb, old_ub, old_extent, old_true_lb, old_true_ub;
    MPI_Aint min_lb = 0, max_ub = 0, eff_disp;
    MPI_Datatype el_type;

    MPIDU_Datatype *new_dtp;

    if (count == 0) return MPIDU_Type_zerolen(newtype);

    /* sanity check that blocklens are all non-negative */
    for (i = 0; i < count; ++i) {
        DLOOP_Assert(blocklength_array[i] >= 0);
    }

    /* allocate new datatype object and handle */
    new_dtp = (MPIDU_Datatype *) MPIR_Handle_obj_alloc(&MPIDU_Datatype_mem);
    /* --BEGIN ERROR HANDLING-- */
    if (!new_dtp)
    {
	mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
					 MPIR_ERR_RECOVERABLE,
					 "MPIDU_Type_indexed",
					 __LINE__,
					 MPI_ERR_OTHER,
					 "**nomem",
					 0);
	return mpi_errno;
    }
    /* --END ERROR HANDLING-- */

    /* handle is filled in by MPIR_Handle_obj_alloc() */
    MPIR_Object_set_ref(new_dtp, 1);
    new_dtp->is_permanent = 0;
    new_dtp->is_committed = 0;
    new_dtp->attributes   = NULL;
    new_dtp->cache_id     = 0;
    new_dtp->name[0]      = 0;
    new_dtp->contents     = NULL;

    new_dtp->dataloop       = NULL;
    new_dtp->dataloop_size  = -1;
    new_dtp->dataloop_depth = -1;
    new_dtp->hetero_dloop       = NULL;
    new_dtp->hetero_dloop_size  = -1;
    new_dtp->hetero_dloop_depth = -1;

    is_builtin = (HANDLE_GET_KIND(oldtype) == HANDLE_KIND_BUILTIN);

    if (is_builtin)
    {
	/* builtins are handled differently than user-defined types because
	 * they have no associated dataloop or datatype structure.
	 */
	el_sz      = MPIDU_Datatype_get_basic_size(oldtype);
	old_sz     = el_sz;
	el_ct      = 1;
	el_type    = oldtype;

	old_lb        = 0;
	old_true_lb   = 0;
	old_ub        = (MPI_Aint) el_sz;
	old_true_ub   = (MPI_Aint) el_sz;
	old_extent    = (MPI_Aint) el_sz;
	old_is_contig = 1;

	new_dtp->has_sticky_ub = 0;
	new_dtp->has_sticky_lb = 0;

        MPIR_Assign_trunc(new_dtp->alignsize, el_sz, MPI_Aint);
	new_dtp->builtin_element_size = el_sz;
	new_dtp->basic_type       = el_type;

	new_dtp->max_contig_blocks = count;
    }
    else
    {
	/* user-defined base type (oldtype) */
	MPIDU_Datatype *old_dtp;

	MPIDU_Datatype_get_ptr(oldtype, old_dtp);

	/* Ensure that "builtin_element_size" fits into an int datatype. */
	MPIR_Ensure_Aint_fits_in_int(old_dtp->builtin_element_size);

	el_sz   = old_dtp->builtin_element_size;
	old_sz  = old_dtp->size;
	el_ct   = old_dtp->n_builtin_elements;
	el_type = old_dtp->basic_type;

	old_lb        = old_dtp->lb;
	old_true_lb   = old_dtp->true_lb;
	old_ub        = old_dtp->ub;
	old_true_ub   = old_dtp->true_ub;
	old_extent    = old_dtp->extent;
	old_is_contig = old_dtp->is_contig;

	new_dtp->has_sticky_lb = old_dtp->has_sticky_lb;
	new_dtp->has_sticky_ub = old_dtp->has_sticky_ub;
	new_dtp->builtin_element_size  = (MPI_Aint) el_sz;
	new_dtp->basic_type        = el_type;

        new_dtp->max_contig_blocks = 0;
        for(i=0; i<count; i++)
            new_dtp->max_contig_blocks 
                += old_dtp->max_contig_blocks
                    * ((MPI_Aint ) blocklength_array[i]);
    }

    /* find the first nonzero blocklength element */
    i = 0;
    while (i < count && blocklength_array[i] == 0) i++;

    if (i == count) {
	MPIR_Handle_obj_free(&MPIDU_Datatype_mem, new_dtp);
	return MPIDU_Type_zerolen(newtype);
    }

    /* priming for loop */
    old_ct = blocklength_array[i];
    eff_disp = (dispinbytes) ? ((MPI_Aint *) displacement_array)[i] :
	(((MPI_Aint) ((int *) displacement_array)[i]) * old_extent);

    MPIDU_DATATYPE_BLOCK_LB_UB((MPI_Aint) blocklength_array[i],
			      eff_disp,
			      old_lb,
			      old_ub,
			      old_extent,
			      min_lb,
			      max_ub);

    /* determine min lb, max ub, and count of old types in remaining
     * nonzero size blocks
     */
    for (i++; i < count; i++)
    {
	MPI_Aint tmp_lb, tmp_ub;
	
	if (blocklength_array[i] > 0) {
	    old_ct += blocklength_array[i]; /* add more oldtypes */
	
	    eff_disp = (dispinbytes) ? ((MPI_Aint *) displacement_array)[i] :
		(((MPI_Aint) ((int *) displacement_array)[i]) * old_extent);
	
	    /* calculate ub and lb for this block */
	    MPIDU_DATATYPE_BLOCK_LB_UB((MPI_Aint)(blocklength_array[i]),
				      eff_disp,
				      old_lb,
				      old_ub,
				      old_extent,
				      tmp_lb,
				      tmp_ub);
	
	    if (tmp_lb < min_lb) min_lb = tmp_lb;
	    if (tmp_ub > max_ub) max_ub = tmp_ub;
	}
    }

    new_dtp->size = old_ct * old_sz;

    new_dtp->lb      = min_lb;
    new_dtp->ub      = max_ub;
    new_dtp->true_lb = min_lb + (old_true_lb - old_lb);
    new_dtp->true_ub = max_ub + (old_true_ub - old_ub);
    new_dtp->extent  = max_ub - min_lb;

    new_dtp->n_builtin_elements = old_ct * el_ct;

    /* new type is only contig for N types if it's all one big
     * block, its size and extent are the same, and the old type
     * was also contiguous.
     */
    new_dtp->is_contig = 0;
    if(old_is_contig)
    {
	MPI_Aint *blklens = MPL_malloc(count *sizeof(MPI_Aint));
	for (i=0; i<count; i++)
		blklens[i] = blocklength_array[i];
        contig_count = MPIDU_Type_indexed_count_contig(count,
						  blklens,
						  displacement_array,
						  dispinbytes,
						  old_extent);
        new_dtp->max_contig_blocks = contig_count;
        if( (contig_count == 1) &&
            ((MPI_Aint) new_dtp->size == new_dtp->extent))
        {
            new_dtp->is_contig = 1;
        }
	MPL_free(blklens);
    }

    *newtype = new_dtp->handle;
    return mpi_errno;
}
Exemple #9
0
void MPIDU_Type_calc_footprint(MPI_Datatype type, DLOOP_Type_footprint *tfp)
{
    int mpi_errno;
    int nr_ints, nr_aints, nr_types, combiner;
    int *ints;
    MPI_Aint *aints;
    MPI_Datatype *types;

    /* used to store parameters for constituent types */
    DLOOP_Offset size = 0, lb = 0, ub = 0, true_lb = 0, true_ub = 0;
    DLOOP_Offset extent = 0, alignsz;
    int has_sticky_lb, has_sticky_ub;

    /* used for vector/hvector/hvector_integer calculations */
    DLOOP_Offset stride;

    /* used for indexed/hindexed calculations */
    DLOOP_Offset disp;

    /* used for calculations on types with more than one block of data */
    DLOOP_Offset i, min_lb, max_ub, ntypes, tmp_lb, tmp_ub;

    /* used for processing subarray and darray types */
    int ndims;
    MPI_Datatype tmptype;

    MPIR_Type_get_envelope(type, &nr_ints, &nr_aints, &nr_types, &combiner);

    if (combiner == MPI_COMBINER_NAMED) {
	int mpisize;
	MPI_Aint mpiextent;

	MPIR_Datatype_get_size_macro(type, mpisize);
	MPIR_Datatype_get_extent_macro(type, mpiextent);
	tfp->size    = (DLOOP_Offset) mpisize;
	tfp->lb      = 0;
	tfp->ub      = (DLOOP_Offset) mpiextent;
	tfp->true_lb = 0;
	tfp->true_ub = (DLOOP_Offset) mpiextent;
	tfp->extent  = (DLOOP_Offset) mpiextent;
	tfp->alignsz = DLOOP_Named_type_alignsize(type, (MPI_Aint) 0);
	tfp->has_sticky_lb = (type == MPI_LB) ? 1 : 0;
	tfp->has_sticky_ub = (type == MPI_UB) ? 1 : 0;

	goto clean_exit;
    }

    /* get access to contents; need it immediately to check for zero count */
    MPIDU_Type_access_contents(type, &ints, &aints, &types);

    /* knock out all the zero count cases */
    if ((combiner == MPI_COMBINER_CONTIGUOUS ||
	 combiner == MPI_COMBINER_VECTOR ||
	 combiner == MPI_COMBINER_HVECTOR_INTEGER ||
	 combiner == MPI_COMBINER_HVECTOR ||
	 combiner == MPI_COMBINER_INDEXED_BLOCK ||
	 combiner == MPI_COMBINER_HINDEXED_BLOCK ||
	 combiner == MPI_COMBINER_INDEXED ||
	 combiner == MPI_COMBINER_HINDEXED_INTEGER ||
	 combiner == MPI_COMBINER_STRUCT_INTEGER ||
	 combiner == MPI_COMBINER_STRUCT) && ints[0] == 0)
    {
	tfp->size = tfp->lb = tfp->ub = tfp->extent = tfp->alignsz = 0;
	tfp->true_lb = tfp->true_ub = 0;
	tfp->has_sticky_lb = tfp->has_sticky_ub = 0;
	goto clean_exit;
    }

    if (combiner != MPI_COMBINER_STRUCT &&
	combiner != MPI_COMBINER_STRUCT_INTEGER)
    {
	DLOOP_Type_footprint cfp;

	MPIDU_Type_calc_footprint(types[0], &cfp);
	size    = cfp.size;
	lb      = cfp.lb;
	ub      = cfp.ub;
	true_lb = cfp.true_lb;
	true_ub = cfp.true_ub;
	extent  = cfp.extent;
	alignsz = cfp.alignsz;
	has_sticky_lb = cfp.has_sticky_lb;
	has_sticky_ub = cfp.has_sticky_ub;

	/* initialize some common values so we don't have to assign
	 * them in every case below.
	 */
	tfp->alignsz = alignsz;
	tfp->has_sticky_lb = has_sticky_lb;
	tfp->has_sticky_ub = has_sticky_ub;

    }

    switch(combiner)
    {
	case MPI_COMBINER_DUP:
	    tfp->size    = size;
	    tfp->lb      = lb;
	    tfp->ub      = ub;
	    tfp->true_lb = true_lb;
	    tfp->true_ub = true_ub;
	    tfp->extent  = extent;
	    break;
	case MPI_COMBINER_RESIZED:
	    tfp->size    = size;
	    tfp->lb      = aints[0]; /* lb */
	    tfp->ub      = aints[0] + aints[1];
	    tfp->true_lb = true_lb;
	    tfp->true_ub = true_ub;
	    tfp->extent  = aints[1]; /* extent */
	    tfp->has_sticky_lb = 1;
	    tfp->has_sticky_ub = 1;
	    break;
	case MPI_COMBINER_CONTIGUOUS:
	    DLOOP_DATATYPE_CONTIG_LB_UB(ints[0] /* count */,
					lb, ub, extent,
					tfp->lb, tfp->ub);
	    tfp->true_lb = tfp->lb + (true_lb - lb);
	    tfp->true_ub = tfp->ub + (true_ub - ub);
	    tfp->size    = (DLOOP_Offset) ints[0] * size;
	    tfp->extent  = tfp->ub - tfp->lb;
	    break;
	case MPI_COMBINER_VECTOR:
	case MPI_COMBINER_HVECTOR:
	case MPI_COMBINER_HVECTOR_INTEGER:
	    if (combiner == MPI_COMBINER_VECTOR) stride = (DLOOP_Offset) ints[2] * extent;
	    else if (combiner == MPI_COMBINER_HVECTOR) stride = aints[0];
	    else /* HVECTOR_INTEGER */ stride = (DLOOP_Offset) ints[2];

	    DLOOP_DATATYPE_VECTOR_LB_UB(ints[0] /* count */,
					stride /* stride in bytes */,
					ints[1] /* blklen */,
					lb, ub, extent,
					tfp->lb, tfp->ub);
	    tfp->true_lb = tfp->lb + (true_lb - lb);
	    tfp->true_ub = tfp->ub + (true_ub - ub);
	    tfp->size    = (DLOOP_Offset) ints[0] * (DLOOP_Offset) ints[1] * size;
	    tfp->extent  = tfp->ub - tfp->lb;
	    break;
	case MPI_COMBINER_INDEXED_BLOCK:
	    /* prime min_lb and max_ub */
	    DLOOP_DATATYPE_BLOCK_LB_UB(ints[1] /* blklen */,
				       (DLOOP_Offset) ints[2] * extent /* disp */,
				       lb, ub, extent,
				       min_lb, max_ub);

	    for (i=1; i < ints[0]; i++) {
		DLOOP_DATATYPE_BLOCK_LB_UB(ints[1] /* blklen */,
					   (DLOOP_Offset) ints[i+2] * extent /* disp */,
					   lb, ub, extent,
					   tmp_lb, tmp_ub);
		if (tmp_lb < min_lb) min_lb = tmp_lb;
		if (tmp_ub > max_ub) max_ub = tmp_ub;
	    }
	    tfp->size    = (DLOOP_Offset) ints[0] * (DLOOP_Offset) ints[1] * size;
	    tfp->lb      = min_lb;
	    tfp->ub      = max_ub;
	    tfp->true_lb = min_lb + (true_lb - lb);
	    tfp->true_ub = max_ub + (true_ub - ub);
	    tfp->extent  = tfp->ub - tfp->lb;
	    break;
	case MPI_COMBINER_HINDEXED_BLOCK:
	    /* prime min_lb and max_ub */
	    DLOOP_DATATYPE_BLOCK_LB_UB(ints[1] /* blklen */,
				       (DLOOP_Offset) ints[2] /* disp */,
				       lb, ub, extent,
				       min_lb, max_ub);

	    for (i=1; i < ints[0]; i++) {
		DLOOP_DATATYPE_BLOCK_LB_UB(ints[1] /* blklen */,
					   (DLOOP_Offset) ints[i+2] /* disp */,
					   lb, ub, extent,
					   tmp_lb, tmp_ub);
		if (tmp_lb < min_lb) min_lb = tmp_lb;
		if (tmp_ub > max_ub) max_ub = tmp_ub;
	    }
	    tfp->size    = (DLOOP_Offset) ints[0] * (DLOOP_Offset) ints[1] * size;
	    tfp->lb      = min_lb;
	    tfp->ub      = max_ub;
	    tfp->true_lb = min_lb + (true_lb - lb);
	    tfp->true_ub = max_ub + (true_ub - ub);
	    tfp->extent  = tfp->ub - tfp->lb;
	    break;
	case MPI_COMBINER_INDEXED:
	case MPI_COMBINER_HINDEXED_INTEGER:
	case MPI_COMBINER_HINDEXED:
	    /* find first non-zero blocklength element */
	    for (i=0; i < ints[0] && ints[i+1] == 0; i++);
	    if (i == ints[0]) {
		/* all zero blocklengths */
		tfp->size = tfp->lb = tfp->ub = tfp->extent = tfp->alignsz = 0;
		tfp->has_sticky_lb = tfp->has_sticky_ub = 0;
	    }
	    else {
		/* prime min_lb, max_ub, count */
		ntypes = ints[i+1];
		if (combiner == MPI_COMBINER_INDEXED)
		    disp = (DLOOP_Offset) ints[ints[0]+i+1] * extent;
		else if (combiner == MPI_COMBINER_HINDEXED_INTEGER)
		    disp = (DLOOP_Offset) ints[ints[0]+i+1];
		else /* MPI_COMBINER_HINDEXED */
		    disp = aints[i];

		DLOOP_DATATYPE_BLOCK_LB_UB(ints[i+1] /* blklen */,
					   disp,
					   lb, ub, extent,
					   min_lb, max_ub);

		for (i++; i < ints[0]; i++) {
		    /* skip zero blocklength elements */
		    if (ints[i+1] == 0) continue;

		    ntypes += ints[i+1];
		    if (combiner == MPI_COMBINER_INDEXED)
			disp = (DLOOP_Offset) ints[ints[0]+i+1] * extent;
		    else if (combiner == MPI_COMBINER_HINDEXED_INTEGER)
			disp = (DLOOP_Offset) ints[ints[0]+i+1];
		    else /* MPI_COMBINER_HINDEXED */
			disp = aints[i];

		    DLOOP_DATATYPE_BLOCK_LB_UB(ints[i+1],
					       disp,
					       lb, ub, extent,
					       tmp_lb, tmp_ub);
		    if (tmp_lb < min_lb) min_lb = tmp_lb;
		    if (tmp_ub > max_ub) max_ub = tmp_ub;
		}
		tfp->size    = ntypes * size;
		tfp->lb      = min_lb;
		tfp->ub      = max_ub;
		tfp->true_lb = min_lb + (true_lb - lb);
		tfp->true_ub = max_ub + (true_ub - ub);
		tfp->extent  = tfp->ub - tfp->lb;
	    }
	    break;
	case MPI_COMBINER_STRUCT_INTEGER:
	    DLOOP_Assert(combiner != MPI_COMBINER_STRUCT_INTEGER);
	    break;
	case MPI_COMBINER_STRUCT:
	    /* sufficiently complicated to pull out into separate fn */
	    DLOOP_Type_calc_footprint_struct(type,
					     combiner, ints, aints, types,
					     tfp);
	    break;
	case MPI_COMBINER_SUBARRAY:
	    ndims = ints[0];
	    MPIDU_Type_convert_subarray(ndims,
						  &ints[1] /* sizes */,
						  &ints[1+ndims] /* subsz */,
						  &ints[1+2*ndims] /* strts */,
						  ints[1+3*ndims] /* order */,
						  types[0],
						  &tmptype);
	    MPIDU_Type_calc_footprint(tmptype, tfp);
	    MPIR_Type_free_impl(&tmptype);
	    break;
	case MPI_COMBINER_DARRAY:
	    ndims = ints[2];

	    MPIDU_Type_convert_darray(ints[0] /* size */,
						ints[1] /* rank */,
						ndims,
						&ints[3] /* gsizes */,
						&ints[3+ndims] /*distribs */,
						&ints[3+2*ndims] /* dargs */,
						&ints[3+3*ndims] /* psizes */,
						ints[3+4*ndims] /* order */,
						types[0],
						&tmptype);

	    MPIDU_Type_calc_footprint(tmptype, tfp);
	    MPIR_Type_free_impl(&tmptype);
	    break;
	case MPI_COMBINER_F90_REAL:
	case MPI_COMBINER_F90_COMPLEX:
	case MPI_COMBINER_F90_INTEGER:
	default:
	    DLOOP_Assert(0);
	    break;
    }

 clean_exit:
    MPIDU_Type_release_contents(type, &ints, &aints, &types);
    return;
}
Exemple #10
0
/* from MPICH PAC_C_MAX_INTEGER_ALIGN test:
 *
 * Tests for max C struct integer alignment. Note that this is for *all*
 * integer types.
 *
 * Return value is 1, 2, 4, or 8.
 */
static int DLOOP_Structalign_integer_max()
{
    int is_packed  = 1;
    int is_two     = 1;
    int is_four    = 1;
    int is_eight   = 1;

    int size, extent;

    struct { char a; int b; } char_int;
    struct { char a; short b; } char_short;
    struct { char a; long b; } char_long;
    struct { char a; int b; char c; } char_int_char;
    struct { char a; short b; char c; } char_short_char;
#ifdef HAVE_LONG_LONG_INT
    struct { long long int a; char b; } lli_c;
    struct { char a; long long int b; } c_lli;
    int extent2;
#endif

    /* assume max integer alignment isn't 8 if we don't have
     * an eight-byte value.
     */
#ifdef HAVE_LONG_LONG_INT
    if (sizeof(int) < 8 && sizeof(long) < 8 && sizeof(long long int) < 8)
	is_eight = 0;
#else
    if (sizeof(int) < 8 && sizeof(long) < 8) is_eight = 0;
#endif

    size = sizeof(char) + sizeof(int);
    extent = sizeof(char_int);
    if (size != extent) is_packed = 0;
    if ((extent % 2) != 0) is_two = 0;
    if ((extent % 4) != 0) is_four = 0;
    if (sizeof(int) == 8 && (extent % 8) != 0) is_eight = 0;

    size = sizeof(char) + sizeof(short);
    extent = sizeof(char_short);
    if (size != extent) is_packed = 0;
    if ((extent % 2) != 0) is_two = 0;
    if (sizeof(short) == 4 && (extent % 4) != 0) is_four = 0;
    if (sizeof(short) == 8 && (extent % 8) != 0) is_eight = 0;

    size = sizeof(char) + sizeof(long);
    extent = sizeof(char_long);
    if (size != extent) is_packed = 0;
    if ((extent % 2) != 0) is_two = 0;
    if ((extent % 4) != 0) is_four = 0;
    if (sizeof(long) == 8 && (extent % 8) != 0) is_eight = 0;

#ifdef HAVE_LONG_LONG_INT
    size = sizeof(char) + sizeof(long long int);
    extent = sizeof(lli_c);
    extent2 = sizeof(c_lli);
    if (size != extent) is_packed = 0;
    if ((extent % 2) != 0 && (extent2 % 2) != 0) is_two = 0;
    if ((extent % 4) != 0 && (extent2 % 4) != 0) is_four = 0;
    if (sizeof(long long int) >= 8 && (extent % 8) != 0 && (extent2 % 8) != 0)
	is_eight = 0;
#endif

    size = sizeof(char) + sizeof(int) + sizeof(char);
    extent = sizeof(char_int_char);
    if (size != extent) is_packed = 0;
    if ((extent % 2) != 0) is_two = 0;
    if ((extent % 4) != 0) is_four = 0;
    if (sizeof(int) == 8 && (extent % 8) != 0) is_eight = 0;

    size = sizeof(char) + sizeof(short) + sizeof(char);
    extent = sizeof(char_short_char);
    if (size != extent) is_packed = 0;
    if ((extent % 2) != 0) is_two = 0;
    if (sizeof(short) == 4 && (extent % 4) != 0) is_four = 0;
    if (sizeof(short) == 8 && (extent % 8) != 0) is_eight = 0;

    if (is_eight) { is_four = 0; is_two = 0; }
    if (is_four) is_two = 0;

    DLOOP_Assert(is_packed + is_two + is_four + is_eight == 1);

    if (is_packed) return 1;
    if (is_two)    return 2;
    if (is_four)   return 4;
    return 8;
}
/* DLOOP_Type_indexed_array_copy()
 *
 * Copies arrays into place, combining adjacent contiguous regions and
 * dropping zero-length regions.
 *
 * Extent passed in is for the original type.
 *
 * Output displacements are always output in bytes, while block
 * lengths are always output in terms of the base type.
 */
static void DLOOP_Type_indexed_array_copy(DLOOP_Count count,
					  DLOOP_Count contig_count,
					  int *in_blklen_array,
					  void *in_disp_array,
					  DLOOP_Count *out_blklen_array,
					  DLOOP_Offset *out_disp_array,
					  int dispinbytes,
					  DLOOP_Offset old_extent)
{
    DLOOP_Count i, cur_idx = 0;

    out_blklen_array[0] = (DLOOP_Count) in_blklen_array[0];

    if (!dispinbytes)
    {
	out_disp_array[0] = (DLOOP_Offset)
	    ((int *) in_disp_array)[0] * old_extent;
	
	for (i = 1; i < count; i++)
	{
	    if (in_blklen_array[i] == 0)
	    {
		continue;
	    }
	    else if (out_disp_array[cur_idx] +
		     ((DLOOP_Offset) out_blklen_array[cur_idx]) * old_extent ==
		     ((DLOOP_Offset) ((int *) in_disp_array)[i]) * old_extent)
	    {
		/* adjacent to current block; add to block */
		out_blklen_array[cur_idx] += (DLOOP_Count) in_blklen_array[i];
	    }
	    else
	    {
		cur_idx++;
		DLOOP_Assert(cur_idx < contig_count);
		out_disp_array[cur_idx] =
		    ((DLOOP_Offset) ((int *) in_disp_array)[i]) * old_extent;
		out_blklen_array[cur_idx] = in_blklen_array[i];
	    }
	}
    }
    else /* input displacements already in bytes */
    {
	out_disp_array[0] = (DLOOP_Offset) ((MPI_Aint *) in_disp_array)[0];
	
	for (i = 1; i < count; i++)
	{
	    if (in_blklen_array[i] == 0)
	    {
		continue;
	    }
	    else if (out_disp_array[cur_idx] +
		     ((DLOOP_Offset) out_blklen_array[cur_idx]) * old_extent ==
		     ((DLOOP_Offset) ((MPI_Aint *) in_disp_array)[i]))
	    {
		/* adjacent to current block; add to block */
		out_blklen_array[cur_idx] += in_blklen_array[i];
	    }
	    else
	    {
		cur_idx++;
		DLOOP_Assert(cur_idx < contig_count);
		out_disp_array[cur_idx] =
		    (DLOOP_Offset) ((MPI_Aint *) in_disp_array)[i];
		out_blklen_array[cur_idx] = (DLOOP_Count) in_blklen_array[i];
	    }
	}
    }

    DLOOP_Assert(cur_idx == contig_count - 1);
    return;
}
/* DLOOP_Type_indexed_array_copy()
 *
 * Copies arrays into place, combining adjacent contiguous regions and
 * dropping zero-length regions.
 *
 * Extent passed in is for the original type.
 *
 * Output displacements are always output in bytes, while block
 * lengths are always output in terms of the base type.
 */
static void DLOOP_Type_indexed_array_copy(DLOOP_Count count,
					  DLOOP_Count contig_count,
					  const DLOOP_Size *in_blklen_array,
					  const void *in_disp_array,
					  DLOOP_Count *out_blklen_array,
					  DLOOP_Offset *out_disp_array,
					  int dispinbytes,
					  DLOOP_Offset old_extent)
{
    DLOOP_Count i, first, cur_idx = 0;

    /* Skip any initial zero-length blocks */
    for (first = 0; first < count; ++first)
        if ((DLOOP_Count) in_blklen_array[first])
            break;
    
    out_blklen_array[0] = (DLOOP_Count) in_blklen_array[first];

    if (!dispinbytes)
    {
	out_disp_array[0] = (DLOOP_Offset)
	    ((int *) in_disp_array)[first] * old_extent;
	
	for (i = first+1; i < count; ++i)
	{
	    if (in_blklen_array[i] == 0)
	    {
		continue;
	    }
	    else if (out_disp_array[cur_idx] +
		     ((DLOOP_Offset) out_blklen_array[cur_idx]) * old_extent ==
		     ((DLOOP_Offset) ((int *) in_disp_array)[i]) * old_extent)
	    {
		/* adjacent to current block; add to block */
		out_blklen_array[cur_idx] += (DLOOP_Count) in_blklen_array[i];
	    }
	    else
	    {
		cur_idx++;
		DLOOP_Assert(cur_idx < contig_count);
		out_disp_array[cur_idx] =
		    ((DLOOP_Offset) ((int *) in_disp_array)[i]) * old_extent;
		out_blklen_array[cur_idx] = in_blklen_array[i];
	    }
	}
    }
    else /* input displacements already in bytes */
    {
	out_disp_array[0] = (DLOOP_Offset) ((MPI_Aint *) in_disp_array)[first];
	
	for (i = first+1; i < count; ++i)
	{
	    if (in_blklen_array[i] == 0)
	    {
		continue;
	    }
	    else if (out_disp_array[cur_idx] +
		     ((DLOOP_Offset) out_blklen_array[cur_idx]) * old_extent ==
		     ((DLOOP_Offset) ((MPI_Aint *) in_disp_array)[i]))
	    {
		/* adjacent to current block; add to block */
		out_blklen_array[cur_idx] += in_blklen_array[i];
	    }
	    else
	    {
		cur_idx++;
		DLOOP_Assert(cur_idx < contig_count);
		out_disp_array[cur_idx] =
		    (DLOOP_Offset) ((MPI_Aint *) in_disp_array)[i];
		out_blklen_array[cur_idx] = (DLOOP_Count) in_blklen_array[i];
	    }
	}
    }

    DLOOP_Assert(cur_idx == contig_count - 1);
    return;
}
Exemple #13
0
/*@
  Dataloop_update - update pointers after a copy operation

Input Parameters:
+ dataloop - pointer to loop to update
- ptrdiff - value indicating offset between old and new pointer values

  This function is used to recursively update all the pointers in a
  dataloop tree.
@*/
void PREPEND_PREFIX(Dataloop_update)(DLOOP_Dataloop *dataloop,
				     DLOOP_Offset ptrdiff)
{
    /* OPT: only declare these variables down in the Struct case */
    int i;
    DLOOP_Dataloop **looparray;

    switch(dataloop->kind & DLOOP_KIND_MASK) {
	case DLOOP_KIND_CONTIG:
	case DLOOP_KIND_VECTOR:
	    /*
	     * All these really ugly assignments are really of the form:
	     *
	     * ((char *) dataloop->loop_params.c_t.loop) += ptrdiff;
	     *
	     * However, some compilers spit out warnings about casting on the
	     * LHS, so we get this much nastier form instead (using common
	     * struct for contig and vector):
	     */

	    if (!(dataloop->kind & DLOOP_FINAL_MASK)) {
		DLOOP_Assert(dataloop->loop_params.cm_t.dataloop);

		DLOOP_Ensure_Offset_fits_in_pointer(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.cm_t.dataloop + ptrdiff);
	    
		dataloop->loop_params.cm_t.dataloop =
		    (DLOOP_Dataloop *) DLOOP_OFFSET_CAST_TO_VOID_PTR
		    (DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.cm_t.dataloop + ptrdiff);

		PREPEND_PREFIX(Dataloop_update)(dataloop->loop_params.cm_t.dataloop, ptrdiff);
	    }
	    break;

	case DLOOP_KIND_BLOCKINDEXED:
	    DLOOP_Assert(dataloop->loop_params.bi_t.offset_array);

	    DLOOP_Ensure_Offset_fits_in_pointer(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.bi_t.offset_array + ptrdiff);

	    dataloop->loop_params.bi_t.offset_array =
		(DLOOP_Offset *) DLOOP_OFFSET_CAST_TO_VOID_PTR
		(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.bi_t.offset_array + ptrdiff);

	    if (!(dataloop->kind & DLOOP_FINAL_MASK)) {
		DLOOP_Assert(dataloop->loop_params.bi_t.dataloop);

		DLOOP_Ensure_Offset_fits_in_pointer(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.bi_t.dataloop + ptrdiff);

		dataloop->loop_params.bi_t.dataloop =
		    (DLOOP_Dataloop *) DLOOP_OFFSET_CAST_TO_VOID_PTR
		    (DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.bi_t.dataloop + ptrdiff);

		PREPEND_PREFIX(Dataloop_update)(dataloop->loop_params.bi_t.dataloop, ptrdiff);
	    }
	    break;

	case DLOOP_KIND_INDEXED:
	    DLOOP_Assert(dataloop->loop_params.i_t.blocksize_array);

	    DLOOP_Ensure_Offset_fits_in_pointer(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.i_t.blocksize_array + ptrdiff);

	    dataloop->loop_params.i_t.blocksize_array =
		(DLOOP_Count *) DLOOP_OFFSET_CAST_TO_VOID_PTR
		(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.i_t.blocksize_array + ptrdiff);

	    DLOOP_Assert(dataloop->loop_params.i_t.offset_array);

	    DLOOP_Ensure_Offset_fits_in_pointer(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.i_t.offset_array + ptrdiff);

	    dataloop->loop_params.i_t.offset_array =
		(DLOOP_Offset *) DLOOP_OFFSET_CAST_TO_VOID_PTR
		(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.i_t.offset_array + ptrdiff);

	    if (!(dataloop->kind & DLOOP_FINAL_MASK)) {
		DLOOP_Assert(dataloop->loop_params.i_t.dataloop);

		DLOOP_Ensure_Offset_fits_in_pointer(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.i_t.dataloop + ptrdiff);

		dataloop->loop_params.i_t.dataloop =
		    (DLOOP_Dataloop *) DLOOP_OFFSET_CAST_TO_VOID_PTR
		    (DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.i_t.dataloop + ptrdiff);

		PREPEND_PREFIX(Dataloop_update)(dataloop->loop_params.i_t.dataloop, ptrdiff);
	    }
	    break;

	case DLOOP_KIND_STRUCT:
	    DLOOP_Assert(dataloop->loop_params.s_t.blocksize_array);

	    DLOOP_Ensure_Offset_fits_in_pointer(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.s_t.blocksize_array + ptrdiff);

	    dataloop->loop_params.s_t.blocksize_array =
		(DLOOP_Count *) DLOOP_OFFSET_CAST_TO_VOID_PTR
		(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.s_t.blocksize_array + ptrdiff);

	    DLOOP_Assert(dataloop->loop_params.s_t.offset_array);

	    DLOOP_Ensure_Offset_fits_in_pointer(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.s_t.offset_array + ptrdiff);

	    dataloop->loop_params.s_t.offset_array =
		(DLOOP_Offset *) DLOOP_OFFSET_CAST_TO_VOID_PTR
		(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.s_t.offset_array + ptrdiff);

	    if (dataloop->kind & DLOOP_FINAL_MASK) break;

	    DLOOP_Assert(dataloop->loop_params.s_t.dataloop_array);

	    DLOOP_Ensure_Offset_fits_in_pointer(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.s_t.dataloop_array + ptrdiff);

	    dataloop->loop_params.s_t.dataloop_array =
		(DLOOP_Dataloop **) DLOOP_OFFSET_CAST_TO_VOID_PTR
		(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.s_t.dataloop_array + ptrdiff);

	    /* fix the N dataloop pointers too */
	    looparray = dataloop->loop_params.s_t.dataloop_array;
	    for (i=0; i < dataloop->loop_params.s_t.count; i++) {
		DLOOP_Assert(looparray[i]);

		DLOOP_Ensure_Offset_fits_in_pointer(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) looparray[i] + ptrdiff);

		looparray[i] = (DLOOP_Dataloop *) DLOOP_OFFSET_CAST_TO_VOID_PTR
		    (DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) looparray[i] + ptrdiff);
	    }

	    for (i=0; i < dataloop->loop_params.s_t.count; i++) {
		PREPEND_PREFIX(Dataloop_update)(looparray[i], ptrdiff);
	    }
	    break;
	default:
	    /* --BEGIN ERROR HANDLING-- */
	    DLOOP_Assert(0);
	    break;
	    /* --END ERROR HANDLING-- */
    }
    return;
}
Exemple #14
0
static int DLOOP_Leaf_index_mpi_flatten(DLOOP_Offset *blocks_p,
					DLOOP_Count count,
					DLOOP_Count *blockarray,
					DLOOP_Offset *offsetarray,
					DLOOP_Type el_type,
					DLOOP_Offset rel_off,
					void *bufp,
					void *v_paramp)
{
    int i;
    DLOOP_Size size, blocks_left;
    DLOOP_Offset el_size;
    struct PREPEND_PREFIX(mpi_flatten_params) *paramp = v_paramp;

    DLOOP_Handle_get_size_macro(el_type, el_size);
    blocks_left = *blocks_p;

    for (i=0; i < count && blocks_left > 0; i++) {
	int last_idx;
	char *last_end = NULL;

	if (blocks_left > blockarray[i]) {
	    size = blockarray[i] * el_size;
	    blocks_left -= blockarray[i];
	}
	else {
	    /* last pass */
	    size = blocks_left *  el_size;
	    blocks_left = 0;
	}

	last_idx = paramp->index - 1;
	if (last_idx >= 0) {
	    /* Since disps can be negative, we cannot use
	     * DLOOP_Ensure_Offset_fits_in_pointer to verify that disps +
	     * blklens fits in a pointer.  Nor can we use
	     * DLOOP_OFFSET_CAST_TO_VOID_PTR to cast the sum to a pointer.
	     * Just let it truncate, if the sizeof a pointer is less
	     * than the sizeof an MPI_Aint.
	     */
	    last_end = (char *) DLOOP_OFFSET_CAST_TO_VOID_PTR
		       (paramp->disps[last_idx] +
			(MPI_Aint)(paramp->blklens[last_idx]));
	}

	/* Since bufp can be a displacement and can be negative, we
	 * cannot use DLOOP_Ensure_Offset_fits_in_pointer to ensure the
	 * sum fits in a pointer.  Just let it truncate.
	 */
        if ((last_idx == paramp->length-1) &&
            (last_end != ((char *) bufp + rel_off + offsetarray[i])))
	{
	    /* we have used up all our entries, and this one doesn't fit on
	     * the end of the last one.
	     */
	    *blocks_p -= (blocks_left + (size /  el_size));
	    return 1;
	}
        else if (last_idx >= 0 && (last_end == ((char *) bufp + rel_off + offsetarray[i])))
	{
	    /* add this size to the last vector rather than using up new one */
	    paramp->blklens[last_idx] += size;
	}
	else {
	    /* Since bufp can be a displacement and can be negative, we cannot
	     * use DLOOP_VOID_PTR_CAST_TO_OFFSET to cast the sum to a pointer.
	     * Just let it sign extend.
	     */
            paramp->disps[last_idx+1]   = DLOOP_PTR_DISP_CAST_TO_OFFSET bufp +
		rel_off + offsetarray[i];
	    paramp->blklens[last_idx+1] = size; /* these blocks are in bytes */
	    paramp->index++;
	}
    }

    /* if we get here then we processed ALL the blocks; don't need to update
     * blocks_p
     */
    DLOOP_Assert(blocks_left == 0);
    return 0;
}
Exemple #15
0
/*@
  Dataloop_alloc_and_copy - allocate the resources used to store a
                            dataloop and copy in old dataloop as
			    appropriate

Input Parameters:
+ kind          - kind of dataloop to allocate
. count         - number of elements in dataloop (kind dependent)
. old_loop      - pointer to old dataloop (or NULL for none)
. old_loop_sz   - size of old dataloop (should be zero if old_loop is NULL)
. new_loop_p    - address at which to store new dataloop pointer
- new_loop_sz_p - pointer to integer in which to store new loop size

  Notes:
  The count parameter passed into this function will often be different
  from the count passed in at the MPI layer.
@*/
void PREPEND_PREFIX(Dataloop_alloc_and_copy)(int kind,
					     DLOOP_Count count,
					     DLOOP_Dataloop *old_loop,
					     DLOOP_Size old_loop_sz,
					     DLOOP_Dataloop **new_loop_p,
					     DLOOP_Size *new_loop_sz_p)
{
    DLOOP_Size new_loop_sz = 0;
    int align_sz = 8; /* default aligns everything to 8-byte boundaries */
    int epsilon;
    DLOOP_Size loop_sz = sizeof(DLOOP_Dataloop);
    DLOOP_Size off_sz = 0, blk_sz = 0, ptr_sz = 0, extent_sz = 0;

    char *pos;
    DLOOP_Dataloop *new_loop;

#ifdef HAVE_MAX_STRUCT_ALIGNMENT
    if (align_sz > HAVE_MAX_STRUCT_ALIGNMENT) {
	align_sz = HAVE_MAX_STRUCT_ALIGNMENT;
    }
#endif

    if (old_loop != NULL) {
	DLOOP_Assert((old_loop_sz % align_sz) == 0);
    }

    /* calculate the space that we actually need for everything */
    switch (kind) {
	case DLOOP_KIND_STRUCT:
	    /* need space for dataloop pointers and extents */
	    ptr_sz = count * sizeof(DLOOP_Dataloop *);
	    extent_sz = count * sizeof(DLOOP_Offset);
	case DLOOP_KIND_INDEXED:
	    /* need space for block sizes */
	    blk_sz = count * sizeof(DLOOP_Count);
	case DLOOP_KIND_BLOCKINDEXED:
	    /* need space for block offsets */
	    off_sz = count * sizeof(DLOOP_Offset);
	case DLOOP_KIND_CONTIG:
	case DLOOP_KIND_VECTOR:
	    break;
	default:
	    DLOOP_Assert(0);
    }

    /* pad everything that we're going to allocate */
    epsilon = loop_sz % align_sz;
    if (epsilon) loop_sz += align_sz - epsilon;

    epsilon = off_sz % align_sz;
    if (epsilon) off_sz += align_sz - epsilon;

    epsilon = blk_sz % align_sz;
    if (epsilon) blk_sz += align_sz - epsilon;

    epsilon = ptr_sz % align_sz;
    if (epsilon) ptr_sz += align_sz - epsilon;

    epsilon = extent_sz % align_sz;
    if (epsilon) extent_sz += align_sz - epsilon;

    new_loop_sz += loop_sz + off_sz + blk_sz + ptr_sz +
	extent_sz + old_loop_sz;

    /* allocate space */
    new_loop = (DLOOP_Dataloop *) DLOOP_Malloc(new_loop_sz);
    if (new_loop == NULL) {
	*new_loop_p = NULL;
	return;
    }

#ifdef DLOOP_DEBUG_MEMORY
    DLOOP_dbg_printf("DLOOP_Dataloop_alloc_and_copy: new loop @ %x (tot sz = %z, loop = %z, off = %z, blk = %z, ptr = %z, extent = %z, old = %z)\n",
		     (int) new_loop,
		     new_loop_sz,
		     loop_sz,
		     off_sz,
		     blk_sz,
		     ptr_sz,
		     extent_sz,
		     old_loop_sz);
#endif

    /* set all the pointers in the new dataloop structure */
    switch (kind) {
	case DLOOP_KIND_STRUCT:
	    /* order is:
	     * - pointers
	     * - blocks
	     * - offsets
	     * - extents
	     */
	    new_loop->loop_params.s_t.dataloop_array =
		(DLOOP_Dataloop **) (((char *) new_loop) + loop_sz);
	    new_loop->loop_params.s_t.blocksize_array =
		(DLOOP_Count *) (((char *) new_loop) + loop_sz + ptr_sz);
	    new_loop->loop_params.s_t.offset_array =
		(DLOOP_Offset *) (((char *) new_loop) + loop_sz +
				  ptr_sz + blk_sz);
	    new_loop->loop_params.s_t.el_extent_array =
		(DLOOP_Offset *) (((char *) new_loop) + loop_sz +
				  ptr_sz + blk_sz + off_sz);
	    break;
	case DLOOP_KIND_INDEXED:
	    /* order is:
	     * - blocks
	     * - offsets
	     */
	    new_loop->loop_params.i_t.blocksize_array =
		(DLOOP_Count *) (((char *) new_loop) + loop_sz);
	    new_loop->loop_params.i_t.offset_array =
		(DLOOP_Offset *) (((char *) new_loop) + loop_sz + blk_sz);
	    if (old_loop == NULL) {
		new_loop->loop_params.i_t.dataloop = NULL;
	    }
	    else {
		new_loop->loop_params.i_t.dataloop =
		    (DLOOP_Dataloop *) (((char *) new_loop) +
					(new_loop_sz - old_loop_sz));
	    }
	    break;
	case DLOOP_KIND_BLOCKINDEXED:
	    new_loop->loop_params.bi_t.offset_array =
		(DLOOP_Offset *) (((char *) new_loop) + loop_sz);
	    if (old_loop == NULL) {
		new_loop->loop_params.bi_t.dataloop = NULL;
	    }
	    else {
		new_loop->loop_params.bi_t.dataloop =
		    (DLOOP_Dataloop *) (((char *) new_loop) +
					(new_loop_sz - old_loop_sz));
	    }
	    break;
	case DLOOP_KIND_CONTIG:
	    if (old_loop == NULL) {
		new_loop->loop_params.c_t.dataloop = NULL;
	    }
	    else {
		new_loop->loop_params.c_t.dataloop =
		    (DLOOP_Dataloop *) (((char *) new_loop) +
					(new_loop_sz - old_loop_sz));
	    }
	    break;
	case DLOOP_KIND_VECTOR:
	    if (old_loop == NULL) {
		new_loop->loop_params.v_t.dataloop = NULL;
	    }
	    else {
		new_loop->loop_params.v_t.dataloop =
		    (DLOOP_Dataloop *) (((char *) new_loop) +
					(new_loop_sz - old_loop_sz));
	    }
	    break;
	default:
	    DLOOP_Assert(0);
    }

    pos = ((char *) new_loop) + (new_loop_sz - old_loop_sz);
    if (old_loop != NULL) {
	PREPEND_PREFIX(Dataloop_copy)(pos, old_loop, old_loop_sz);
    }

    *new_loop_p    = new_loop;
    *new_loop_sz_p = new_loop_sz;
    return;
}
Exemple #16
0
/*@
  Dataloop_stream_size - return the size of the data described by the dataloop

Input Parameters:
+ dl_p   - pointer to dataloop for which we will return the size
- sizefn - function for determining size of types in the corresponding stream
           (passing NULL will instead result in el_size values being used)

@*/
DLOOP_Offset
PREPEND_PREFIX(Dataloop_stream_size)(struct DLOOP_Dataloop *dl_p,
				     DLOOP_Offset (*sizefn)(DLOOP_Type el_type))
{
    DLOOP_Offset tmp_sz, tmp_ct = 1;

    for (;;)
    {
        if ((dl_p->kind & DLOOP_KIND_MASK) == DLOOP_KIND_STRUCT)
        {
            int i;

            tmp_sz = 0;
            for (i = 0; i < dl_p->loop_params.s_t.count; i++)
            {
                tmp_sz += (DLOOP_Offset)(dl_p->loop_params.s_t.blocksize_array[i]) *
                    PREPEND_PREFIX(Dataloop_stream_size)(dl_p->loop_params.s_t.dataloop_array[i], sizefn);
            }
            return tmp_sz * tmp_ct;
        }

        switch (dl_p->kind & DLOOP_KIND_MASK) {
        case DLOOP_KIND_CONTIG:
            tmp_ct *= (DLOOP_Offset)(dl_p->loop_params.c_t.count);
#ifdef DLOOP_DEBUG_SIZE
            DLOOP_dbg_printf("stream_size: contig: ct = %d; new tot_ct = " DLOOP_OFFSET_FMT_DEC_SPEC "\n",
                             (int) dl_p->loop_params.c_t.count, (DLOOP_Offset) tmp_ct);
#endif
            break;
        case DLOOP_KIND_VECTOR:
            tmp_ct *= (DLOOP_Offset)(dl_p->loop_params.v_t.count) *
		      (DLOOP_Offset)(dl_p->loop_params.v_t.blocksize);
#ifdef DLOOP_DEBUG_SIZE
            DLOOP_dbg_printf("stream_size: vector: ct = %d; blk = %d; new tot_ct = " DLOOP_OFFSET_FMT_DEC_SPEC "\n",
                             (int) dl_p->loop_params.v_t.count,
                             (int) dl_p->loop_params.v_t.blocksize,
                             (DLOOP_Offset) tmp_ct);
#endif
            break;
        case DLOOP_KIND_BLOCKINDEXED:
            tmp_ct *= (DLOOP_Offset)(dl_p->loop_params.bi_t.count) *
		      (DLOOP_Offset)(dl_p->loop_params.bi_t.blocksize);
#ifdef DLOOP_DEBUG_SIZE
            DLOOP_dbg_printf("stream_size: blkindexed: blks = %d; new tot_ct = " DLOOP_OFFSET_FMT_DEC_SPEC "\n",
                             (int) dl_p->loop_params.bi_t.count *
                             (int) dl_p->loop_params.bi_t.blocksize,
                             (DLOOP_Offset) tmp_ct);
#endif
            break;
        case DLOOP_KIND_INDEXED:
            tmp_ct *= (DLOOP_Offset)(dl_p->loop_params.i_t.total_blocks);
#ifdef DLOOP_DEBUG_SIZE
            DLOOP_dbg_printf("stream_size: contig: blks = %d; new tot_ct = " DLOOP_OFFSET_FMT_DEC_SPEC "\n",
                             (int) dl_p->loop_params.i_t.total_blocks,
                             (DLOOP_Offset) tmp_ct);
#endif
            break;
        default:
            /* --BEGIN ERROR HANDLING-- */
            DLOOP_Assert(0);
            break;
            /* --END ERROR HANDLING-- */
        }

        if (dl_p->kind & DLOOP_FINAL_MASK) break;
        else {
            DLOOP_Assert(dl_p->loop_params.cm_t.dataloop != NULL);
            dl_p = dl_p->loop_params.cm_t.dataloop;
        }
    }

    /* call fn for size using bottom type, or use size if fnptr is NULL */
    tmp_sz = ((sizefn) ? sizefn(dl_p->el_type) : dl_p->el_size);

    return tmp_sz * tmp_ct;
}
Exemple #17
0
/*@
  Dataloop_print - dump a dataloop tree to stdout for debugging
  purposes

Input Parameters:
+ dataloop - root of tree to dump
- depth - starting depth; used to help keep up with where we are in the tree
@*/
void PREPEND_PREFIX(Dataloop_print)(struct DLOOP_Dataloop *dataloop,
				    int depth)
{
    int i;

    if (dataloop == NULL)
    {
        DLOOP_dbg_printf("dataloop is NULL (probably basic type)\n");
        return;
    }

    DLOOP_dbg_printf("loc=%p, treedepth=%d, kind=%d, el_extent=" DLOOP_OFFSET_FMT_DEC_SPEC "\n",
		     dataloop, (int) depth, (int) dataloop->kind, (DLOOP_Offset) dataloop->el_extent);
    switch(dataloop->kind & DLOOP_KIND_MASK) {
	case DLOOP_KIND_CONTIG:
	    DLOOP_dbg_printf("\tCONTIG: count=%d, datatype=%p\n",
			     (int) dataloop->loop_params.c_t.count,
			     dataloop->loop_params.c_t.dataloop);
	    if (!(dataloop->kind & DLOOP_FINAL_MASK))
		PREPEND_PREFIX(Dataloop_print)(dataloop->loop_params.c_t.dataloop, depth+1);
	    break;
	case DLOOP_KIND_VECTOR:
	    DLOOP_dbg_printf("\tVECTOR: count=%d, blksz=%d, stride=" DLOOP_OFFSET_FMT_DEC_SPEC ", datatype=%p\n",
			     (int) dataloop->loop_params.v_t.count,
			     (int) dataloop->loop_params.v_t.blocksize,
			     (DLOOP_Offset) dataloop->loop_params.v_t.stride,
			     dataloop->loop_params.v_t.dataloop);
	    if (!(dataloop->kind & DLOOP_FINAL_MASK))
		PREPEND_PREFIX(Dataloop_print)(dataloop->loop_params.v_t.dataloop, depth+1);
	    break;
	case DLOOP_KIND_BLOCKINDEXED:
	    DLOOP_dbg_printf("\tBLOCKINDEXED: count=%d, blksz=%d, datatype=%p\n",
			     (int) dataloop->loop_params.bi_t.count,
			     (int) dataloop->loop_params.bi_t.blocksize,
			     dataloop->loop_params.bi_t.dataloop);
	    /* print out offsets later */
	    if (!(dataloop->kind & DLOOP_FINAL_MASK))
		PREPEND_PREFIX(Dataloop_print)(dataloop->loop_params.bi_t.dataloop, depth+1);
	    break;
	case DLOOP_KIND_INDEXED:
	    DLOOP_dbg_printf("\tINDEXED: count=%d, datatype=%p\n",
			     (int) dataloop->loop_params.i_t.count,
			     dataloop->loop_params.i_t.dataloop);
	    /* print out blocksizes and offsets later */
	    if (!(dataloop->kind & DLOOP_FINAL_MASK))
		PREPEND_PREFIX(Dataloop_print)(dataloop->loop_params.i_t.dataloop, depth+1);
	    break;
	case DLOOP_KIND_STRUCT:
	    DLOOP_dbg_printf("\tSTRUCT: count=%d\n", (int) dataloop->loop_params.s_t.count);
	    DLOOP_dbg_printf("\tblocksizes:\n");
	    for (i=0; i < dataloop->loop_params.s_t.count; i++)
		DLOOP_dbg_printf("\t\t%d\n", (int) dataloop->loop_params.s_t.blocksize_array[i]);
	    DLOOP_dbg_printf("\toffsets:\n");
	    for (i=0; i < dataloop->loop_params.s_t.count; i++)
		DLOOP_dbg_printf("\t\t" DLOOP_OFFSET_FMT_DEC_SPEC "\n", (DLOOP_Offset) dataloop->loop_params.s_t.offset_array[i]);
	    DLOOP_dbg_printf("\tdatatypes:\n");
	    for (i=0; i < dataloop->loop_params.s_t.count; i++)
		DLOOP_dbg_printf("\t\t%p\n", dataloop->loop_params.s_t.dataloop_array[i]);
	    if (dataloop->kind & DLOOP_FINAL_MASK) break;

	    for (i=0; i < dataloop->loop_params.s_t.count; i++) {
		PREPEND_PREFIX(Dataloop_print)(dataloop->loop_params.s_t.dataloop_array[i],depth+1);
	    }
	    break;
	default:
	    DLOOP_Assert(0);
	    break;
    }
    return;
}
Exemple #18
0
/* DLOOP_Leaf_vector_mpi_flatten
 *
 * Input Parameters:
 * blocks_p - [inout] pointer to a count of blocks (total, for all noncontiguous pieces)
 * count    - # of noncontiguous regions
 * blksz    - size of each noncontiguous region
 * stride   - distance in bytes from start of one region to start of next
 * el_type - elemental type (e.g. MPI_INT)
 * ...
 *
 * Note: this is only called when the starting position is at the beginning
 * of a whole block in a vector type.
 *
 * TODO: MAKE THIS CODE SMARTER, USING THE SAME GENERAL APPROACH AS IN THE
 *       COUNT BLOCK CODE ABOVE.
 */
static int DLOOP_Leaf_vector_mpi_flatten(DLOOP_Offset *blocks_p,
					 DLOOP_Count count,
					 DLOOP_Count blksz,
					 DLOOP_Offset stride,
					 DLOOP_Type el_type,
					 DLOOP_Offset rel_off, /* offset into buffer */
					 void *bufp, /* start of buffer */
					 void *v_paramp)
{
    int i;
    DLOOP_Size size, blocks_left;
    DLOOP_Offset el_size;
    struct PREPEND_PREFIX(mpi_flatten_params) *paramp = v_paramp;

    DLOOP_Handle_get_size_macro(el_type, el_size);
    blocks_left = *blocks_p;

    for (i=0; i < count && blocks_left > 0; i++) {
	int last_idx;
	char *last_end = NULL;

	if (blocks_left > blksz) {
	    size = blksz * el_size;
	    blocks_left -= blksz;
	}
	else {
	    /* last pass */
	    size = blocks_left * el_size;
	    blocks_left = 0;
	}

	last_idx = paramp->index - 1;
	if (last_idx >= 0) {
	    /* Since disps can be negative, we cannot use
	     * DLOOP_Ensure_Offset_fits_in_pointer to verify that disps +
	     * blklens fits in a pointer.  Nor can we use
	     * DLOOP_OFFSET_CAST_TO_VOID_PTR to cast the sum to a pointer.
	     * Just let it truncate, if the sizeof a pointer is less
	     * than the sizeof an MPI_Aint.
	     */
	    last_end = (char *) DLOOP_OFFSET_CAST_TO_VOID_PTR
		       (paramp->disps[last_idx] +
			 (MPI_Aint)(paramp->blklens[last_idx]));
	}

	/* Since bufp can be a displacement and can be negative, we cannot use
	 * DLOOP_Ensure_Offset_fits_in_pointer to ensure the sum fits in a pointer.
	 * Just let it truncate.
	 */
        if ((last_idx == paramp->length-1) &&
            (last_end != ((char *) bufp + rel_off)))
	{
	    /* we have used up all our entries, and this one doesn't fit on
	     * the end of the last one.
	     */
	    *blocks_p -= (blocks_left + (size / el_size));
#ifdef MPID_SP_VERBOSE
	    MPL_DBG_MSG_FMT(MPIR_DBG_DATATYPE,VERBOSE,(MPL_DBG_FDEST,"\t[vector to vec exiting (1): next ind = %d, " DLOOP_OFFSET_FMT_DEC_SPEC " blocks processed.\n",
			    paramp->u.pack_vector.index,
                            *blocks_p));
#endif
	    return 1;
	}
        else if (last_idx >= 0 && (last_end == ((char *) bufp + rel_off)))
	{
	    /* add this size to the last vector rather than using up new one */
	    paramp->blklens[last_idx] += size;
	}
	else {
	    /* Since bufp can be a displacement and can be negative, we cannot use
	     * DLOOP_VOID_PTR_CAST_TO_OFFSET to cast the sum to a pointer.  Just let it
	     * sign extend.
	     */
            paramp->disps[last_idx+1]   = DLOOP_PTR_DISP_CAST_TO_OFFSET bufp + rel_off;
	    paramp->blklens[last_idx+1] = size;
	    paramp->index++;
	}

	rel_off += stride;
    }

#ifdef MPID_SP_VERBOSE
    MPL_DBG_MSG_FMT(MPIR_DBG_DATATYPE,VERBOSE,(MPL_DBG_FDEST,"\t[vector to vec exiting (2): next ind = %d, " DLOOP_OFFSET_FMT_DEC_SPEC " blocks processed.\n",
		    paramp->u.pack_vector.index,
                    *blocks_p));
#endif

    /* if we get here then we processed ALL the blocks; don't need to update
     * blocks_p
     */

    DLOOP_Assert(blocks_left == 0);
    return 0;
}
Exemple #19
0
void PREPEND_PREFIX(Dataloop_create)(MPI_Datatype type,
				     DLOOP_Dataloop **dlp_p,
				     int *dlsz_p,
				     int *dldepth_p,
				     int flag)
{
    int i;
    int err;

    int nr_ints, nr_aints, nr_types, combiner;
    MPI_Datatype *types;
    int *ints;
    MPI_Aint *aints;

    DLOOP_Dataloop *old_dlp;
    int old_dlsz, old_dldepth;

    int dummy1, dummy2, dummy3, type0_combiner, ndims;
    MPI_Datatype tmptype;

    MPI_Aint stride;
    MPI_Aint *disps;

    MPIR_Type_get_envelope_impl(type, &nr_ints, &nr_aints, &nr_types, &combiner);

    /* some named types do need dataloops; handle separately. */
    if (combiner == MPI_COMBINER_NAMED) {
	DLOOP_Dataloop_create_named(type, dlp_p, dlsz_p, dldepth_p, flag);
	return;
    }
    else if (combiner == MPI_COMBINER_F90_REAL ||
             combiner == MPI_COMBINER_F90_COMPLEX ||
             combiner == MPI_COMBINER_F90_INTEGER)
    {
        MPI_Datatype f90basetype;
        DLOOP_Handle_get_basic_type_macro(type, f90basetype);
        PREPEND_PREFIX(Dataloop_create_contiguous)(1 /* count */,
                                                   f90basetype,
                                                   dlp_p, dlsz_p,
                                                   dldepth_p,
                                                   flag);
        return;
    }

    /* Q: should we also check for "hasloop", or is the COMBINER
     *    check above enough to weed out everything that wouldn't
     *    have a loop?
     */
    DLOOP_Handle_get_loopptr_macro(type, old_dlp, flag);
    if (old_dlp != NULL) {
	/* dataloop already created; just return it. */
	*dlp_p = old_dlp;
	DLOOP_Handle_get_loopsize_macro(type, *dlsz_p, flag);
	DLOOP_Handle_get_loopdepth_macro(type, *dldepth_p, flag);
	return;
    }

    PREPEND_PREFIX(Type_access_contents)(type, &ints, &aints, &types);

    /* first check for zero count on types where that makes sense */
    switch(combiner) {
	case MPI_COMBINER_CONTIGUOUS:
	case MPI_COMBINER_VECTOR:
	case MPI_COMBINER_HVECTOR_INTEGER:
	case MPI_COMBINER_HVECTOR:
	case MPI_COMBINER_INDEXED_BLOCK:
	case MPI_COMBINER_HINDEXED_BLOCK:
	case MPI_COMBINER_INDEXED:
	case MPI_COMBINER_HINDEXED_INTEGER:
	case MPI_COMBINER_HINDEXED:
	case MPI_COMBINER_STRUCT_INTEGER:
	case MPI_COMBINER_STRUCT:
	    if (ints[0] == 0) {
		PREPEND_PREFIX(Dataloop_create_contiguous)(0,
							   MPI_INT,
							   dlp_p,
							   dlsz_p,
							   dldepth_p,
							   flag);
		goto clean_exit;
	    }
	    break;
	default:
	    break;
    }

    /* recurse, processing types "below" this one before processing
     * this one, if those type don't already have dataloops.
     *
     * note: in the struct case below we'll handle any additional
     *       types "below" the current one.
     */
    MPIR_Type_get_envelope_impl(types[0], &dummy1, &dummy2, &dummy3, &type0_combiner);
    if (type0_combiner != MPI_COMBINER_NAMED)
    {
	DLOOP_Handle_get_loopptr_macro(types[0], old_dlp, flag);
	if (old_dlp == NULL)
	{
	    /* no dataloop already present; create and store one */
	    PREPEND_PREFIX(Dataloop_create)(types[0],
					    &old_dlp,
					    &old_dlsz,
					    &old_dldepth,
					    flag);

	    DLOOP_Handle_set_loopptr_macro(types[0], old_dlp, flag);
	    DLOOP_Handle_set_loopsize_macro(types[0], old_dlsz, flag);
	    DLOOP_Handle_set_loopdepth_macro(types[0], old_dldepth, flag);
	}
	else {
	    DLOOP_Handle_get_loopsize_macro(types[0], old_dlsz, flag);
	    DLOOP_Handle_get_loopdepth_macro(types[0], old_dldepth, flag);
	}
    }
       
    switch(combiner)
    {
	case MPI_COMBINER_DUP:
	    if (type0_combiner != MPI_COMBINER_NAMED) {
		PREPEND_PREFIX(Dataloop_dup)(old_dlp, old_dlsz, dlp_p);
		*dlsz_p    = old_dlsz;
		*dldepth_p = old_dldepth;
	    }
	    else {
		PREPEND_PREFIX(Dataloop_create_contiguous)(1,
							   types[0], 
							   dlp_p, dlsz_p,
							   dldepth_p,
							   flag);
	    }
	    break;
	case MPI_COMBINER_RESIZED:
	    if (type0_combiner != MPI_COMBINER_NAMED) {
		PREPEND_PREFIX(Dataloop_dup)(old_dlp, old_dlsz, dlp_p);
		*dlsz_p    = old_dlsz;
		*dldepth_p = old_dldepth;
	    }
	    else {
		PREPEND_PREFIX(Dataloop_create_contiguous)(1,
							   types[0], 
							   dlp_p, dlsz_p,
							   dldepth_p,
							   flag);

		(*dlp_p)->el_extent = aints[1]; /* extent */
	    }
	    break;
	case MPI_COMBINER_CONTIGUOUS:
	    PREPEND_PREFIX(Dataloop_create_contiguous)(ints[0] /* count */,
						       types[0] /* oldtype */,
						       dlp_p, dlsz_p,
						       dldepth_p,
						       flag);
	    break;
	case MPI_COMBINER_VECTOR:
	    PREPEND_PREFIX(Dataloop_create_vector)(ints[0] /* count */,
						   ints[1] /* blklen */,
						   ints[2] /* stride */,
						   0 /* stride not bytes */,
						   types[0] /* oldtype */,
						   dlp_p, dlsz_p, dldepth_p,
						   flag);
	    break;
	case MPI_COMBINER_HVECTOR_INTEGER:
	case MPI_COMBINER_HVECTOR:
	    /* fortran hvector has integer stride in bytes */
	    if (combiner == MPI_COMBINER_HVECTOR_INTEGER) {
		stride = (MPI_Aint) ints[2];
	    }
	    else {
		stride = aints[0];
	    }

	    PREPEND_PREFIX(Dataloop_create_vector)(ints[0] /* count */,
						   ints[1] /* blklen */,
						   stride,
						   1 /* stride in bytes */,
						   types[0] /* oldtype */,
						   dlp_p, dlsz_p, dldepth_p,
						   flag);
	    break;
	case MPI_COMBINER_INDEXED_BLOCK:
	    PREPEND_PREFIX(Dataloop_create_blockindexed)(ints[0] /* count */,
							 ints[1] /* blklen */,
							 &ints[2] /* disps */,
							 0 /* disp not bytes */,
							 types[0] /* oldtype */,
							 dlp_p, dlsz_p,
							 dldepth_p,
							 flag);
	    break;
	case MPI_COMBINER_HINDEXED_BLOCK:
            disps = (MPI_Aint *) DLOOP_Malloc(ints[0] * sizeof(MPI_Aint));
            for (i = 0; i < ints[0]; i++)
                disps[i] = aints[i];
	    PREPEND_PREFIX(Dataloop_create_blockindexed)(ints[0] /* count */,
							 ints[1] /* blklen */,
							 disps /* disps */,
							 1 /* disp is bytes */,
							 types[0] /* oldtype */,
							 dlp_p, dlsz_p,
							 dldepth_p,
							 flag);
            DLOOP_Free(disps);
	    break;
	case MPI_COMBINER_INDEXED:
	    PREPEND_PREFIX(Dataloop_create_indexed)(ints[0] /* count */,
						    &ints[1] /* blklens */,
						    &ints[ints[0]+1] /* disp */,
						    0 /* disp not in bytes */,
						    types[0] /* oldtype */,
						    dlp_p, dlsz_p, dldepth_p,
						    flag);
	    break;
	case MPI_COMBINER_HINDEXED_INTEGER:
	case MPI_COMBINER_HINDEXED:
	    if (combiner == MPI_COMBINER_HINDEXED_INTEGER) {
		disps = (MPI_Aint *) DLOOP_Malloc(ints[0] * sizeof(MPI_Aint));

		for (i=0; i < ints[0]; i++) {
		    disps[i] = (MPI_Aint) ints[ints[0] + 1 + i];
		}
	    }
	    else {
		disps = aints;
	    }

	    PREPEND_PREFIX(Dataloop_create_indexed)(ints[0] /* count */,
						    &ints[1] /* blklens */,
						    disps,
						    1 /* disp in bytes */,
						    types[0] /* oldtype */,
						    dlp_p, dlsz_p, dldepth_p,
						    flag);

	    if (combiner == MPI_COMBINER_HINDEXED_INTEGER) {
		DLOOP_Free(disps);
	    }

	    break;
	case MPI_COMBINER_STRUCT_INTEGER:
	case MPI_COMBINER_STRUCT:
	    for (i = 1; i < ints[0]; i++) {
		int type_combiner;
		MPIR_Type_get_envelope_impl(types[i], &dummy1, &dummy2, &dummy3, &type_combiner);

		if (type_combiner != MPI_COMBINER_NAMED) {
		    DLOOP_Handle_get_loopptr_macro(types[i], old_dlp, flag);
		    if (old_dlp == NULL)
		    {
			PREPEND_PREFIX(Dataloop_create)(types[i],
							&old_dlp,
							&old_dlsz,
							&old_dldepth,
							flag);
			
			DLOOP_Handle_set_loopptr_macro(types[i], old_dlp,
						       flag);
			DLOOP_Handle_set_loopsize_macro(types[i], old_dlsz,
							flag);
			DLOOP_Handle_set_loopdepth_macro(types[i], old_dldepth,
							 flag);
		    }
		}
	    }
	    if (combiner == MPI_COMBINER_STRUCT_INTEGER) {
		disps = (MPI_Aint *) DLOOP_Malloc(ints[0] * sizeof(MPI_Aint));

		for (i=0; i < ints[0]; i++) {
		    disps[i] = (MPI_Aint) ints[ints[0] + 1 + i];
		}
	    }
	    else {
		disps = aints;
	    }

            err = PREPEND_PREFIX(Dataloop_create_struct)(ints[0] /* count */,
                                                         &ints[1] /* blklens */,
                                                         disps,
                                                         types /* oldtype array */,
                                                         dlp_p, dlsz_p, dldepth_p,
                                                         flag);
            /* TODO if/when this function returns error codes, propagate this failure instead */
            DLOOP_Assert(0 == err);
            /* if (err) return err; */

	    if (combiner == MPI_COMBINER_STRUCT_INTEGER) {
		DLOOP_Free(disps);
	    }
	    break;
	case MPI_COMBINER_SUBARRAY:
	    ndims = ints[0];
	    PREPEND_PREFIX(Type_convert_subarray)(ndims,
						  &ints[1] /* sizes */,
						  &ints[1+ndims] /* subsizes */,
						  &ints[1+2*ndims] /* starts */,
						  ints[1+3*ndims] /* order */,
						  types[0],
						  &tmptype);

	    PREPEND_PREFIX(Dataloop_create)(tmptype,
					    dlp_p,
					    dlsz_p,
					    dldepth_p,
					    flag);
	    
	    MPIR_Type_free_impl(&tmptype);
	    break;
	case MPI_COMBINER_DARRAY:
	    ndims = ints[2];
	    PREPEND_PREFIX(Type_convert_darray)(ints[0] /* size */,
						ints[1] /* rank */,
						ndims,
						&ints[3] /* gsizes */,
						&ints[3+ndims] /*distribs */,
						&ints[3+2*ndims] /* dargs */,
						&ints[3+3*ndims] /* psizes */,
						ints[3+4*ndims] /* order */,
						types[0],
						&tmptype);

	    PREPEND_PREFIX(Dataloop_create)(tmptype,
					    dlp_p,
					    dlsz_p,
					    dldepth_p,
					    flag);

	    MPIR_Type_free_impl(&tmptype);
	    break;
	default:
	    DLOOP_Assert(0);
	    break;
    }

 clean_exit:

    PREPEND_PREFIX(Type_release_contents)(type, &ints, &aints, &types);

    /* for now we just leave the intermediate dataloops in place.
     * could remove them to save space if we wanted.
     */

    return;
}