示例#1
0
/*@
  Dataloop_create_struct - create the dataloop representation for a
  struct datatype

Input Parameters:
+ count - number of blocks in the struct
. blklens - number of elements in each block
. disps - offsets of blocks from start of type in bytes
. oldtypes - types (using handle) of datatypes on which the struct is based
- flag - representation requested by the caller; compared here against
         DLOOP_DATALOOP_HOMOGENEOUS and DLOOP_DATALOOP_ALL_BYTES and
         otherwise passed through to the helper routines

Output Parameters:
+ dlp_p - pointer to address in which to place pointer to new dataloop
. dlsz_p - pointer to address in which to place size of new dataloop
- dldepth_p - pointer to address in which to place depth of new dataloop

  Return Value:
  0 on success.  On failure, -1 or the error value produced by the
  helper routine that failed.

  Notes:
  This function relies on others, like Dataloop_create_indexed, to create
  types in some cases. This call (like all the rest) takes int blklens
  and MPI_Aint displacements, so it's possible to overflow when working
  with a particularly large struct type in some cases. This isn't detected
  or corrected in this code at this time.

  Blocks with a zero blklen and blocks whose type is MPI_LB or MPI_UB are
  skipped everywhere in this routine.

@*/
int MPIR_Dataloop_create_struct(DLOOP_Count count,
					   const int *blklens,
					   const MPI_Aint *disps,
					   const DLOOP_Type *oldtypes,
					   DLOOP_Dataloop **dlp_p,
					   MPI_Aint *dlsz_p,
					   int *dldepth_p,
					   int flag)
{
    int err, i, nr_basics = 0, nr_derived = 0, type_pos = 0;

    /* first_basic / first_derived hold the single builtin / derived type
     * seen so far, or MPI_DATATYPE_NULL once two different ones were seen.
     */
    DLOOP_Type first_basic = MPI_DATATYPE_NULL,
        first_derived = MPI_DATATYPE_NULL;

    /* variables used in general case only */
    int loop_idx, new_loop_depth;
    int old_loop_depth = 0;
    MPI_Aint new_loop_sz, old_loop_sz = 0;

    DLOOP_Dataloop *new_dlp, *curpos;

    /* if count is zero, handle with contig code, call it a int */
    if (count == 0)
    {
        err = MPIR_Dataloop_create_contiguous(0,
                                              MPI_INT,
                                              dlp_p,
                                              dlsz_p,
                                              dldepth_p,
                                              flag);
        return err;
    }

    /* browse the old types and characterize */
    for (i = 0; i < count; i++)
    {
        /* ignore type elements with a zero blklen */
        if (blklens[i] == 0) continue;

        if (oldtypes[i] != MPI_LB && oldtypes[i] != MPI_UB)
        {
            int is_builtin;

            is_builtin =
                (DLOOP_Handle_hasloop_macro(oldtypes[i])) ? 0 : 1;

            if (is_builtin)
            {
                if (nr_basics == 0)
                {
                    first_basic = oldtypes[i];
                    type_pos = i;
                }
                else if (oldtypes[i] != first_basic)
                {
                    first_basic = MPI_DATATYPE_NULL;
                }
                nr_basics++;
            }
            else /* derived type */
            {
                if (nr_derived == 0)
                {
                    first_derived = oldtypes[i];
                    type_pos = i;
                }
                else if (oldtypes[i] != first_derived)
                {
                    first_derived = MPI_DATATYPE_NULL;
                }
                nr_derived++;
            }
        }
    }

    /* note on optimizations:
     *
     * because LB, UB, and extent calculations are handled as part of
     * the Datatype, we can safely ignore them in all our calculations
     * here.
     */

    /* optimization:
     *
     * if there were only MPI_LBs and MPI_UBs in the struct type,
     * treat it as a zero-element contiguous (just as count == 0).
     */
    if (nr_basics == 0 && nr_derived == 0)
    {
        err = MPIR_Dataloop_create_contiguous(0,
                                              MPI_INT,
                                              dlp_p,
                                              dlsz_p,
                                              dldepth_p,
                                              flag);
        return err;
    }

    /* optimization:
     *
     * if there is only one unique instance of a type in the struct, treat it
     * as a blockindexed type.
     *
     * notes:
     *
     * if the displacement happens to be zero, the blockindexed code will
     * optimize this into a contig.
     */
    if (nr_basics + nr_derived == 1)
    {
        /* type_pos is index to only real type in array */
        err = MPIR_Dataloop_create_blockindexed
            (1, /* count */
             blklens[type_pos],
             &disps[type_pos],
             1, /* displacement in bytes */
             oldtypes[type_pos],
             dlp_p,
             dlsz_p,
             dldepth_p,
             flag);

        return err;
    }

    /* optimization:
     *
     * if there only one unique type (more than one instance) in the
     * struct, treat it as an indexed type.
     *
     * notes:
     *
     * this will apply to a single type with an LB/UB, as those
     * are handled elsewhere.
     *
     */
    if (((nr_derived == 0) && (first_basic != MPI_DATATYPE_NULL)) ||
        ((nr_basics == 0) && (first_derived != MPI_DATATYPE_NULL)))
    {
        return DLOOP_Dataloop_create_unique_type_struct(count,
                                                        blklens,
                                                        disps,
                                                        oldtypes,
                                                        type_pos,
                                                        dlp_p,
                                                        dlsz_p,
                                                        dldepth_p,
                                                        flag);
    }

    /* optimization:
     *
     * if there are no derived types and caller indicated either a
     * homogeneous system or the "all bytes" conversion, convert
     * everything to bytes and use an indexed type.
     */
    if (nr_derived == 0 && ((flag == DLOOP_DATALOOP_HOMOGENEOUS) ||
                            (flag == DLOOP_DATALOOP_ALL_BYTES)))
    {
        return DLOOP_Dataloop_create_basic_all_bytes_struct(count,
                                                            blklens,
                                                            disps,
                                                            oldtypes,
                                                            dlp_p,
                                                            dlsz_p,
                                                            dldepth_p,
                                                            flag);
    }

    /* optimization:
     *
     * if caller asked for homogeneous or all bytes representation,
     * flatten the type and store it as an indexed type so that
     * there are no branches in the dataloop tree.
     */
    if ((flag == DLOOP_DATALOOP_HOMOGENEOUS) ||
        (flag == DLOOP_DATALOOP_ALL_BYTES))
    {
        return DLOOP_Dataloop_create_flattened_struct(count,
                                                      blklens,
                                                      disps,
                                                      oldtypes,
                                                      dlp_p,
                                                      dlsz_p,
                                                      dldepth_p,
                                                      flag);
    }

    /* scan through types and gather derived type info: the deepest
     * member loop and the summed size of all member loops.
     */
    for (i = 0; i < count; i++)
    {
        /* ignore type elements with a zero blklen */
        if (blklens[i] == 0) continue;

        if (DLOOP_Handle_hasloop_macro(oldtypes[i]))
        {
            int tmp_loop_depth;
            MPI_Aint tmp_loop_sz;

            DLOOP_Handle_get_loopdepth_macro(oldtypes[i], tmp_loop_depth, flag);
            DLOOP_Handle_get_loopsize_macro(oldtypes[i], tmp_loop_sz, flag);

            if (tmp_loop_depth > old_loop_depth)
            {
                old_loop_depth = tmp_loop_depth;
            }
            old_loop_sz += tmp_loop_sz;
        }
    }

    /* general case below: 2 or more distinct types that are either
     * basics or derived, and for which we want to preserve the types
     * themselves.
     */

    if (nr_basics > 0)
    {
        /* basics introduce an extra level of depth, so our new depth
         * must be at least 2 if there are basics.
         */
        new_loop_depth = ((old_loop_depth + 1) > 2) ? (old_loop_depth + 1) : 2;
    }
    else
    {
        new_loop_depth = old_loop_depth + 1;
    }

    MPIR_Dataloop_struct_alloc((DLOOP_Count) nr_basics + nr_derived,
                               old_loop_sz,
                               nr_basics,
                               &curpos,
                               &new_dlp,
                               &new_loop_sz);
    /* --BEGIN ERROR HANDLING-- */
    if (!new_dlp)
    {
        return DLOOP_Dataloop_create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */


    new_dlp->kind = DLOOP_KIND_STRUCT;
    new_dlp->el_size = -1; /* not valid for struct */
    new_dlp->el_extent = -1; /* not valid for struct; see el_extent_array */
    new_dlp->el_type = MPI_DATATYPE_NULL; /* not valid for struct */

    new_dlp->loop_params.s_t.count = (DLOOP_Count) nr_basics + nr_derived;

    /* note: curpos points to first byte in "old dataloop" region of
     * newly allocated space.
     */

    /* i walks the caller's arrays; loop_idx walks the (denser) arrays of
     * the new struct dataloop, advancing only for blocks that are kept.
     */
    for (i = 0, loop_idx = 0; i < count; i++)
    {
        int is_builtin;

        /* ignore type elements with a zero blklen */
        if (blklens[i] == 0) continue;

        is_builtin = (DLOOP_Handle_hasloop_macro(oldtypes[i])) ? 0 : 1;

        if (is_builtin)
        {
            DLOOP_Dataloop *dummy_dlp;
            int dummy_depth;
            MPI_Aint dummy_sz;

            /* LBs and UBs already taken care of -- skip them */
            if (oldtypes[i] == MPI_LB || oldtypes[i] == MPI_UB)
            {
                continue;
            }

            /* build a contig dataloop for this basic and point to that
             *
             * optimization:
             *
             * push the count (blklen) from the struct down into the
             * contig so we can process more at the leaf.
             */
            err = MPIR_Dataloop_create_contiguous(blklens[i],
                                                  oldtypes[i],
                                                  &dummy_dlp,
                                                  &dummy_sz,
                                                  &dummy_depth,
                                                  flag);

            /* --BEGIN ERROR HANDLING-- */
            if (err) {
                /* the struct dataloop has not been handed to the caller
                 * yet, so release it here rather than leaking it.
                 */
                MPIR_Dataloop_free(&new_dlp);
                return -1;
            }
            /* --END ERROR HANDLING-- */

            /* copy the new contig loop into place in the struct memory
             * region
             */
            MPIR_Dataloop_copy(curpos, dummy_dlp, dummy_sz);
            new_dlp->loop_params.s_t.dataloop_array[loop_idx] = curpos;
            curpos = (DLOOP_Dataloop *) ((char *) curpos + dummy_sz);

            /* we stored the block size in the contig -- use 1 here */
            new_dlp->loop_params.s_t.blocksize_array[loop_idx] = 1;
            new_dlp->loop_params.s_t.el_extent_array[loop_idx] =
                ((DLOOP_Offset) blklens[i]) * dummy_dlp->el_extent;
            MPIR_Dataloop_free(&dummy_dlp);
        }
        else
        {
            DLOOP_Dataloop *old_loop_ptr;
            DLOOP_Offset old_extent;

            /* old_loop_sz is reused here as per-member scratch; its
             * summed value was already consumed by the allocation above.
             */
            DLOOP_Handle_get_loopptr_macro(oldtypes[i], old_loop_ptr, flag);
            DLOOP_Handle_get_loopsize_macro(oldtypes[i], old_loop_sz, flag);
            DLOOP_Handle_get_extent_macro(oldtypes[i], old_extent);

            MPIR_Dataloop_copy(curpos, old_loop_ptr, old_loop_sz);
            new_dlp->loop_params.s_t.dataloop_array[loop_idx] = curpos;
            curpos = (DLOOP_Dataloop *) ((char *) curpos + old_loop_sz);

            new_dlp->loop_params.s_t.blocksize_array[loop_idx] =
                (DLOOP_Count) blklens[i];
            new_dlp->loop_params.s_t.el_extent_array[loop_idx] =
                old_extent;
        }
        new_dlp->loop_params.s_t.offset_array[loop_idx] =
            (DLOOP_Offset) disps[i];
        loop_idx++;
    }

    *dlp_p     = new_dlp;
    *dlsz_p    = new_loop_sz;
    *dldepth_p = new_loop_depth;

    return 0;
}
/*@
   Dataloop_create_blockindexed - create blockindexed dataloop

   Arguments:
+  DLOOP_Count icount - number of blocks
.  DLOOP_Count iblklen - number of old_type elements in every block
.  const void *disp_array (array of either MPI_Aints or ints)
.  int dispinbytes (boolean; nonzero means disp_array holds MPI_Aint byte
   offsets, zero means it holds int offsets in units of the old type)
.  DLOOP_Type oldtype
.  DLOOP_Dataloop **dlp_p - receives pointer to the new dataloop
.  DLOOP_Size *dlsz_p - receives size of the new dataloop
.  int *dldepth_p - receives depth of the new dataloop
-  int flag

   Notes:
   Several cases are delegated to simpler representations: a zero count or
   block length becomes an empty contiguous loop, a single contiguous run
   starting at displacement zero becomes a contiguous loop, and
   displacements that start at zero with a constant stride become a vector.

.N Errors
.N Returns 0 on success, -1 on failure.
@*/
int MPIR_Dataloop_create_blockindexed(DLOOP_Count icount,
                                      DLOOP_Count iblklen,
                                      const void *disp_array,
                                      int dispinbytes,
                                      DLOOP_Type oldtype,
                                      DLOOP_Dataloop ** dlp_p,
                                      DLOOP_Size * dlsz_p, int *dldepth_p, int flag)
{
    int err, is_builtin, is_vectorizable = 1;
    int old_loop_depth;
    DLOOP_Count i;              /* same width as count, which bounds it */
    DLOOP_Size new_loop_sz;

    DLOOP_Count contig_count, count, blklen;
    DLOOP_Offset old_extent, eff_disp0, eff_disp1, last_stride;
    DLOOP_Dataloop *new_dlp;

    count = (DLOOP_Count) icount;       /* avoid subsequent casting */
    blklen = (DLOOP_Count) iblklen;

    /* if count or blklen are zero, handle with contig code, call it a int */
    if (count == 0 || blklen == 0) {
        err = MPIR_Dataloop_create_contiguous(0, MPI_INT, dlp_p, dlsz_p, dldepth_p, flag);
        return err;
    }

    is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1;

    /* for a builtin the size is used as the element extent */
    if (is_builtin) {
        DLOOP_Handle_get_size_macro(oldtype, old_extent);
        old_loop_depth = 0;
    } else {
        DLOOP_Handle_get_extent_macro(oldtype, old_extent);
        DLOOP_Handle_get_loopdepth_macro(oldtype, old_loop_depth);
    }

    contig_count = MPIR_Type_blockindexed_count_contig(count,
                                                       blklen, disp_array, dispinbytes, old_extent);

    /* optimization:
     *
     * if contig_count == 1 and block starts at displacement 0,
     * store it as a contiguous rather than a blockindexed dataloop.
     */
    if ((contig_count == 1) &&
        ((!dispinbytes && ((int *) disp_array)[0] == 0) ||
         (dispinbytes && ((MPI_Aint *) disp_array)[0] == 0))) {
        err = MPIR_Dataloop_create_contiguous(icount * iblklen,
                                              oldtype, dlp_p, dlsz_p, dldepth_p, flag);
        return err;
    }

    /* optimization:
     *
     * if contig_count == 1 store it as a blockindexed with one
     * element rather than as a lot of individual blocks.
     */
    if (contig_count == 1) {
        /* adjust count and blklen and drop through */
        blklen *= count;
        count = 1;
        iblklen *= icount;
        icount = 1;
    }

    /* optimization:
     *
     * if displacements start at zero and result in a fixed stride,
     * store it as a vector rather than a blockindexed dataloop.
     */
    eff_disp0 = (dispinbytes) ? ((DLOOP_Offset) ((MPI_Aint *) disp_array)[0]) :
        (((DLOOP_Offset) ((int *) disp_array)[0]) * old_extent);

    if (count > 1 && eff_disp0 == (DLOOP_Offset) 0) {
        eff_disp1 = (dispinbytes) ?
            ((DLOOP_Offset) ((MPI_Aint *) disp_array)[1]) :
            (((DLOOP_Offset) ((int *) disp_array)[1]) * old_extent);
        last_stride = eff_disp1 - eff_disp0;

        /* every later gap must equal the first one to be a vector */
        for (i = 2; i < count; i++) {
            eff_disp0 = eff_disp1;
            eff_disp1 = (dispinbytes) ?
                ((DLOOP_Offset) ((MPI_Aint *) disp_array)[i]) :
                (((DLOOP_Offset) ((int *) disp_array)[i]) * old_extent);
            if (eff_disp1 - eff_disp0 != last_stride) {
                is_vectorizable = 0;
                break;
            }
        }
        if (is_vectorizable) {
            err = MPIR_Dataloop_create_vector(count, blklen, last_stride, 1,    /* strideinbytes */
                                              oldtype, dlp_p, dlsz_p, dldepth_p, flag);
            return err;
        }
    }

    /* TODO: optimization:
     *
     * if displacements result in a fixed stride, but first displacement
     * is not zero, store it as a blockindexed (blklen == 1) of a vector.
     */

    /* TODO: optimization:
     *
     * if a blockindexed of a contig, absorb the contig into the blocklen
     * parameter and keep the same overall depth
     */

    /* otherwise storing as a blockindexed dataloop */

    /* Q: HOW CAN WE TELL IF IT IS WORTH IT TO STORE AS AN
     * INDEXED WITH FEWER CONTIG BLOCKS (IF CONTIG_COUNT IS SMALL)?
     */

    if (is_builtin) {
        MPIR_Dataloop_alloc(DLOOP_KIND_BLOCKINDEXED, count, &new_dlp, &new_loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!new_dlp)
            return -1;
        /* --END ERROR HANDLING-- */

        new_dlp->kind = DLOOP_KIND_BLOCKINDEXED | DLOOP_FINAL_MASK;

        if (flag == DLOOP_DATALOOP_ALL_BYTES) {
            /* express the block in bytes: scale the length and use a
             * one-byte element.
             */
            blklen *= old_extent;
            new_dlp->el_size = 1;
            new_dlp->el_extent = 1;
            new_dlp->el_type = MPI_BYTE;
        } else {
            new_dlp->el_size = old_extent;
            new_dlp->el_extent = old_extent;
            new_dlp->el_type = oldtype;
        }
    } else {
        DLOOP_Dataloop *old_loop_ptr = NULL;
        MPI_Aint old_loop_sz = 0;

        DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr);
        DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz);

        MPIR_Dataloop_alloc_and_copy(DLOOP_KIND_BLOCKINDEXED,
                                     count, old_loop_ptr, old_loop_sz, &new_dlp, &new_loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!new_dlp)
            return -1;
        /* --END ERROR HANDLING-- */

        new_dlp->kind = DLOOP_KIND_BLOCKINDEXED;

        DLOOP_Handle_get_size_macro(oldtype, new_dlp->el_size);
        DLOOP_Handle_get_extent_macro(oldtype, new_dlp->el_extent);
        DLOOP_Handle_get_basic_type_macro(oldtype, new_dlp->el_type);
    }

    new_dlp->loop_params.bi_t.count = count;
    new_dlp->loop_params.bi_t.blocksize = blklen;

    /* copy in displacement parameters
     *
     * regardless of dispinbytes, we store displacements in bytes in loop.
     */
    DLOOP_Type_blockindexed_array_copy(count,
                                       disp_array,
                                       new_dlp->loop_params.bi_t.offset_array,
                                       dispinbytes, old_extent);

    *dlp_p = new_dlp;
    *dlsz_p = new_loop_sz;
    *dldepth_p = old_loop_depth + 1;

    return 0;
}
示例#3
0
/*
 * DLOOP_Dataloop_create_flattened_struct - build a struct dataloop by
 * flattening every member into (length, displacement) pairs and handing
 * the result to MPIR_Dataloop_create_indexed with MPI_BYTE elements.
 *
 * Input Parameters:
 *   count    - number of blocks in the struct
 *   blklens  - number of elements in each block
 *   disps    - offsets of blocks from start of type in bytes
 *   oldtypes - types (using handle) of the blocks
 *   flag     - passed through to the segment/dataloop helpers
 *
 * Output Parameters:
 *   dlp_p, dlsz_p, dldepth_p - filled in by the dataloop creation routine
 *                              this function delegates to
 *
 * Return Value:
 *   The value returned by the creation routine on the normal path;
 *   otherwise the error value of the step that failed.
 *
 * Two passes are made over the members: the first counts contiguous
 * regions so the temporary arrays can be sized, the second fills them.
 * The segment and both temporary arrays are released on every exit path.
 */
static int DLOOP_Dataloop_create_flattened_struct(DLOOP_Count count,
						  const int *blklens,
						  const MPI_Aint *disps,
						  const DLOOP_Type *oldtypes,
						  DLOOP_Dataloop **dlp_p,
						  MPI_Aint *dlsz_p,
						  int *dldepth_p,
						  int flag)
{
    /* arbitrary types, convert to bytes and use indexed */
    int i, err, nr_blks = 0;
    DLOOP_Size *tmp_blklens;
    MPI_Aint *tmp_disps; /* since we're calling another fn that takes
                            this type as an input parameter */
    DLOOP_Offset bytes;
    DLOOP_Segment *segp;

    int first_ind;
    DLOOP_Size last_ind;

    segp = MPIR_Segment_alloc();
    /* --BEGIN ERROR HANDLING-- */
    if (!segp) {
        return DLOOP_Dataloop_create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */

    /* use segment code once to count contiguous regions */
    for (i = 0; i < count; i++)
    {
        int is_basic;

        /* ignore type elements with a zero blklen */
        if (blklens[i] == 0) continue;

        is_basic = (DLOOP_Handle_hasloop_macro(oldtypes[i])) ? 0 : 1;

        if (is_basic && (oldtypes[i] != MPI_LB &&
                         oldtypes[i] != MPI_UB))
        {
            nr_blks++;
        }
        else /* derived type; get a count of contig blocks */
        {
            DLOOP_Count tmp_nr_blks, sz;

            DLOOP_Handle_get_size_macro(oldtypes[i], sz);

            /* if the derived type has some data to contribute,
             * add to flattened representation */
            if (sz > 0) {
                err = MPIR_Segment_init(NULL,
                                        (DLOOP_Count) blklens[i],
                                        oldtypes[i],
                                        segp,
                                        flag);
                /* --BEGIN ERROR HANDLING-- */
                if (err) goto fn_free_seg;
                /* --END ERROR HANDLING-- */

                bytes = SEGMENT_IGNORE_LAST;

                MPIR_Segment_count_contig_blocks(segp,
                                                 0,
                                                 &bytes,
                                                 &tmp_nr_blks);

                nr_blks += tmp_nr_blks;
            }
        }
    }

    /* it's possible for us to get to this point only to realize that
     * there isn't any data in this type. in that case do what we always
     * do: store a simple contig of zero ints and call it done.
     */
    if (nr_blks == 0) {
        err = MPIR_Dataloop_create_contiguous(0,
                                              MPI_INT,
                                              dlp_p,
                                              dlsz_p,
                                              dldepth_p,
                                              flag);
        goto fn_free_seg;
    }

    nr_blks += 2; /* safety measure */

    tmp_blklens = (DLOOP_Size *) DLOOP_Malloc(nr_blks * sizeof(DLOOP_Size), MPL_MEM_DATATYPE);
    /* --BEGIN ERROR HANDLING-- */
    if (!tmp_blklens) {
        err = DLOOP_Dataloop_create_struct_memory_error();
        goto fn_free_seg;
    }
    /* --END ERROR HANDLING-- */


    tmp_disps = (MPI_Aint *) DLOOP_Malloc(nr_blks * sizeof(MPI_Aint), MPL_MEM_DATATYPE);
    /* --BEGIN ERROR HANDLING-- */
    if (!tmp_disps) {
        err = DLOOP_Dataloop_create_struct_memory_error();
        goto fn_free_blklens;
    }
    /* --END ERROR HANDLING-- */

    /* use segment code again to flatten the type */
    first_ind = 0;
    for (i = 0; i < count; i++)
    {
        int is_basic;
        DLOOP_Count sz = -1;

        is_basic = (DLOOP_Handle_hasloop_macro(oldtypes[i])) ? 0 : 1;
        if (!is_basic) DLOOP_Handle_get_size_macro(oldtypes[i], sz);

        /* we're going to use the segment code to flatten the type.
         * we put in our displacement as the buffer location, and use
         * the blocklength as the count value to get N contiguous copies
         * of the type.
         *
         * Note that we're going to get back values in bytes, so that will
         * be our new element type.
         */
        if (oldtypes[i] != MPI_UB &&
            oldtypes[i] != MPI_LB &&
            blklens[i] != 0 &&
            (is_basic || sz > 0))
        {
            err = MPIR_Segment_init((char *) DLOOP_OFFSET_CAST_TO_VOID_PTR disps[i],
                                    (DLOOP_Count) blklens[i],
                                    oldtypes[i],
                                    segp,
                                    0 /* homogeneous */);
            /* --BEGIN ERROR HANDLING-- */
            if (err) goto fn_free_disps;
            /* --END ERROR HANDLING-- */

            /* on input last_ind is the room left in the temporary arrays;
             * first_ind is then advanced by the value left in it after
             * the flatten call.
             */
            last_ind = nr_blks - first_ind;
            bytes = SEGMENT_IGNORE_LAST;
            MPIR_Segment_mpi_flatten(segp,
                                     0,
                                     &bytes,
                                     &tmp_blklens[first_ind],
                                     &tmp_disps[first_ind],
                                     &last_ind);
            first_ind += last_ind;
        }
    }
    nr_blks = first_ind;

#if 0
    if (MPL_DBG_SELECTED(MPIR_DBG_DATATYPE,VERBOSE)) {
	MPL_DBG_OUT(MPIR_DBG_DATATYPE,"--- start of flattened type ---");
        for (i=0; i < nr_blks; i++) {
	MPL_DBG_OUT_FMT(MPIR_DBG_DATATYPE,(MPL_DBG_FDEST,
				   "a[%d] = (%d, " DLOOP_OFFSET_FMT_DEC_SPEC ")", i,
				   tmp_blklens[i], tmp_disps[i]));
	}
	MPL_DBG_OUT(MPIR_DBG_DATATYPE,"--- end of flattened type ---");
    }
#endif

    err = MPIR_Dataloop_create_indexed(nr_blks,
                                       tmp_blklens,
                                       tmp_disps,
                                       1, /* disp in bytes */
                                       MPI_BYTE,
                                       dlp_p,
                                       dlsz_p,
                                       dldepth_p,
                                       flag);

    /* cleanup chain: success falls through all labels; each error path
     * enters at the label matching the resources it holds.
     */
  fn_free_disps:
    DLOOP_Free(tmp_disps);
  fn_free_blklens:
    DLOOP_Free(tmp_blklens);
  fn_free_seg:
    MPIR_Segment_free(segp);

    return err;
}