/* DLOOP_Leaf_contig_count_block
 *
 * Leaf callback that tallies contiguous regions.  The parameter block
 * passed through v_paramp carries two fields used here:
 *   count    - number of contiguous regions seen so far
 *   last_loc - offset one past the end of the most recent region
 *
 * A region that begins exactly at last_loc is folded into the previous
 * one; anything else starts a new region.
 *
 * Note: because bufp is just an offset, we can ignore it in our
 *       calculations of # of contig regions.
 *
 * Always returns 0.
 */
static int DLOOP_Leaf_contig_count_block(DLOOP_Offset *blocks_p,
					 DLOOP_Type el_type,
					 DLOOP_Offset rel_off,
					 DLOOP_Buffer bufp ATTRIBUTE((unused)),
					 void *v_paramp)
{
    struct PREPEND_PREFIX(contig_blocks_params) *state = v_paramp;
    DLOOP_Offset elem_bytes, region_bytes;
    int extends_previous;

    DLOOP_Assert(*blocks_p > 0);

    /* region length in bytes = number of elements * element size */
    DLOOP_Handle_get_size_macro(el_type, elem_bytes);
    region_bytes = *blocks_p * elem_bytes;

#ifdef MPID_SP_VERBOSE
    MPIU_dbg_printf("contig count block: count = %d, buf+off = %d, lastloc = " DLOOP_OFFSET_FMT_DEC_SPEC "\n",
                    (int) state->count,
                    (int) ((char *) bufp + rel_off),
                    state->last_loc);
#endif

    extends_previous = (state->count > 0) && (rel_off == state->last_loc);

    if (!extends_previous) {
        /* new region: count it and restart the running end at rel_off */
        state->count++;
        state->last_loc = rel_off;
    }

    /* in both cases the running end advances past this region */
    state->last_loc += region_bytes;

    return 0;
}
Exemple #2
0
/* DLOOP_Leaf_contig_mpi_flatten
 *
 * Leaf callback that appends one contiguous region to the (disps,
 * blklens) arrays held in the mpi_flatten_params block.
 *
 * blocks_p - [inout] number of elements in the region; set to 0 when the
 *            output arrays are full and the region cannot be merged
 * el_type  - element type; its size converts elements to bytes
 * rel_off  - offset of the region relative to bufp
 * bufp     - base address (may really be a displacement, possibly negative)
 * v_paramp - struct mpi_flatten_params (index, length, disps, blklens)
 *
 * Returns 1 when the output arrays are exhausted (nothing was consumed),
 * 0 otherwise.
 */
static int DLOOP_Leaf_contig_mpi_flatten(DLOOP_Offset *blocks_p,
					 DLOOP_Type el_type,
					 DLOOP_Offset rel_off,
					 void *bufp,
					 void *v_paramp)
{
    struct PREPEND_PREFIX(mpi_flatten_params) *paramp = v_paramp;
    DLOOP_Offset el_size;
    DLOOP_Offset nbytes;
    char *prev_end = NULL;
    char *region_start;
    int prev, have_prev, adjacent;

    DLOOP_Handle_get_size_macro(el_type, el_size);
    nbytes = *blocks_p * el_size;

    /* Since bufp can be a displacement and can be negative, we cannot
     * use DLOOP_Ensure_Offset_fits_in_pointer to ensure the sum fits in
     * a pointer.  Just let it truncate.
     */
    region_start = (char *) bufp + rel_off;

    prev = paramp->index - 1;
    have_prev = (prev >= 0);
    if (have_prev) {
        /* Since disps can be negative, we cannot use
         * DLOOP_Ensure_Offset_fits_in_pointer to verify that disps +
         * blklens fits in a pointer.  Just let it truncate, if the
         * sizeof a pointer is less than the sizeof an MPI_Aint.
         */
        prev_end = (char *) DLOOP_OFFSET_CAST_TO_VOID_PTR
                   (paramp->disps[prev] + ((DLOOP_Offset) paramp->blklens[prev]));
    }
    adjacent = (prev_end == region_start);

    if ((prev == paramp->length - 1) && !adjacent) {
        /* every entry is in use and this region does not extend the last
         * one.  Zero blocks tells the manipulation function that we are
         * done and that nothing was processed.
         */
        *blocks_p = 0;
        return 1;
    }

    if (have_prev && adjacent) {
        /* grow the previous entry instead of consuming a new one */
        paramp->blklens[prev] += nbytes;
        return 0;
    }

    /* Since bufp can be a displacement and can be negative, we cannot use
     * DLOOP_VOID_PTR_CAST_TO_OFFSET to cast the sum to a pointer.  Just let
     * it sign extend.
     */
    paramp->disps[prev + 1]   = DLOOP_PTR_DISP_CAST_TO_OFFSET bufp + rel_off;
    paramp->blklens[prev + 1] = nbytes;
    paramp->index++;

    return 0;
}
/*@
   Dataloop_create_contiguous - create the dataloop representation for a
   contiguous datatype

   Input Parameters:
+  DLOOP_Count icount - number of consecutive copies of oldtype
.  DLOOP_Type oldtype - element type (builtin, or a type that already
                        has a dataloop)
-  int flag - dataloop variant; forwarded to the handle macros and compared
              against DLOOP_DATALOOP_ALL_BYTES

   Output Parameters:
+  DLOOP_Dataloop **dlp_p - the new dataloop
.  DLOOP_Size *dlsz_p - size of the new dataloop
-  int *dldepth_p - depth of the new dataloop

   Notes:
   Three shapes are produced.  A builtin oldtype yields a final contig
   loop of depth 1 (counted in bytes when flag is
   DLOOP_DATALOOP_ALL_BYTES).  An oldtype whose own loop is a contig with
   size equal to extent is coalesced: its loop is duplicated and the count
   multiplied, keeping the depth.  Any other oldtype gets a new contig
   loop wrapped around a copy of the old loop, one level deeper.

   The output parameters are written only on success.

.N Errors
.N Returns 0 on success, -1 on failure.
@*/
int PREPEND_PREFIX(Dataloop_create_contiguous)(DLOOP_Count icount,
					       DLOOP_Type oldtype,
					       DLOOP_Dataloop **dlp_p,
					       DLOOP_Size *dlsz_p,
					       int *dldepth_p,
					       int flag)
{
    DLOOP_Count count = icount;
    DLOOP_Dataloop *new_dlp;
    DLOOP_Size new_loop_sz;
    int new_loop_depth;
    int is_builtin;

    is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1;

    if (is_builtin)
    {
        DLOOP_Offset basic_sz = 0;

        new_loop_depth = 1;

        PREPEND_PREFIX(Dataloop_alloc)(DLOOP_KIND_CONTIG,
                                       count,
                                       &new_dlp,
                                       &new_loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!new_dlp) return -1;
        /* --END ERROR HANDLING-- */

        DLOOP_Handle_get_size_macro(oldtype, basic_sz);
        new_dlp->kind = DLOOP_KIND_CONTIG | DLOOP_FINAL_MASK;

        if (flag == DLOOP_DATALOOP_ALL_BYTES)
        {
            /* describe the data as count * basic_sz single bytes */
            count             *= basic_sz;
            new_dlp->el_size   = 1;
            new_dlp->el_extent = 1;
            new_dlp->el_type   = MPI_BYTE;
        }
        else
        {
            new_dlp->el_size   = basic_sz;
            new_dlp->el_extent = new_dlp->el_size;
            new_dlp->el_type   = oldtype;
        }

        new_dlp->loop_params.c_t.count = count;
    }
    else
    {
        /* user-defined base type (oldtype) */
        int old_loop_depth = 0;
        DLOOP_Offset old_size = 0, old_extent = 0;
        DLOOP_Dataloop *old_loop_ptr;
        MPI_Aint old_loop_sz = 0;
        int coalesce;

        DLOOP_Handle_get_loopdepth_macro(oldtype, old_loop_depth, flag);
        DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr, flag);
        DLOOP_Handle_get_size_macro(oldtype, old_size);
        DLOOP_Handle_get_extent_macro(oldtype, old_extent);
        DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz, flag);

        /* a contig of a gap-free contig is itself just a longer contig */
        coalesce = ((old_loop_ptr->kind & DLOOP_KIND_MASK) == DLOOP_KIND_CONTIG)
                   && (old_size == old_extent);

        if (coalesce)
        {
            /* make a copy of the old loop and multiply the count */
            PREPEND_PREFIX(Dataloop_dup)(old_loop_ptr,
                                         old_loop_sz,
                                         &new_dlp);
            /* --BEGIN ERROR HANDLING-- */
            if (!new_dlp) return -1;
            /* --END ERROR HANDLING-- */

            new_dlp->loop_params.c_t.count *= count;

            new_loop_sz    = old_loop_sz;
            new_loop_depth = old_loop_depth;
        }
        else
        {
            new_loop_depth = old_loop_depth + 1;

            /* allocate space for new loop including copy of old */
            PREPEND_PREFIX(Dataloop_alloc_and_copy)(DLOOP_KIND_CONTIG,
                                                    count,
                                                    old_loop_ptr,
                                                    old_loop_sz,
                                                    &new_dlp,
                                                    &new_loop_sz);
            /* --BEGIN ERROR HANDLING-- */
            if (!new_dlp) return -1;
            /* --END ERROR HANDLING-- */

            new_dlp->kind = DLOOP_KIND_CONTIG;
            DLOOP_Handle_get_size_macro(oldtype, new_dlp->el_size);
            DLOOP_Handle_get_extent_macro(oldtype, new_dlp->el_extent);
            DLOOP_Handle_get_basic_type_macro(oldtype, new_dlp->el_type);

            new_dlp->loop_params.c_t.count = count;
        }
    }

    *dlp_p     = new_dlp;
    *dlsz_p    = new_loop_sz;
    *dldepth_p = new_loop_depth;

    return 0;
}
/*@
   Dataloop_create_blockindexed - create blockindexed dataloop

   Arguments:
+  DLOOP_Count icount - number of blocks
.  DLOOP_Count iblklen - number of oldtype elements in every block
.  const void *disp_array - block displacements; read as MPI_Aint when
                            dispinbytes is nonzero, as int otherwise
.  int dispinbytes - (boolean) displacements are in bytes rather than in
                     multiples of the oldtype extent
.  DLOOP_Type oldtype - element type
.  DLOOP_Dataloop **dlp_p - [out] the new dataloop
.  DLOOP_Size *dlsz_p - [out] size of the new dataloop
.  int *dldepth_p - [out] depth of the new dataloop
-  int flag - dataloop variant; compared against DLOOP_DATALOOP_ALL_BYTES
              and forwarded to the other create routines

   Notes:
   Simpler representations are tried first: an empty type becomes a
   zero-count contig, a single contiguous run starting at displacement 0
   becomes a contig, a single contiguous run elsewhere becomes a one-block
   blockindexed, and a fixed stride starting at 0 becomes a vector.

.N Errors
.N Returns 0 on success, -1 on failure.
@*/
int MPIR_Dataloop_create_blockindexed(DLOOP_Count icount,
                                      DLOOP_Count iblklen,
                                      const void *disp_array,
                                      int dispinbytes,
                                      DLOOP_Type oldtype,
                                      DLOOP_Dataloop ** dlp_p,
                                      DLOOP_Size * dlsz_p, int *dldepth_p, int flag)
{
    int err, is_builtin, is_vectorizable = 1;
    int old_loop_depth;
    DLOOP_Size new_loop_sz;

    /* i is as wide as count so the stride scan cannot overflow the index */
    DLOOP_Count i, contig_count, count, blklen;
    DLOOP_Offset old_extent, eff_disp0, eff_disp1, last_stride;
    DLOOP_Dataloop *new_dlp;

    count = (DLOOP_Count) icount;       /* avoid subsequent casting */
    blklen = (DLOOP_Count) iblklen;

    /* if count or blklen are zero, handle with contig code, call it a int */
    if (count == 0 || blklen == 0) {
        err = MPIR_Dataloop_create_contiguous(0, MPI_INT, dlp_p, dlsz_p, dldepth_p, flag);
        return err;
    }

    is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1;

    if (is_builtin) {
        /* for a builtin the size is used as the extent */
        DLOOP_Handle_get_size_macro(oldtype, old_extent);
        old_loop_depth = 0;
    } else {
        DLOOP_Handle_get_extent_macro(oldtype, old_extent);
        DLOOP_Handle_get_loopdepth_macro(oldtype, old_loop_depth);
    }

    contig_count = MPIR_Type_blockindexed_count_contig(count,
                                                       blklen, disp_array, dispinbytes, old_extent);

    /* optimization:
     *
     * if contig_count == 1 and block starts at displacement 0,
     * store it as a contiguous rather than a blockindexed dataloop.
     */
    if ((contig_count == 1) &&
        ((!dispinbytes && ((const int *) disp_array)[0] == 0) ||
         (dispinbytes && ((const MPI_Aint *) disp_array)[0] == 0))) {
        err = MPIR_Dataloop_create_contiguous(icount * iblklen,
                                              oldtype, dlp_p, dlsz_p, dldepth_p, flag);
        return err;
    }

    /* optimization:
     *
     * if contig_count == 1 store it as a blockindexed with one
     * element rather than as a lot of individual blocks.
     */
    if (contig_count == 1) {
        /* adjust count and blklen and drop through */
        blklen *= count;
        count = 1;
    }

    /* optimization:
     *
     * if displacements start at zero and result in a fixed stride,
     * store it as a vector rather than a blockindexed dataloop.
     */
    eff_disp0 = (dispinbytes) ? ((DLOOP_Offset) ((const MPI_Aint *) disp_array)[0]) :
        (((DLOOP_Offset) ((const int *) disp_array)[0]) * old_extent);

    if (count > 1 && eff_disp0 == (DLOOP_Offset) 0) {
        eff_disp1 = (dispinbytes) ?
            ((DLOOP_Offset) ((const MPI_Aint *) disp_array)[1]) :
            (((DLOOP_Offset) ((const int *) disp_array)[1]) * old_extent);
        last_stride = eff_disp1 - eff_disp0;

        /* every later gap must equal the first one */
        for (i = 2; i < count; i++) {
            eff_disp0 = eff_disp1;
            eff_disp1 = (dispinbytes) ?
                ((DLOOP_Offset) ((const MPI_Aint *) disp_array)[i]) :
                (((DLOOP_Offset) ((const int *) disp_array)[i]) * old_extent);
            if (eff_disp1 - eff_disp0 != last_stride) {
                is_vectorizable = 0;
                break;
            }
        }
        if (is_vectorizable) {
            err = MPIR_Dataloop_create_vector(count, blklen, last_stride, 1,    /* strideinbytes */
                                              oldtype, dlp_p, dlsz_p, dldepth_p, flag);
            return err;
        }
    }

    /* TODO: optimization:
     *
     * if displacements result in a fixed stride, but first displacement
     * is not zero, store it as a blockindexed (blklen == 1) of a vector.
     */

    /* TODO: optimization:
     *
     * if a blockindexed of a contig, absorb the contig into the blocklen
     * parameter and keep the same overall depth
     */

    /* otherwise storing as a blockindexed dataloop */

    /* Q: HOW CAN WE TELL IF IT IS WORTH IT TO STORE AS AN
     * INDEXED WITH FEWER CONTIG BLOCKS (IF CONTIG_COUNT IS SMALL)?
     */

    if (is_builtin) {
        MPIR_Dataloop_alloc(DLOOP_KIND_BLOCKINDEXED, count, &new_dlp, &new_loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!new_dlp)
            return -1;
        /* --END ERROR HANDLING-- */

        new_dlp->kind = DLOOP_KIND_BLOCKINDEXED | DLOOP_FINAL_MASK;

        if (flag == DLOOP_DATALOOP_ALL_BYTES) {
            /* express the block length in bytes */
            blklen *= old_extent;
            new_dlp->el_size = 1;
            new_dlp->el_extent = 1;
            new_dlp->el_type = MPI_BYTE;
        } else {
            new_dlp->el_size = old_extent;
            new_dlp->el_extent = old_extent;
            new_dlp->el_type = oldtype;
        }
    } else {
        DLOOP_Dataloop *old_loop_ptr = NULL;
        MPI_Aint old_loop_sz = 0;

        DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr);
        DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz);

        MPIR_Dataloop_alloc_and_copy(DLOOP_KIND_BLOCKINDEXED,
                                     count, old_loop_ptr, old_loop_sz, &new_dlp, &new_loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!new_dlp)
            return -1;
        /* --END ERROR HANDLING-- */

        new_dlp->kind = DLOOP_KIND_BLOCKINDEXED;

        DLOOP_Handle_get_size_macro(oldtype, new_dlp->el_size);
        DLOOP_Handle_get_extent_macro(oldtype, new_dlp->el_extent);
        DLOOP_Handle_get_basic_type_macro(oldtype, new_dlp->el_type);
    }

    new_dlp->loop_params.bi_t.count = count;
    new_dlp->loop_params.bi_t.blocksize = blklen;

    /* copy in displacement parameters
     *
     * regardless of dispinbytes, we store displacements in bytes in loop.
     */
    DLOOP_Type_blockindexed_array_copy(count,
                                       disp_array,
                                       new_dlp->loop_params.bi_t.offset_array,
                                       dispinbytes, old_extent);

    *dlp_p = new_dlp;
    *dlsz_p = new_loop_sz;
    *dldepth_p = old_loop_depth + 1;

    return 0;
}
/*@
   Dataloop_create_indexed - create the dataloop representation for an
   indexed datatype

   Arguments:
+  DLOOP_Count icount - number of blocks
.  const DLOOP_Size *blocklength_array - number of oldtype elements in
                                         each block
.  const void *displacement_array - block displacements; read as MPI_Aint
                                    when dispinbytes is nonzero, as int
                                    otherwise
.  int dispinbytes - (boolean) displacements are in bytes rather than in
                     multiples of the oldtype extent
.  MPI_Datatype oldtype - element type
.  DLOOP_Dataloop **dlp_p - [out] the new dataloop
.  DLOOP_Size *dlsz_p - [out] size of the new dataloop
.  int *dldepth_p - [out] depth of the new dataloop
-  int flag - dataloop variant; compared against DLOOP_DATALOOP_ALL_BYTES
              and forwarded to the other create routines

   Notes:
   Simpler representations are tried first: a type with no data becomes a
   zero-count contig, a single contiguous run becomes a contig (at
   displacement 0) or a one-block blockindexed, and equal block lengths
   become a blockindexed (which in turn looks for the vector case).

.N Errors
.N Returns 0 on success, -1 on allocation failure; otherwise whatever the
   delegated create routine returns.
@*/
int PREPEND_PREFIX(Dataloop_create_indexed)(DLOOP_Count icount,
					    const DLOOP_Size *blocklength_array,
					    const void *displacement_array,
					    int dispinbytes,
					    MPI_Datatype oldtype,
					    DLOOP_Dataloop **dlp_p,
					    DLOOP_Size *dlsz_p,
					    int *dldepth_p,
					    int flag)
{
    int err, is_builtin, uniform_blksz;
    int old_loop_depth;
    MPI_Aint i;
    DLOOP_Size new_loop_sz, blksz;
    DLOOP_Count first;

    DLOOP_Count old_type_count = 0, contig_count, count;
    DLOOP_Offset old_extent;
    struct DLOOP_Dataloop *new_dlp;

    count = (DLOOP_Count) icount; /* avoid subsequent casting */


    /* if count is zero, handle with contig code, call it an int */
    if (count == 0)
    {
        err = PREPEND_PREFIX(Dataloop_create_contiguous)(0,
                                                         MPI_INT,
                                                         dlp_p,
                                                         dlsz_p,
                                                         dldepth_p,
                                                         flag);
        return err;
    }

    /* Skip any initial zero-length blocks */
    for (first = 0; first < count; first++)
        if ((DLOOP_Count) blocklength_array[first])
            break;

    /* every block is empty: there is no data, so handle with contig code.
     * Checking here keeps the [first] accesses below in bounds without
     * relying on the contig-count helper to report zero.
     */
    if (first == count)
    {
        err = PREPEND_PREFIX(Dataloop_create_contiguous)(0,
                                                         MPI_INT,
                                                         dlp_p,
                                                         dlsz_p,
                                                         dldepth_p,
                                                         flag);
        return err;
    }

    is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1;

    if (is_builtin)
    {
        DLOOP_Handle_get_extent_macro(oldtype, old_extent);
        old_loop_depth = 0;
    }
    else
    {
        DLOOP_Handle_get_extent_macro(oldtype, old_extent);
        DLOOP_Handle_get_loopdepth_macro(oldtype, old_loop_depth, flag);
    }

    /* total number of oldtype elements described by the type */
    for (i=first; i < count; i++)
    {
        old_type_count += (DLOOP_Count) blocklength_array[i];
    }

    contig_count = PREPEND_PREFIX(Type_indexed_count_contig)(count,
                                                             blocklength_array,
                                                             displacement_array,
                                                             dispinbytes,
                                                             old_extent);

    /* if contig_count is zero (no data), handle with contig code */
    if (contig_count == 0)
    {
        err = PREPEND_PREFIX(Dataloop_create_contiguous)(0,
                                                         MPI_INT,
                                                         dlp_p,
                                                         dlsz_p,
                                                         dldepth_p,
                                                         flag);
        return err;
    }

    /* optimization:
     *
     * if contig_count == 1 and block starts at displacement 0,
     * store it as a contiguous rather than an indexed dataloop.
     */
    if ((contig_count == 1) &&
        ((!dispinbytes && ((const int *) displacement_array)[first] == 0) ||
         (dispinbytes && ((const MPI_Aint *) displacement_array)[first] == 0)))
    {
        err = PREPEND_PREFIX(Dataloop_create_contiguous)(old_type_count,
                                                         oldtype,
                                                         dlp_p,
                                                         dlsz_p,
                                                         dldepth_p,
                                                         flag);
        return err;
    }

    /* optimization:
     *
     * if contig_count == 1 (and displacement != 0), store this as
     * a single element blockindexed rather than a lot of individual
     * blocks.
     */
    if (contig_count == 1)
    {
        const void *disp_arr_tmp; /* no ternary assignment to avoid clang warnings */
        if (dispinbytes)
            disp_arr_tmp = &(((const MPI_Aint *)displacement_array)[first]);
        else
            disp_arr_tmp = &(((const int *)displacement_array)[first]);
        err = PREPEND_PREFIX(Dataloop_create_blockindexed)(1,
                                                           old_type_count,
                                                           disp_arr_tmp,
                                                           dispinbytes,
                                                           oldtype,
                                                           dlp_p,
                                                           dlsz_p,
                                                           dldepth_p,
                                                           flag);

        return err;
    }

    /* optimization:
     *
     * if block length is the same for all blocks, store it as a
     * blockindexed rather than an indexed dataloop.
     */
    blksz = blocklength_array[first];
    uniform_blksz = 1;
    for (i = first+1; i < count; i++)
    {
        if (blocklength_array[i] != blksz)
        {
            uniform_blksz = 0;
            break;
        }
    }
    if (uniform_blksz)
    {
        const void *disp_arr_tmp; /* no ternary assignment to avoid clang warnings */
        if (dispinbytes)
            disp_arr_tmp = &(((const MPI_Aint *)displacement_array)[first]);
        else
            disp_arr_tmp = &(((const int *)displacement_array)[first]);
        err = PREPEND_PREFIX(Dataloop_create_blockindexed)(icount-first,
                                                           blksz,
                                                           disp_arr_tmp,
                                                           dispinbytes,
                                                           oldtype,
                                                           dlp_p,
                                                           dlsz_p,
                                                           dldepth_p,
                                                           flag);

        return err;
    }

    /* note: blockindexed looks for the vector optimization */

    /* TODO: optimization:
     *
     * if an indexed of a contig, absorb the contig into the blocklen array
     * and keep the same overall depth
     */

    /* otherwise storing as an indexed dataloop */

    if (is_builtin)
    {
        /* NOTE(review): this path sizes the loop for count entries while
         * the derived-type path below uses contig_count; only contig_count
         * entries are recorded in the loop.  Left as-is -- confirm before
         * changing.
         */
        PREPEND_PREFIX(Dataloop_alloc)(DLOOP_KIND_INDEXED,
                                       count,
                                       &new_dlp,
                                       &new_loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!new_dlp) return -1;
        /* --END ERROR HANDLING-- */

        new_dlp->kind = DLOOP_KIND_INDEXED | DLOOP_FINAL_MASK;

        if (flag == DLOOP_DATALOOP_ALL_BYTES)
        {
            /* blocklengths are modified below */
            new_dlp->el_size   = 1;
            new_dlp->el_extent = 1;
            new_dlp->el_type   = MPI_BYTE;
        }
        else
        {
            new_dlp->el_size   = old_extent;
            new_dlp->el_extent = old_extent;
            new_dlp->el_type   = oldtype;
        }
    }
    else
    {
        DLOOP_Dataloop *old_loop_ptr = NULL;
        MPI_Aint old_loop_sz = 0;

        DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr, flag);
        DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz, flag);

        PREPEND_PREFIX(Dataloop_alloc_and_copy)(DLOOP_KIND_INDEXED,
                                                contig_count,
                                                old_loop_ptr,
                                                old_loop_sz,
                                                &new_dlp,
                                                &new_loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!new_dlp) return -1;
        /* --END ERROR HANDLING-- */

        new_dlp->kind = DLOOP_KIND_INDEXED;

        DLOOP_Handle_get_size_macro(oldtype, new_dlp->el_size);
        DLOOP_Handle_get_extent_macro(oldtype, new_dlp->el_extent);
        DLOOP_Handle_get_basic_type_macro(oldtype, new_dlp->el_type);
    }

    new_dlp->loop_params.i_t.count        = contig_count;
    new_dlp->loop_params.i_t.total_blocks = old_type_count;

    /* copy in blocklength and displacement parameters (in that order)
     *
     * regardless of dispinbytes, we store displacements in bytes in loop.
     */
    DLOOP_Type_indexed_array_copy(count,
                                  contig_count,
                                  blocklength_array,
                                  displacement_array,
                                  new_dlp->loop_params.i_t.blocksize_array,
                                  new_dlp->loop_params.i_t.offset_array,
                                  dispinbytes,
                                  old_extent);

    if (is_builtin && (flag == DLOOP_DATALOOP_ALL_BYTES))
    {
        DLOOP_Count *tmp_blklen_array =
            new_dlp->loop_params.i_t.blocksize_array;

        for (i=0; i < contig_count; i++)
        {
            /* increase block lengths so they are in bytes */
            tmp_blklen_array[i] *= old_extent;
        }

        new_dlp->loop_params.i_t.total_blocks *= old_extent;
    }

    *dlp_p     = new_dlp;
    *dlsz_p    = new_loop_sz;
    *dldepth_p = old_loop_depth + 1;

    return MPI_SUCCESS;
}
Exemple #6
0
/* DLOOP_Leaf_index_mpi_flatten
 *
 * Leaf callback that appends the blocks of an indexed leaf to the (disps,
 * blklens) arrays held in the mpi_flatten_params block, merging a block
 * into the previous entry whenever it starts exactly where that entry ends.
 *
 * blocks_p    - [inout] number of elements to process; on early exit it is
 *               reduced to the number actually consumed
 * count       - number of blocks in blockarray/offsetarray
 * blockarray  - length of each block, in elements
 * offsetarray - offset of each block, added to bufp + rel_off
 * el_type     - element type; its size converts elements to bytes
 * rel_off     - offset into buffer
 * bufp        - start of buffer (may be a displacement, possibly negative)
 * v_paramp    - struct mpi_flatten_params (index, length, disps, blklens)
 *
 * Returns 1 when the output arrays filled up before all blocks were
 * stored, 0 when every block was processed.
 */
static int DLOOP_Leaf_index_mpi_flatten(DLOOP_Offset *blocks_p,
					DLOOP_Count count,
					DLOOP_Count *blockarray,
					DLOOP_Offset *offsetarray,
					DLOOP_Type el_type,
					DLOOP_Offset rel_off,
					void *bufp,
					void *v_paramp)
{
    DLOOP_Count i;   /* same width as count, so the comparison cannot overflow */
    DLOOP_Size size, blocks_left;
    DLOOP_Offset el_size;
    struct PREPEND_PREFIX(mpi_flatten_params) *paramp = v_paramp;

    DLOOP_Handle_get_size_macro(el_type, el_size);
    blocks_left = *blocks_p;

    for (i=0; i < count && blocks_left > 0; i++) {
        int last_idx;
        char *last_end = NULL;
        char *blk_start;

        if (blocks_left > blockarray[i]) {
            size = blockarray[i] * el_size;
            blocks_left -= blockarray[i];
        }
        else {
            /* last pass */
            size = blocks_left *  el_size;
            blocks_left = 0;
        }

        last_idx = paramp->index - 1;
        if (last_idx >= 0) {
            /* Since disps can be negative, we cannot use
             * DLOOP_Ensure_Offset_fits_in_pointer to verify that disps +
             * blklens fits in a pointer.  Just let it truncate, if the
             * sizeof a pointer is less than the sizeof an MPI_Aint.
             */
            last_end = (char *) DLOOP_OFFSET_CAST_TO_VOID_PTR
                       (paramp->disps[last_idx] +
                        (MPI_Aint)(paramp->blklens[last_idx]));
        }

        /* Since bufp can be a displacement and can be negative, we
         * cannot use DLOOP_Ensure_Offset_fits_in_pointer to ensure the
         * sum fits in a pointer.  Just let it truncate.
         */
        blk_start = (char *) bufp + rel_off + offsetarray[i];

        if ((last_idx == paramp->length-1) && (last_end != blk_start))
        {
            /* we have used up all our entries, and this one doesn't fit on
             * the end of the last one.  Give back the elements of this
             * block and of all the blocks not yet visited.
             */
            *blocks_p -= (blocks_left + (size /  el_size));
            return 1;
        }
        else if (last_idx >= 0 && (last_end == blk_start))
        {
            /* add this size to the last vector rather than using up new one */
            paramp->blklens[last_idx] += size;
        }
        else {
            /* Since bufp can be a displacement and can be negative, we cannot
             * use DLOOP_VOID_PTR_CAST_TO_OFFSET to cast the sum to a pointer.
             * Just let it sign extend.
             */
            paramp->disps[last_idx+1]   = DLOOP_PTR_DISP_CAST_TO_OFFSET bufp +
                rel_off + offsetarray[i];
            paramp->blklens[last_idx+1] = size; /* these blocks are in bytes */
            paramp->index++;
        }
    }

    /* if we get here then we processed ALL the blocks; don't need to update
     * blocks_p
     */
    DLOOP_Assert(blocks_left == 0);
    return 0;
}
Exemple #7
0
/* DLOOP_Leaf_vector_mpi_flatten
 *
 * Leaf callback that appends the blocks of a vector leaf to the (disps,
 * blklens) arrays held in the mpi_flatten_params block, merging a block
 * into the previous entry whenever it starts exactly where that entry ends.
 *
 * Input Parameters:
 * blocks_p - [inout] pointer to a count of blocks (total, for all noncontiguous pieces)
 * count    - # of noncontiguous regions
 * blksz    - size of each noncontiguous region
 * stride   - distance in bytes from start of one region to start of next
 * el_type  - elemental type (e.g. MPI_INT)
 * rel_off  - offset into buffer
 * bufp     - start of buffer (may be a displacement, possibly negative)
 * v_paramp - struct mpi_flatten_params (index, length, disps, blklens)
 *
 * Returns 1 when the output arrays filled up before all blocks were
 * stored (blocks_p is then reduced to the number consumed), 0 when every
 * block was processed.
 *
 * Note: this is only called when the starting position is at the beginning
 * of a whole block in a vector type.
 *
 * TODO: MAKE THIS CODE SMARTER, USING THE SAME GENERAL APPROACH AS IN THE
 *       COUNT BLOCK CODE ABOVE.
 */
static int DLOOP_Leaf_vector_mpi_flatten(DLOOP_Offset *blocks_p,
					 DLOOP_Count count,
					 DLOOP_Count blksz,
					 DLOOP_Offset stride,
					 DLOOP_Type el_type,
					 DLOOP_Offset rel_off, /* offset into buffer */
					 void *bufp, /* start of buffer */
					 void *v_paramp)
{
    DLOOP_Count i;   /* same width as count, so the comparison cannot overflow */
    DLOOP_Size size, blocks_left;
    DLOOP_Offset el_size;
    struct PREPEND_PREFIX(mpi_flatten_params) *paramp = v_paramp;

    DLOOP_Handle_get_size_macro(el_type, el_size);
    blocks_left = *blocks_p;

    for (i=0; i < count && blocks_left > 0; i++) {
        int last_idx;
        char *last_end = NULL;
        char *blk_start;

        if (blocks_left > blksz) {
            size = blksz * el_size;
            blocks_left -= blksz;
        }
        else {
            /* last pass */
            size = blocks_left * el_size;
            blocks_left = 0;
        }

        last_idx = paramp->index - 1;
        if (last_idx >= 0) {
            /* Since disps can be negative, we cannot use
             * DLOOP_Ensure_Offset_fits_in_pointer to verify that disps +
             * blklens fits in a pointer.  Just let it truncate, if the
             * sizeof a pointer is less than the sizeof an MPI_Aint.
             */
            last_end = (char *) DLOOP_OFFSET_CAST_TO_VOID_PTR
                       (paramp->disps[last_idx] +
                         (MPI_Aint)(paramp->blklens[last_idx]));
        }

        /* Since bufp can be a displacement and can be negative, we cannot use
         * DLOOP_Ensure_Offset_fits_in_pointer to ensure the sum fits in a pointer.
         * Just let it truncate.
         */
        blk_start = (char *) bufp + rel_off;

        if ((last_idx == paramp->length-1) && (last_end != blk_start))
        {
            /* we have used up all our entries, and this one doesn't fit on
             * the end of the last one.  Give back the elements of this
             * block and of all the blocks not yet visited.
             */
            *blocks_p -= (blocks_left + (size / el_size));
#ifdef MPID_SP_VERBOSE
            /* the flatten parameter block keeps its next index in
             * paramp->index, the same field used throughout this function */
            MPL_DBG_MSG_FMT(MPIR_DBG_DATATYPE,VERBOSE,(MPL_DBG_FDEST,"\t[vector to vec exiting (1): next ind = %d, " DLOOP_OFFSET_FMT_DEC_SPEC " blocks processed.\n",
                            paramp->index,
                            *blocks_p));
#endif
            return 1;
        }
        else if (last_idx >= 0 && (last_end == blk_start))
        {
            /* add this size to the last vector rather than using up new one */
            paramp->blklens[last_idx] += size;
        }
        else {
            /* Since bufp can be a displacement and can be negative, we cannot use
             * DLOOP_VOID_PTR_CAST_TO_OFFSET to cast the sum to a pointer.  Just let it
             * sign extend.
             */
            paramp->disps[last_idx+1]   = DLOOP_PTR_DISP_CAST_TO_OFFSET bufp + rel_off;
            paramp->blklens[last_idx+1] = size;
            paramp->index++;
        }

        /* advance to the start of the next region */
        rel_off += stride;
    }

#ifdef MPID_SP_VERBOSE
    MPL_DBG_MSG_FMT(MPIR_DBG_DATATYPE,VERBOSE,(MPL_DBG_FDEST,"\t[vector to vec exiting (2): next ind = %d, " DLOOP_OFFSET_FMT_DEC_SPEC " blocks processed.\n",
                    paramp->index,
                    *blocks_p));
#endif

    /* if we get here then we processed ALL the blocks; don't need to update
     * blocks_p
     */

    DLOOP_Assert(blocks_left == 0);
    return 0;
}
/* DLOOP_Dataloop_create_flattened_struct
 *
 * Builds the dataloop for a struct of arbitrary member types by flattening
 * every member into (byte length, byte displacement) pairs and handing the
 * result to MPIR_Dataloop_create_indexed with MPI_BYTE as the element type.
 *
 * count     - number of struct members
 * blklens   - block length of each member
 * disps     - byte displacement of each member
 * oldtypes  - type of each member; MPI_LB / MPI_UB members and members
 *             with a zero block length or zero size contribute no data
 * dlp_p, dlsz_p, dldepth_p - [out] forwarded to the create routine used
 * flag      - dataloop variant, forwarded to the create routines
 *
 * Two passes are made with the segment code: the first counts contiguous
 * regions to size the temporary arrays, the second fills them in.
 *
 * Returns the result of the create routine, the memory-error code from
 * DLOOP_Dataloop_create_struct_memory_error(), or the error reported by
 * MPIR_Segment_init.  The segment and both temporary arrays are released
 * on every path.
 */
static int DLOOP_Dataloop_create_flattened_struct(DLOOP_Count count,
						  const int *blklens,
						  const MPI_Aint *disps,
						  const DLOOP_Type *oldtypes,
						  DLOOP_Dataloop **dlp_p,
						  MPI_Aint *dlsz_p,
						  int *dldepth_p,
						  int flag)
{
    /* arbitrary types, convert to bytes and use indexed */
    int i, err, nr_blks = 0;
    DLOOP_Size *tmp_blklens;
    MPI_Aint *tmp_disps; /* since we're calling another fn that takes
                            this type as an input parameter */
    DLOOP_Offset bytes;
    DLOOP_Segment *segp;

    int first_ind;
    DLOOP_Size last_ind;

    segp = MPIR_Segment_alloc();
    /* --BEGIN ERROR HANDLING-- */
    if (!segp) {
        return DLOOP_Dataloop_create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */

    /* use segment code once to count contiguous regions */
    for (i=0; i < count; i++)
    {
        int is_basic;

        /* ignore type elements with a zero blklen */
        if (blklens[i] == 0) continue;

        is_basic = (DLOOP_Handle_hasloop_macro(oldtypes[i])) ? 0 : 1;

        if (is_basic && (oldtypes[i] != MPI_LB &&
                         oldtypes[i] != MPI_UB))
        {
            nr_blks++;
        }
        else /* derived type; get a count of contig blocks */
        {
            DLOOP_Count tmp_nr_blks, sz;

            DLOOP_Handle_get_size_macro(oldtypes[i], sz);

            /* if the derived type has some data to contribute,
             * add to flattened representation */
            if (sz > 0) {
                err = MPIR_Segment_init(NULL,
                                        (DLOOP_Count) blklens[i],
                                        oldtypes[i],
                                        segp,
                                        flag);
                /* --BEGIN ERROR HANDLING-- */
                if (err) {
                    /* the segment is ours; do not leak it */
                    MPIR_Segment_free(segp);
                    return err;
                }
                /* --END ERROR HANDLING-- */

                bytes = SEGMENT_IGNORE_LAST;

                MPIR_Segment_count_contig_blocks(segp,
                                                 0,
                                                 &bytes,
                                                 &tmp_nr_blks);

                nr_blks += tmp_nr_blks;
            }
        }
    }

    /* it's possible for us to get to this point only to realize that
     * there isn't any data in this type. in that case do what we always
     * do: store a simple contig of zero ints and call it done.
     */
    if (nr_blks == 0) {
        MPIR_Segment_free(segp);
        err = MPIR_Dataloop_create_contiguous(0,
                                              MPI_INT,
                                              dlp_p,
                                              dlsz_p,
                                              dldepth_p,
                                              flag);
        return err;

    }

    nr_blks += 2; /* safety measure */

    tmp_blklens = (DLOOP_Size *) DLOOP_Malloc(nr_blks * sizeof(DLOOP_Size), MPL_MEM_DATATYPE);
    /* --BEGIN ERROR HANDLING-- */
    if (!tmp_blklens) {
        MPIR_Segment_free(segp);
        return DLOOP_Dataloop_create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */


    tmp_disps = (MPI_Aint *) DLOOP_Malloc(nr_blks * sizeof(MPI_Aint), MPL_MEM_DATATYPE);
    /* --BEGIN ERROR HANDLING-- */
    if (!tmp_disps) {
        DLOOP_Free(tmp_blklens);
        MPIR_Segment_free(segp);
        return DLOOP_Dataloop_create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */

    /* use segment code again to flatten the type */
    first_ind = 0;
    for (i=0; i < count; i++)
    {
        int is_basic;
        DLOOP_Count sz = -1;

        is_basic = (DLOOP_Handle_hasloop_macro(oldtypes[i])) ? 0 : 1;
        if (!is_basic) DLOOP_Handle_get_size_macro(oldtypes[i], sz);

        /* we're going to use the segment code to flatten the type.
         * we put in our displacement as the buffer location, and use
         * the blocklength as the count value to get N contiguous copies
         * of the type.
         *
         * Note that we're going to get back values in bytes, so that will
         * be our new element type.
         */
        if (oldtypes[i] != MPI_UB &&
            oldtypes[i] != MPI_LB &&
            blklens[i] != 0 &&
            (is_basic || sz > 0))
        {
            err = MPIR_Segment_init((char *) DLOOP_OFFSET_CAST_TO_VOID_PTR disps[i],
                                    (DLOOP_Count) blklens[i],
                                    oldtypes[i],
                                    segp,
                                    0 /* homogeneous */);
            /* --BEGIN ERROR HANDLING-- */
            if (err) {
                /* release everything acquired so far before bailing out */
                DLOOP_Free(tmp_blklens);
                DLOOP_Free(tmp_disps);
                MPIR_Segment_free(segp);
                return err;
            }
            /* --END ERROR HANDLING-- */

            /* in: room left in the arrays; out: entries written */
            last_ind = nr_blks - first_ind;
            bytes = SEGMENT_IGNORE_LAST;
            MPIR_Segment_mpi_flatten(segp,
                                     0,
                                     &bytes,
                                     &tmp_blklens[first_ind],
                                     &tmp_disps[first_ind],
                                     &last_ind);
            first_ind += last_ind;
        }
    }
    nr_blks = first_ind;

#if 0
    if (MPL_DBG_SELECTED(MPIR_DBG_DATATYPE,VERBOSE)) {
	MPL_DBG_OUT(MPIR_DBG_DATATYPE,"--- start of flattened type ---");
        for (i=0; i < nr_blks; i++) {
	MPL_DBG_OUT_FMT(MPIR_DBG_DATATYPE,(MPL_DBG_FDEST,
				   "a[%d] = (%d, " DLOOP_OFFSET_FMT_DEC_SPEC ")", i,
				   tmp_blklens[i], tmp_disps[i]));
	}
	MPL_DBG_OUT(MPIR_DBG_DATATYPE,"--- end of flattened type ---");
    }
#endif

    MPIR_Segment_free(segp);

    err = MPIR_Dataloop_create_indexed(nr_blks,
                                       tmp_blklens,
                                       tmp_disps,
                                       1, /* disp in bytes */
                                       MPI_BYTE,
                                       dlp_p,
                                       dlsz_p,
                                       dldepth_p,
                                       flag);

    DLOOP_Free(tmp_blklens);
    DLOOP_Free(tmp_disps);

    return err;
}
/* DLOOP_Dataloop_create_basic_all_bytes_struct
 *
 * Builds a dataloop for a struct by re-expressing every usable entry as a
 * run of bytes and handing the result to MPIR_Dataloop_create_indexed with
 * MPI_BYTE as the element type and displacements given in bytes.
 *
 * Entries whose type is MPI_LB or MPI_UB, or whose blocklength is zero,
 * are dropped.  For the remaining entries the byte length is
 * (size of oldtypes[i]) * blklens[i] and the displacement is disps[i].
 *
 * Arguments:
 *   count     - number of entries in blklens/disps/oldtypes
 *   blklens   - per-entry block lengths, in units of oldtypes[i]
 *   disps     - per-entry displacements (passed through unchanged)
 *   oldtypes  - per-entry types (presumably all basic types, judging by
 *               the function name -- confirm against the caller)
 *   dlp_p, dlsz_p, dldepth_p, flag - forwarded to
 *               MPIR_Dataloop_create_indexed
 *
 * Returns the value of MPIR_Dataloop_create_indexed, or the value of
 * DLOOP_Dataloop_create_struct_memory_error() if a temporary array
 * cannot be allocated.
 */
static int DLOOP_Dataloop_create_basic_all_bytes_struct(
	       DLOOP_Count count,
	       const int *blklens,
	       const MPI_Aint *disps,
	       const DLOOP_Type *oldtypes,
	       DLOOP_Dataloop **dlp_p,
	       MPI_Aint *dlsz_p,
	       int *dldepth_p,
	       int flag)
{
    int i, err, cur_pos = 0;
    DLOOP_Size *tmp_blklens;
    MPI_Aint *tmp_disps;

    /* count is an upper bound on number of type instances */
    tmp_blklens = (DLOOP_Size *) DLOOP_Malloc(count * sizeof(DLOOP_Size), MPL_MEM_DATATYPE);

    /* --BEGIN ERROR HANDLING-- */
    if (!tmp_blklens)
    {
	return DLOOP_Dataloop_create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */

    tmp_disps = (MPI_Aint *) DLOOP_Malloc(count * sizeof(MPI_Aint), MPL_MEM_DATATYPE);

    /* --BEGIN ERROR HANDLING-- */
    if (!tmp_disps)
    {
	DLOOP_Free(tmp_blklens);
	return DLOOP_Dataloop_create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */

    /* compact the usable entries to the front of the temporary arrays */
    for (i=0; i < count; i++)
    {
	if (oldtypes[i] != MPI_LB && oldtypes[i] != MPI_UB && blklens[i] != 0)
	{
	    DLOOP_Offset sz;

	    DLOOP_Handle_get_size_macro(oldtypes[i], sz);

	    /* Multiply in the width of the destination element.  Casting
	     * the size to int first would perform the product in int,
	     * which can overflow (undefined behavior) for large blocks
	     * even though the result slot is wide enough to hold it.
	     */
	    tmp_blklens[cur_pos] = (DLOOP_Size) sz * (DLOOP_Size) blklens[i];
	    tmp_disps[cur_pos]   = disps[i];
	    cur_pos++;
	}
    }

    /* cur_pos is now the number of entries that survived the filter */
    err = MPIR_Dataloop_create_indexed(cur_pos,
						  tmp_blklens,
						  tmp_disps,
						  1, /* disp in bytes */
						  MPI_BYTE,
						  dlp_p,
						  dlsz_p,
						  dldepth_p,
						  flag);

    DLOOP_Free(tmp_blklens);
    DLOOP_Free(tmp_disps);

    return err;
}
/*@
   Dataloop_create_vector

   Arguments:
+  int icount
.  int iblocklength
.  MPI_Aint astride
.  int strideinbytes
.  MPI_Datatype oldtype
.  DLOOP_Dataloop **dlp_p
.  int *dlsz_p
.  int *dldepth_p
-  int flag

   Returns 0 on success, -1 on failure.

@*/
int PREPEND_PREFIX(Dataloop_create_vector)(int icount,
        int iblocklength,
        MPI_Aint astride,
        int strideinbytes,
        DLOOP_Type oldtype,
        DLOOP_Dataloop **dlp_p,
        int *dlsz_p,
        int *dldepth_p,
        int flag)
{
    DLOOP_Dataloop *vec_dlp;
    int vec_loop_sz, vec_loop_depth;

    /* convert the incoming values once so the rest of the routine
     * works in the dataloop types without further casting
     */
    DLOOP_Count nblocks = (DLOOP_Count) icount;
    DLOOP_Count blklen  = (DLOOP_Count) iblocklength;
    DLOOP_Offset step   = (DLOOP_Offset) astride;

    /* an empty vector (no blocks, or empty blocks) is represented as a
     * zero-count contiguous loop of MPI_INT
     */
    if (nblocks == 0 || blklen == 0) {
        return PREPEND_PREFIX(Dataloop_create_contiguous)(0,
                MPI_INT,
                dlp_p,
                dlsz_p,
                dldepth_p,
                flag);
    }

    /* a single block has no stride to speak of; store it as a
     * contiguous dataloop instead of a vector
     */
    if (nblocks == 1) {
        return PREPEND_PREFIX(Dataloop_create_contiguous)(iblocklength,
                oldtype,
                dlp_p,
                dlsz_p,
                dldepth_p,
                flag);
    }

    if (DLOOP_Handle_hasloop_macro(oldtype)) {
        /* oldtype carries its own dataloop: the new loop is the vector
         * node plus a copy of the child loop
         */
        DLOOP_Dataloop *child_loop;
        int child_loop_sz = 0, child_loop_depth = 0;

        DLOOP_Handle_get_loopsize_macro(oldtype, child_loop_sz, flag);
        DLOOP_Handle_get_loopdepth_macro(oldtype, child_loop_depth, flag);

        /* TODO: ACCOUNT FOR PADDING IN LOOP_SZ HERE */
        vec_loop_sz    = sizeof(DLOOP_Dataloop) + child_loop_sz;
        vec_loop_depth = child_loop_depth + 1;

        DLOOP_Handle_get_loopptr_macro(oldtype, child_loop, flag);

        PREPEND_PREFIX(Dataloop_alloc_and_copy)(DLOOP_KIND_VECTOR,
                                                nblocks,
                                                child_loop,
                                                child_loop_sz,
                                                &vec_dlp,
                                                &vec_loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!vec_dlp) {
            return -1;
        }
        /* --END ERROR HANDLING-- */

        vec_dlp->kind = DLOOP_KIND_VECTOR;
        DLOOP_Handle_get_size_macro(oldtype, vec_dlp->el_size);
        DLOOP_Handle_get_extent_macro(oldtype, vec_dlp->el_extent);
        DLOOP_Handle_get_basic_type_macro(oldtype, vec_dlp->el_type);
    }
    else {
        /* oldtype has no dataloop of its own: the vector node is the
         * whole loop and is marked final
         */
        DLOOP_Offset elem_bytes = 0;

        vec_loop_sz    = sizeof(DLOOP_Dataloop);
        vec_loop_depth = 1;

        PREPEND_PREFIX(Dataloop_alloc)(DLOOP_KIND_VECTOR,
                                       nblocks,
                                       &vec_dlp,
                                       &vec_loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!vec_dlp) {
            return -1;
        }
        /* --END ERROR HANDLING-- */

        DLOOP_Handle_get_size_macro(oldtype, elem_bytes);
        vec_dlp->kind = DLOOP_KIND_VECTOR | DLOOP_FINAL_MASK;

        if (flag != DLOOP_DATALOOP_ALL_BYTES) {
            vec_dlp->el_size   = elem_bytes;
            vec_dlp->el_extent = vec_dlp->el_size;
            vec_dlp->el_type   = oldtype;
        }
        else {
            /* describe the data as bytes: scale the block length by
             * the element size and use a one-byte element
             */
            blklen            *= elem_bytes;
            vec_dlp->el_size   = 1;
            vec_dlp->el_extent = 1;
            vec_dlp->el_type   = MPI_BYTE;

            if (!strideinbytes) {
                /* a stride given in units of oldtype has to be
                 * rescaled as well, since the element is now a byte
                 */
                step *= elem_bytes;
            }
        }
    }

    /* vector-specific members; the stride kept in the dataloop is
     * always expressed in bytes
     */
    vec_dlp->loop_params.v_t.count     = nblocks;
    vec_dlp->loop_params.v_t.blocksize = blklen;
    if (strideinbytes) {
        vec_dlp->loop_params.v_t.stride = step;
    }
    else {
        vec_dlp->loop_params.v_t.stride = step * vec_dlp->el_extent;
    }

    /* hand the finished loop and its size/depth back to the caller */
    *dlp_p     = vec_dlp;
    *dlsz_p    = vec_loop_sz;
    *dldepth_p = vec_loop_depth;

    return 0;
}