Exemple #1
0
/*@
  Dataloop_dup - make a copy of a dataloop

Input Parameters:
+ old_loop    - dataloop to duplicate (asserted to be non-NULL)
- old_loop_sz - size handed to the allocator and to Dataloop_copy
                (asserted to be greater than zero)

Output Parameters:
. new_loop_p  - receives the newly allocated copy, or NULL when the
                allocation fails

  Notes:
  The function returns nothing; a NULL stored through new_loop_p is the
  only failure indication.
@*/
void PREPEND_PREFIX(Dataloop_dup)(DLOOP_Dataloop *old_loop,
                                  DLOOP_Count old_loop_sz,
                                  DLOOP_Dataloop **new_loop_p)
{
    DLOOP_Dataloop *copy;

    DLOOP_Assert(old_loop != NULL);
    DLOOP_Assert(old_loop_sz > 0);

    copy = (DLOOP_Dataloop *) DLOOP_Malloc(old_loop_sz);
    if (copy != NULL) {
        /* only touch the source when we actually have a destination */
        PREPEND_PREFIX(Dataloop_copy)(copy, old_loop, old_loop_sz);
    }

    /* NULL on allocation failure, the populated copy otherwise */
    *new_loop_p = copy;
}
/*
 * DLOOP_Dataloop_create_flattened_struct - build a dataloop for a struct
 * by flattening every member into (length, displacement) pairs and passing
 * them to MPIR_Dataloop_create_indexed() with MPI_BYTE as the element type.
 *
 * count     - number of entries in blklens / disps / oldtypes
 * blklens   - block length of each struct member
 * disps     - displacement of each struct member
 * oldtypes  - type of each struct member
 * dlp_p, dlsz_p, dldepth_p, flag
 *           - forwarded unchanged to the dataloop constructors
 *
 * Returns the error code of the first failing step, otherwise the result
 * of the final dataloop constructor call.  The segment and both scratch
 * arrays are released on every exit path.
 */
static int DLOOP_Dataloop_create_flattened_struct(DLOOP_Count count,
                                                  const int *blklens,
                                                  const MPI_Aint *disps,
                                                  const DLOOP_Type *oldtypes,
                                                  DLOOP_Dataloop **dlp_p,
                                                  MPI_Aint *dlsz_p,
                                                  int *dldepth_p,
                                                  int flag)
{
    /* arbitrary types, convert to bytes and use indexed */
    int i, err = 0, nr_blks = 0;
    DLOOP_Size *tmp_blklens = NULL;
    MPI_Aint *tmp_disps = NULL; /* since we're calling another fn that takes
                                 * this type as an input parameter */
    DLOOP_Offset bytes;
    DLOOP_Segment *segp;

    int first_ind;
    DLOOP_Size last_ind;

    segp = MPIR_Segment_alloc();
    /* --BEGIN ERROR HANDLING-- */
    if (!segp) {
        return DLOOP_Dataloop_create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */

    /* use segment code once to count contiguous regions */
    for (i = 0; i < count; i++) {
        int is_basic;

        /* ignore type elements with a zero blklen */
        if (blklens[i] == 0) continue;

        is_basic = (DLOOP_Handle_hasloop_macro(oldtypes[i])) ? 0 : 1;

        if (is_basic && (oldtypes[i] != MPI_LB && oldtypes[i] != MPI_UB)) {
            nr_blks++;
        }
        else {
            /* derived type (or an LB/UB marker); count its contig blocks */
            DLOOP_Count tmp_nr_blks, sz;

            DLOOP_Handle_get_size_macro(oldtypes[i], sz);

            /* if the type has some data to contribute,
             * add to flattened representation */
            if (sz > 0) {
                err = MPIR_Segment_init(NULL,
                                        (DLOOP_Count) blklens[i],
                                        oldtypes[i],
                                        segp,
                                        flag);
                /* leave through the cleanup path so segp is not leaked */
                if (err) goto fn_cleanup;

                bytes = SEGMENT_IGNORE_LAST;

                MPIR_Segment_count_contig_blocks(segp,
                                                 0,
                                                 &bytes,
                                                 &tmp_nr_blks);

                nr_blks += tmp_nr_blks;
            }
        }
    }

    /* it's possible for us to get to this point only to realize that
     * there isn't any data in this type. in that case do what we always
     * do: store a simple contig of zero ints and call it done.
     */
    if (nr_blks == 0) {
        err = MPIR_Dataloop_create_contiguous(0,
                                              MPI_INT,
                                              dlp_p,
                                              dlsz_p,
                                              dldepth_p,
                                              flag);
        goto fn_cleanup;
    }

    nr_blks += 2; /* safety measure */

    tmp_blklens = (DLOOP_Size *) DLOOP_Malloc(nr_blks * sizeof(DLOOP_Size), MPL_MEM_DATATYPE);
    /* --BEGIN ERROR HANDLING-- */
    if (!tmp_blklens) {
        err = DLOOP_Dataloop_create_struct_memory_error();
        goto fn_cleanup;
    }
    /* --END ERROR HANDLING-- */

    tmp_disps = (MPI_Aint *) DLOOP_Malloc(nr_blks * sizeof(MPI_Aint), MPL_MEM_DATATYPE);
    /* --BEGIN ERROR HANDLING-- */
    if (!tmp_disps) {
        err = DLOOP_Dataloop_create_struct_memory_error();
        goto fn_cleanup;
    }
    /* --END ERROR HANDLING-- */

    /* use segment code again to flatten the type */
    first_ind = 0;
    for (i = 0; i < count; i++) {
        int is_basic;
        DLOOP_Count sz = -1;

        is_basic = (DLOOP_Handle_hasloop_macro(oldtypes[i])) ? 0 : 1;
        if (!is_basic) {
            DLOOP_Handle_get_size_macro(oldtypes[i], sz);
        }

        /* we're going to use the segment code to flatten the type.
         * we put in our displacement as the buffer location, and use
         * the blocklength as the count value to get N contiguous copies
         * of the type.
         *
         * Note that we're going to get back values in bytes, so that will
         * be our new element type.
         */
        if (oldtypes[i] != MPI_UB &&
            oldtypes[i] != MPI_LB &&
            blklens[i] != 0 &&
            (is_basic || sz > 0))
        {
            err = MPIR_Segment_init((char *) DLOOP_OFFSET_CAST_TO_VOID_PTR disps[i],
                                    (DLOOP_Count) blklens[i],
                                    oldtypes[i],
                                    segp,
                                    0 /* homogeneous */);
            /* release segp and both scratch arrays on failure */
            if (err) goto fn_cleanup;

            /* room left in the scratch arrays; updated by the flatten call
             * and then used to advance first_ind */
            last_ind = nr_blks - first_ind;
            bytes = SEGMENT_IGNORE_LAST;
            MPIR_Segment_mpi_flatten(segp,
                                     0,
                                     &bytes,
                                     &tmp_blklens[first_ind],
                                     &tmp_disps[first_ind],
                                     &last_ind);
            first_ind += last_ind;
        }
    }
    nr_blks = first_ind;

#if 0
    if (MPL_DBG_SELECTED(MPIR_DBG_DATATYPE,VERBOSE)) {
	MPL_DBG_OUT(MPIR_DBG_DATATYPE,"--- start of flattened type ---");
        for (i=0; i < nr_blks; i++) {
	MPL_DBG_OUT_FMT(MPIR_DBG_DATATYPE,(MPL_DBG_FDEST,
				   "a[%d] = (%d, " DLOOP_OFFSET_FMT_DEC_SPEC ")", i,
				   tmp_blklens[i], tmp_disps[i]));
	}
	MPL_DBG_OUT(MPIR_DBG_DATATYPE,"--- end of flattened type ---");
    }
#endif

    err = MPIR_Dataloop_create_indexed(nr_blks,
                                       tmp_blklens,
                                       tmp_disps,
                                       1, /* disp in bytes */
                                       MPI_BYTE,
                                       dlp_p,
                                       dlsz_p,
                                       dldepth_p,
                                       flag);

  fn_cleanup:
    /* single exit: every resource acquired above is released here */
    if (tmp_disps) {
        DLOOP_Free(tmp_disps);
    }
    if (tmp_blklens) {
        DLOOP_Free(tmp_blklens);
    }
    MPIR_Segment_free(segp);

    return err;
}
/*
 * DLOOP_Dataloop_create_basic_all_bytes_struct - build a dataloop for a
 * struct by converting each member into a block of bytes and passing the
 * result to MPIR_Dataloop_create_indexed() with MPI_BYTE elements.
 *
 * Members whose type is MPI_LB or MPI_UB, or whose block length is zero,
 * contribute nothing and are skipped.
 *
 * count     - number of entries in blklens / disps / oldtypes
 * blklens   - block length of each struct member
 * disps     - displacement of each struct member
 * oldtypes  - type of each struct member
 * dlp_p, dlsz_p, dldepth_p, flag
 *           - forwarded unchanged to MPIR_Dataloop_create_indexed()
 *
 * Returns the memory-error code if a scratch array cannot be allocated,
 * otherwise the result of MPIR_Dataloop_create_indexed().
 */
static int DLOOP_Dataloop_create_basic_all_bytes_struct(
	       DLOOP_Count count,
	       const int *blklens,
	       const MPI_Aint *disps,
	       const DLOOP_Type *oldtypes,
	       DLOOP_Dataloop **dlp_p,
	       MPI_Aint *dlsz_p,
	       int *dldepth_p,
	       int flag)
{
    int i, err, cur_pos = 0;
    DLOOP_Size *tmp_blklens;
    MPI_Aint *tmp_disps;

    /* count is an upper bound on number of type instances */
    tmp_blklens = (DLOOP_Size *) DLOOP_Malloc(count * sizeof(DLOOP_Size), MPL_MEM_DATATYPE);

    /* --BEGIN ERROR HANDLING-- */
    if (!tmp_blklens)
    {
        return DLOOP_Dataloop_create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */

    tmp_disps = (MPI_Aint *) DLOOP_Malloc(count * sizeof(MPI_Aint), MPL_MEM_DATATYPE);

    /* --BEGIN ERROR HANDLING-- */
    if (!tmp_disps)
    {
        DLOOP_Free(tmp_blklens);
        return DLOOP_Dataloop_create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */

    for (i = 0; i < count; i++)
    {
        if (oldtypes[i] != MPI_LB && oldtypes[i] != MPI_UB && blklens[i] != 0)
        {
            DLOOP_Offset sz;

            DLOOP_Handle_get_size_macro(oldtypes[i], sz);

            /* compute the byte length in the destination type; doing the
             * multiplication in int could truncate sz or overflow */
            tmp_blklens[cur_pos] = (DLOOP_Size) sz * (DLOOP_Size) blklens[i];
            tmp_disps[cur_pos]   = disps[i];
            cur_pos++;
        }
    }

    err = MPIR_Dataloop_create_indexed(cur_pos,
                                       tmp_blklens,
                                       tmp_disps,
                                       1, /* disp in bytes */
                                       MPI_BYTE,
                                       dlp_p,
                                       dlsz_p,
                                       dldepth_p,
                                       flag);

    DLOOP_Free(tmp_blklens);
    DLOOP_Free(tmp_disps);

    return err;
}
/*
 * DLOOP_Dataloop_create_unique_type_struct - build a dataloop for a struct
 * in which a single type is used (possibly more than once); type_pos
 * indexes the first occurrence of that type.
 *
 * Entries from type_pos onward that use the same type and have a non-zero
 * block length are gathered and handed to MPIR_Dataloop_create_indexed()
 * with that type as the element type.
 *
 * count     - number of entries in blklens / disps / oldtypes
 * blklens   - block length of each struct member
 * disps     - displacement of each struct member
 * oldtypes  - type of each struct member
 * type_pos  - index of the first entry using the type of interest
 * dlp_p, dlsz_p, dldepth_p, flag
 *           - forwarded unchanged to MPIR_Dataloop_create_indexed()
 *
 * Returns the memory-error code if a scratch array cannot be allocated,
 * otherwise the result of MPIR_Dataloop_create_indexed().
 */
static int DLOOP_Dataloop_create_unique_type_struct(DLOOP_Count count,
						    const int *blklens,
						    const MPI_Aint *disps,
						    const DLOOP_Type *oldtypes,
						    int type_pos,
						    DLOOP_Dataloop **dlp_p,
						    MPI_Aint *dlsz_p,
						    int *dldepth_p,
						    int flag)
{
    int i, err, cur_pos = 0;
    DLOOP_Size *tmp_blklens;
    DLOOP_Offset *tmp_disps;

    /* count is an upper bound on number of type instances */
    tmp_blklens = (DLOOP_Size *) DLOOP_Malloc(count * sizeof(DLOOP_Size), MPL_MEM_DATATYPE);
    /* --BEGIN ERROR HANDLING-- */
    if (!tmp_blklens) {
        /* TODO: ??? */
        return DLOOP_Dataloop_create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */

    tmp_disps = (DLOOP_Offset *)
        DLOOP_Malloc(count * sizeof(DLOOP_Offset), MPL_MEM_DATATYPE);
    /* --BEGIN ERROR HANDLING-- */
    if (!tmp_disps) {
        DLOOP_Free(tmp_blklens);
        /* TODO: ??? */
        return DLOOP_Dataloop_create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */

    for (i = type_pos; i < count; i++)
    {
        /* test the block length of *this* entry; comparing the array
         * pointer itself against 0 would never filter anything out */
        if (oldtypes[i] == oldtypes[type_pos] && blklens[i] != 0)
        {
            tmp_blklens[cur_pos] = blklens[i];
            tmp_disps[cur_pos]   = disps[i];
            cur_pos++;
        }
    }

    err = MPIR_Dataloop_create_indexed(cur_pos,
                                       tmp_blklens,
                                       tmp_disps,
                                       1, /* disp in bytes */
                                       oldtypes[type_pos],
                                       dlp_p,
                                       dlsz_p,
                                       dldepth_p,
                                       flag);

    DLOOP_Free(tmp_blklens);
    DLOOP_Free(tmp_disps);

    return err;
}
Exemple #5
0
/*@
  Dataloop_struct_alloc - allocate one buffer holding a struct dataloop,
                          its per-element arrays, room for basic-type
                          dataloops, and room for copies of old dataloops.

Input Parameters:
+ count         - number of elements in dataloop (kind dependent)
. old_loop_sz   - number of bytes to reserve for old dataloops
- basic_ct      - number of basic types for which new dataloops are needed

Output Parameters:
+ old_loop_p    - receives the address just past the extent array, where
                  the caller is expected to place the old loops
. new_loop_p    - receives the new struct dataloop, or NULL if the
                  allocation fails
- new_loop_sz_p - receives the total size of the allocation

  Notes:
  The count parameter passed into this function will often be different
  from the count passed in at the MPI layer due to optimizations.

  Layout of the buffer, each piece padded to the alignment size:
  dataloop header, pointer array, block size array, offset array,
  extent array, then the remaining space.

  When the allocation fails only new_loop_p is written.
@*/
void PREPEND_PREFIX(Dataloop_struct_alloc)(DLOOP_Count count,
					   DLOOP_Size old_loop_sz,
					   int basic_ct,
					   DLOOP_Dataloop **old_loop_p,
					   DLOOP_Dataloop **new_loop_p,
					   DLOOP_Size *new_loop_sz_p)
{
    int align_sz = 8; /* default aligns everything to 8-byte boundaries */
    int k;
    DLOOP_Size loop_sz   = sizeof(DLOOP_Dataloop);
    DLOOP_Size basic_sz  = sizeof(DLOOP_Dataloop);
    DLOOP_Size ptr_sz    = count * sizeof(DLOOP_Dataloop *);
    DLOOP_Size blk_sz    = count * sizeof(DLOOP_Count);
    DLOOP_Size off_sz    = count * sizeof(DLOOP_Offset);
    DLOOP_Size extent_sz = count * sizeof(DLOOP_Offset);
    DLOOP_Size new_loop_sz;
    DLOOP_Size *to_pad[6];
    char *cursor;
    DLOOP_Dataloop *new_loop;

#ifdef HAVE_MAX_STRUCT_ALIGNMENT
    if (align_sz > HAVE_MAX_STRUCT_ALIGNMENT) {
	align_sz = HAVE_MAX_STRUCT_ALIGNMENT;
    }
#endif

    /* round every piece up to a multiple of align_sz.
     *
     * note: we pad *each* basic type dataloop, because the
     * code used to create them assumes that we're going to
     * do that.
     */
    to_pad[0] = &loop_sz;
    to_pad[1] = &off_sz;
    to_pad[2] = &blk_sz;
    to_pad[3] = &ptr_sz;
    to_pad[4] = &extent_sz;
    to_pad[5] = &basic_sz;
    for (k = 0; k < 6; k++) {
	int epsilon = *to_pad[k] % align_sz;

	if (epsilon) {
	    *to_pad[k] += align_sz - epsilon;
	}
    }

    new_loop_sz = loop_sz + off_sz + blk_sz + ptr_sz +
	extent_sz + (basic_ct * basic_sz) + old_loop_sz;

    /* allocate space */
    new_loop = (DLOOP_Dataloop *) DLOOP_Malloc(new_loop_sz);
    if (new_loop == NULL) {
	*new_loop_p = NULL;
	return;
    }

#ifdef DLOOP_DEBUG_MEMORY
    DLOOP_dbg_printf("DLOOP_Dataloop_struct_alloc: new loop @ %x (tot sz = %z, loop = %z, off = %z, blk = %z, ptr = %z, extent = %z, basics = %z, old = %z)\n",
		     (int) new_loop,
		     new_loop_sz,
		     loop_sz,
		     off_sz,
		     blk_sz,
		     ptr_sz,
		     extent_sz,
		     basic_sz,
		     old_loop_sz);
#endif

    /* walk through the buffer, handing out one region after another */
    cursor = ((char *) new_loop) + loop_sz;

    new_loop->loop_params.s_t.dataloop_array = (DLOOP_Dataloop **) cursor;
    cursor += ptr_sz;

    new_loop->loop_params.s_t.blocksize_array = (DLOOP_Count *) cursor;
    cursor += blk_sz;

    new_loop->loop_params.s_t.offset_array = (DLOOP_Offset *) cursor;
    cursor += off_sz;

    new_loop->loop_params.s_t.el_extent_array = (DLOOP_Offset *) cursor;
    cursor += extent_sz;

    /* whatever follows the extent array is handed back to the caller */
    *old_loop_p    = (DLOOP_Dataloop *) cursor;
    *new_loop_p    = new_loop;
    *new_loop_sz_p = new_loop_sz;
}
Exemple #6
0
/*@
  Dataloop_alloc_and_copy - allocate one buffer for a dataloop of the given
                            kind plus its per-element arrays, and copy an
                            existing dataloop into the tail of that buffer
                            when one is supplied

Input Parameters:
+ kind          - kind of dataloop to allocate
. count         - number of elements in dataloop (kind dependent)
. old_loop      - pointer to old dataloop (or NULL for none)
- old_loop_sz   - size of old dataloop (should be zero if old_loop is NULL)

Output Parameters:
+ new_loop_p    - receives the new dataloop, or NULL if allocation fails
- new_loop_sz_p - receives the total size of the allocation

  Notes:
  The count parameter passed into this function will often be different
  from the count passed in at the MPI layer.

  When old_loop is non-NULL its size is asserted to be a multiple of the
  alignment size.  When the allocation fails only new_loop_p is written.
@*/
void PREPEND_PREFIX(Dataloop_alloc_and_copy)(int kind,
					     DLOOP_Count count,
					     DLOOP_Dataloop *old_loop,
					     DLOOP_Size old_loop_sz,
					     DLOOP_Dataloop **new_loop_p,
					     DLOOP_Size *new_loop_sz_p)
{
    int align_sz = 8; /* default aligns everything to 8-byte boundaries */
    int k;
    DLOOP_Size loop_sz = sizeof(DLOOP_Dataloop);
    DLOOP_Size off_sz = 0, blk_sz = 0, ptr_sz = 0, extent_sz = 0;
    DLOOP_Size new_loop_sz;
    DLOOP_Size *to_pad[5];

    char *tail;
    DLOOP_Dataloop *child;
    DLOOP_Dataloop *new_loop;

#ifdef HAVE_MAX_STRUCT_ALIGNMENT
    if (align_sz > HAVE_MAX_STRUCT_ALIGNMENT) {
	align_sz = HAVE_MAX_STRUCT_ALIGNMENT;
    }
#endif

    if (old_loop != NULL) {
	DLOOP_Assert((old_loop_sz % align_sz) == 0);
    }

    /* work out which per-element arrays this kind of dataloop carries */
    switch (kind) {
	case DLOOP_KIND_CONTIG:
	case DLOOP_KIND_VECTOR:
	    /* no per-element arrays */
	    break;
	case DLOOP_KIND_BLOCKINDEXED:
	    /* block offsets only */
	    off_sz = count * sizeof(DLOOP_Offset);
	    break;
	case DLOOP_KIND_INDEXED:
	    /* block sizes and block offsets */
	    blk_sz = count * sizeof(DLOOP_Count);
	    off_sz = count * sizeof(DLOOP_Offset);
	    break;
	case DLOOP_KIND_STRUCT:
	    /* dataloop pointers, extents, block sizes and block offsets */
	    ptr_sz    = count * sizeof(DLOOP_Dataloop *);
	    extent_sz = count * sizeof(DLOOP_Offset);
	    blk_sz    = count * sizeof(DLOOP_Count);
	    off_sz    = count * sizeof(DLOOP_Offset);
	    break;
	default:
	    DLOOP_Assert(0);
    }

    /* round every piece up to a multiple of align_sz */
    to_pad[0] = &loop_sz;
    to_pad[1] = &off_sz;
    to_pad[2] = &blk_sz;
    to_pad[3] = &ptr_sz;
    to_pad[4] = &extent_sz;
    for (k = 0; k < 5; k++) {
	int epsilon = *to_pad[k] % align_sz;

	if (epsilon) {
	    *to_pad[k] += align_sz - epsilon;
	}
    }

    new_loop_sz = loop_sz + off_sz + blk_sz + ptr_sz +
	extent_sz + old_loop_sz;

    /* allocate space */
    new_loop = (DLOOP_Dataloop *) DLOOP_Malloc(new_loop_sz);
    if (new_loop == NULL) {
	*new_loop_p = NULL;
	return;
    }

#ifdef DLOOP_DEBUG_MEMORY
    DLOOP_dbg_printf("DLOOP_Dataloop_alloc_and_copy: new loop @ %x (tot sz = %z, loop = %z, off = %z, blk = %z, ptr = %z, extent = %z, old = %z)\n",
		     (int) new_loop,
		     new_loop_sz,
		     loop_sz,
		     off_sz,
		     blk_sz,
		     ptr_sz,
		     extent_sz,
		     old_loop_sz);
#endif

    /* the old dataloop, if any, occupies the last old_loop_sz bytes */
    tail  = ((char *) new_loop) + (new_loop_sz - old_loop_sz);
    child = (old_loop != NULL) ? (DLOOP_Dataloop *) tail : NULL;

    /* set all the pointers in the new dataloop structure */
    switch (kind) {
	case DLOOP_KIND_CONTIG:
	    new_loop->loop_params.c_t.dataloop = child;
	    break;
	case DLOOP_KIND_VECTOR:
	    new_loop->loop_params.v_t.dataloop = child;
	    break;
	case DLOOP_KIND_BLOCKINDEXED:
	    /* offsets follow the header directly */
	    new_loop->loop_params.bi_t.offset_array =
		(DLOOP_Offset *) (((char *) new_loop) + loop_sz);
	    new_loop->loop_params.bi_t.dataloop = child;
	    break;
	case DLOOP_KIND_INDEXED:
	    /* order is: blocks, offsets */
	    new_loop->loop_params.i_t.blocksize_array =
		(DLOOP_Count *) (((char *) new_loop) + loop_sz);
	    new_loop->loop_params.i_t.offset_array =
		(DLOOP_Offset *) (((char *) new_loop) + loop_sz + blk_sz);
	    new_loop->loop_params.i_t.dataloop = child;
	    break;
	case DLOOP_KIND_STRUCT:
	    /* order is: pointers, blocks, offsets, extents */
	    new_loop->loop_params.s_t.dataloop_array =
		(DLOOP_Dataloop **) (((char *) new_loop) + loop_sz);
	    new_loop->loop_params.s_t.blocksize_array =
		(DLOOP_Count *) (((char *) new_loop) + loop_sz + ptr_sz);
	    new_loop->loop_params.s_t.offset_array =
		(DLOOP_Offset *) (((char *) new_loop) + loop_sz +
				  ptr_sz + blk_sz);
	    new_loop->loop_params.s_t.el_extent_array =
		(DLOOP_Offset *) (((char *) new_loop) + loop_sz +
				  ptr_sz + blk_sz + off_sz);
	    break;
	default:
	    DLOOP_Assert(0);
    }

    if (old_loop != NULL) {
	PREPEND_PREFIX(Dataloop_copy)(tail, old_loop, old_loop_sz);
    }

    *new_loop_p    = new_loop;
    *new_loop_sz_p = new_loop_sz;
}
/*@
  Dataloop_create - obtain a dataloop for an MPI datatype, building it from
                    the type's envelope and contents when the type does not
                    already carry one

Input Parameters:
+ type      - datatype for which a dataloop is wanted
- flag      - passed unchanged to every handle macro and dataloop
              constructor called here (meaning is defined by those callees)

Output Parameters:
+ dlp_p     - receives the dataloop; set to NULL if a temporary
              displacement array cannot be allocated
. dlsz_p    - receives the dataloop size
- dldepth_p - receives the dataloop depth

  Notes:
  Named and F90 parameterized types are handled up front.  If the type
  already has a dataloop attached, that dataloop is returned as is.
  Otherwise dataloops are first created and stored for the constituent
  types that lack one (types[0] always, every remaining member for
  structs), and then the constructor matching the combiner is called.

  The function has no return value; a failure of the struct constructor is
  caught with an assertion.
@*/
void PREPEND_PREFIX(Dataloop_create)(MPI_Datatype type,
				     DLOOP_Dataloop **dlp_p,
				     int *dlsz_p,
				     int *dldepth_p,
				     int flag)
{
    int i;
    int err;

    int nr_ints, nr_aints, nr_types, combiner;
    MPI_Datatype *types;
    int *ints;
    MPI_Aint *aints;

    DLOOP_Dataloop *old_dlp;
    int old_dlsz, old_dldepth;

    int dummy1, dummy2, dummy3, type0_combiner, ndims;
    MPI_Datatype tmptype;

    MPI_Aint stride;
    MPI_Aint *disps;

    MPIR_Type_get_envelope_impl(type, &nr_ints, &nr_aints, &nr_types, &combiner);

    /* some named types do need dataloops; handle separately. */
    if (combiner == MPI_COMBINER_NAMED) {
        DLOOP_Dataloop_create_named(type, dlp_p, dlsz_p, dldepth_p, flag);
        return;
    }
    else if (combiner == MPI_COMBINER_F90_REAL ||
             combiner == MPI_COMBINER_F90_COMPLEX ||
             combiner == MPI_COMBINER_F90_INTEGER)
    {
        MPI_Datatype f90basetype;
        DLOOP_Handle_get_basic_type_macro(type, f90basetype);
        PREPEND_PREFIX(Dataloop_create_contiguous)(1 /* count */,
                                                   f90basetype,
                                                   dlp_p, dlsz_p,
                                                   dldepth_p,
                                                   flag);
        return;
    }

    /* Q: should we also check for "hasloop", or is the COMBINER
     *    check above enough to weed out everything that wouldn't
     *    have a loop?
     */
    DLOOP_Handle_get_loopptr_macro(type, old_dlp, flag);
    if (old_dlp != NULL) {
        /* dataloop already created; just return it. */
        *dlp_p = old_dlp;
        DLOOP_Handle_get_loopsize_macro(type, *dlsz_p, flag);
        DLOOP_Handle_get_loopdepth_macro(type, *dldepth_p, flag);
        return;
    }

    PREPEND_PREFIX(Type_access_contents)(type, &ints, &aints, &types);

    /* first check for zero count on types where that makes sense */
    switch (combiner) {
        case MPI_COMBINER_CONTIGUOUS:
        case MPI_COMBINER_VECTOR:
        case MPI_COMBINER_HVECTOR_INTEGER:
        case MPI_COMBINER_HVECTOR:
        case MPI_COMBINER_INDEXED_BLOCK:
        case MPI_COMBINER_HINDEXED_BLOCK:
        case MPI_COMBINER_INDEXED:
        case MPI_COMBINER_HINDEXED_INTEGER:
        case MPI_COMBINER_HINDEXED:
        case MPI_COMBINER_STRUCT_INTEGER:
        case MPI_COMBINER_STRUCT:
            if (ints[0] == 0) {
                PREPEND_PREFIX(Dataloop_create_contiguous)(0,
                                                           MPI_INT,
                                                           dlp_p,
                                                           dlsz_p,
                                                           dldepth_p,
                                                           flag);
                goto clean_exit;
            }
            break;
        default:
            break;
    }

    /* recurse, processing types "below" this one before processing
     * this one, if those type don't already have dataloops.
     *
     * note: in the struct case below we'll handle any additional
     *       types "below" the current one.
     */
    MPIR_Type_get_envelope_impl(types[0], &dummy1, &dummy2, &dummy3, &type0_combiner);
    if (type0_combiner != MPI_COMBINER_NAMED)
    {
        DLOOP_Handle_get_loopptr_macro(types[0], old_dlp, flag);
        if (old_dlp == NULL)
        {
            /* no dataloop already present; create and store one */
            PREPEND_PREFIX(Dataloop_create)(types[0],
                                            &old_dlp,
                                            &old_dlsz,
                                            &old_dldepth,
                                            flag);

            DLOOP_Handle_set_loopptr_macro(types[0], old_dlp, flag);
            DLOOP_Handle_set_loopsize_macro(types[0], old_dlsz, flag);
            DLOOP_Handle_set_loopdepth_macro(types[0], old_dldepth, flag);
        }
        else {
            DLOOP_Handle_get_loopsize_macro(types[0], old_dlsz, flag);
            DLOOP_Handle_get_loopdepth_macro(types[0], old_dldepth, flag);
        }
    }

    switch (combiner)
    {
        case MPI_COMBINER_DUP:
            if (type0_combiner != MPI_COMBINER_NAMED) {
                PREPEND_PREFIX(Dataloop_dup)(old_dlp, old_dlsz, dlp_p);
                *dlsz_p    = old_dlsz;
                *dldepth_p = old_dldepth;
            }
            else {
                PREPEND_PREFIX(Dataloop_create_contiguous)(1,
                                                           types[0],
                                                           dlp_p, dlsz_p,
                                                           dldepth_p,
                                                           flag);
            }
            break;
        case MPI_COMBINER_RESIZED:
            if (type0_combiner != MPI_COMBINER_NAMED) {
                PREPEND_PREFIX(Dataloop_dup)(old_dlp, old_dlsz, dlp_p);
                *dlsz_p    = old_dlsz;
                *dldepth_p = old_dldepth;
            }
            else {
                PREPEND_PREFIX(Dataloop_create_contiguous)(1,
                                                           types[0],
                                                           dlp_p, dlsz_p,
                                                           dldepth_p,
                                                           flag);

                (*dlp_p)->el_extent = aints[1]; /* extent */
            }
            break;
        case MPI_COMBINER_CONTIGUOUS:
            PREPEND_PREFIX(Dataloop_create_contiguous)(ints[0] /* count */,
                                                       types[0] /* oldtype */,
                                                       dlp_p, dlsz_p,
                                                       dldepth_p,
                                                       flag);
            break;
        case MPI_COMBINER_VECTOR:
            PREPEND_PREFIX(Dataloop_create_vector)(ints[0] /* count */,
                                                   ints[1] /* blklen */,
                                                   ints[2] /* stride */,
                                                   0 /* stride not bytes */,
                                                   types[0] /* oldtype */,
                                                   dlp_p, dlsz_p, dldepth_p,
                                                   flag);
            break;
        case MPI_COMBINER_HVECTOR_INTEGER:
        case MPI_COMBINER_HVECTOR:
            /* fortran hvector has integer stride in bytes */
            if (combiner == MPI_COMBINER_HVECTOR_INTEGER) {
                stride = (MPI_Aint) ints[2];
            }
            else {
                stride = aints[0];
            }

            PREPEND_PREFIX(Dataloop_create_vector)(ints[0] /* count */,
                                                   ints[1] /* blklen */,
                                                   stride,
                                                   1 /* stride in bytes */,
                                                   types[0] /* oldtype */,
                                                   dlp_p, dlsz_p, dldepth_p,
                                                   flag);
            break;
        case MPI_COMBINER_INDEXED_BLOCK:
            PREPEND_PREFIX(Dataloop_create_blockindexed)(ints[0] /* count */,
                                                         ints[1] /* blklen */,
                                                         &ints[2] /* disps */,
                                                         0 /* disp not bytes */,
                                                         types[0] /* oldtype */,
                                                         dlp_p, dlsz_p,
                                                         dldepth_p,
                                                         flag);
            break;
        case MPI_COMBINER_HINDEXED_BLOCK:
            disps = (MPI_Aint *) DLOOP_Malloc(ints[0] * sizeof(MPI_Aint));
            if (disps == NULL) {
                /* report the failure the same way the allocators do:
                 * a NULL dataloop pointer */
                *dlp_p = NULL;
                goto clean_exit;
            }
            for (i = 0; i < ints[0]; i++)
                disps[i] = aints[i];
            PREPEND_PREFIX(Dataloop_create_blockindexed)(ints[0] /* count */,
                                                         ints[1] /* blklen */,
                                                         disps /* disps */,
                                                         1 /* disp is bytes */,
                                                         types[0] /* oldtype */,
                                                         dlp_p, dlsz_p,
                                                         dldepth_p,
                                                         flag);
            DLOOP_Free(disps);
            break;
        case MPI_COMBINER_INDEXED:
            PREPEND_PREFIX(Dataloop_create_indexed)(ints[0] /* count */,
                                                    &ints[1] /* blklens */,
                                                    &ints[ints[0]+1] /* disp */,
                                                    0 /* disp not in bytes */,
                                                    types[0] /* oldtype */,
                                                    dlp_p, dlsz_p, dldepth_p,
                                                    flag);
            break;
        case MPI_COMBINER_HINDEXED_INTEGER:
        case MPI_COMBINER_HINDEXED:
            if (combiner == MPI_COMBINER_HINDEXED_INTEGER) {
                /* integer displacements must be widened to MPI_Aint */
                disps = (MPI_Aint *) DLOOP_Malloc(ints[0] * sizeof(MPI_Aint));
                if (disps == NULL) {
                    *dlp_p = NULL;
                    goto clean_exit;
                }

                for (i = 0; i < ints[0]; i++) {
                    disps[i] = (MPI_Aint) ints[ints[0] + 1 + i];
                }
            }
            else {
                disps = aints;
            }

            PREPEND_PREFIX(Dataloop_create_indexed)(ints[0] /* count */,
                                                    &ints[1] /* blklens */,
                                                    disps,
                                                    1 /* disp in bytes */,
                                                    types[0] /* oldtype */,
                                                    dlp_p, dlsz_p, dldepth_p,
                                                    flag);

            if (combiner == MPI_COMBINER_HINDEXED_INTEGER) {
                DLOOP_Free(disps);
            }

            break;
        case MPI_COMBINER_STRUCT_INTEGER:
        case MPI_COMBINER_STRUCT:
            /* types[0] was handled above; make sure every other member
             * type has a dataloop as well */
            for (i = 1; i < ints[0]; i++) {
                int type_combiner;
                MPIR_Type_get_envelope_impl(types[i], &dummy1, &dummy2, &dummy3, &type_combiner);

                if (type_combiner != MPI_COMBINER_NAMED) {
                    DLOOP_Handle_get_loopptr_macro(types[i], old_dlp, flag);
                    if (old_dlp == NULL)
                    {
                        PREPEND_PREFIX(Dataloop_create)(types[i],
                                                        &old_dlp,
                                                        &old_dlsz,
                                                        &old_dldepth,
                                                        flag);

                        DLOOP_Handle_set_loopptr_macro(types[i], old_dlp,
                                                       flag);
                        DLOOP_Handle_set_loopsize_macro(types[i], old_dlsz,
                                                        flag);
                        DLOOP_Handle_set_loopdepth_macro(types[i], old_dldepth,
                                                         flag);
                    }
                }
            }
            if (combiner == MPI_COMBINER_STRUCT_INTEGER) {
                /* integer displacements must be widened to MPI_Aint */
                disps = (MPI_Aint *) DLOOP_Malloc(ints[0] * sizeof(MPI_Aint));
                if (disps == NULL) {
                    *dlp_p = NULL;
                    goto clean_exit;
                }

                for (i = 0; i < ints[0]; i++) {
                    disps[i] = (MPI_Aint) ints[ints[0] + 1 + i];
                }
            }
            else {
                disps = aints;
            }

            err = PREPEND_PREFIX(Dataloop_create_struct)(ints[0] /* count */,
                                                         &ints[1] /* blklens */,
                                                         disps,
                                                         types /* oldtype array */,
                                                         dlp_p, dlsz_p, dldepth_p,
                                                         flag);
            /* TODO if/when this function returns error codes, propagate this failure instead */
            DLOOP_Assert(0 == err);
            /* if (err) return err; */

            if (combiner == MPI_COMBINER_STRUCT_INTEGER) {
                DLOOP_Free(disps);
            }
            break;
        case MPI_COMBINER_SUBARRAY:
            /* convert to an equivalent temporary type, build its dataloop,
             * then drop the temporary */
            ndims = ints[0];
            PREPEND_PREFIX(Type_convert_subarray)(ndims,
                                                  &ints[1] /* sizes */,
                                                  &ints[1+ndims] /* subsizes */,
                                                  &ints[1+2*ndims] /* starts */,
                                                  ints[1+3*ndims] /* order */,
                                                  types[0],
                                                  &tmptype);

            PREPEND_PREFIX(Dataloop_create)(tmptype,
                                            dlp_p,
                                            dlsz_p,
                                            dldepth_p,
                                            flag);

            MPIR_Type_free_impl(&tmptype);
            break;
        case MPI_COMBINER_DARRAY:
            /* same approach as for subarray */
            ndims = ints[2];
            PREPEND_PREFIX(Type_convert_darray)(ints[0] /* size */,
                                                ints[1] /* rank */,
                                                ndims,
                                                &ints[3] /* gsizes */,
                                                &ints[3+ndims] /*distribs */,
                                                &ints[3+2*ndims] /* dargs */,
                                                &ints[3+3*ndims] /* psizes */,
                                                ints[3+4*ndims] /* order */,
                                                types[0],
                                                &tmptype);

            PREPEND_PREFIX(Dataloop_create)(tmptype,
                                            dlp_p,
                                            dlsz_p,
                                            dldepth_p,
                                            flag);

            MPIR_Type_free_impl(&tmptype);
            break;
        default:
            DLOOP_Assert(0);
            break;
    }

 clean_exit:

    PREPEND_PREFIX(Type_release_contents)(type, &ints, &aints, &types);

    /* for now we just leave the intermediate dataloops in place.
     * could remove them to save space if we wanted.
     */

    return;
}
Exemple #8
0
/*
 * MPIR_Type_convert_darray - build a datatype equivalent to a distributed
 * array description out of block/cyclic pieces.
 *
 * size, rank         - number of processes and the rank whose view is built
 * ndims              - number of array dimensions
 * array_of_gsizes    - global array size in each dimension
 * array_of_distribs  - MPI_DISTRIBUTE_BLOCK / _CYCLIC / _NONE per dimension
 * array_of_dargs     - distribution argument per dimension
 * array_of_psizes    - process grid size per dimension
 * order              - MPI_ORDER_FORTRAN, anything else is treated as C order
 * oldtype            - element type
 * newtype            - receives the resulting type
 *
 * One type is built per dimension on top of the previous one (intermediate
 * types are freed as the next one is created), and the final type is
 * wrapped in a struct with MPI_LB at displacement 0 and MPI_UB at the
 * extent of the whole global array.
 *
 * Returns MPI_SUCCESS or the error code of the first failing call.  The
 * scratch arrays are released on every exit path.
 *
 * NOTE(review): the two DLOOP_Malloc results are used without a NULL
 * check; no out-of-memory error code is visible in this block, so that is
 * left for a follow-up.
 */
int MPIR_Type_convert_darray(int size,
                             int rank,
                             int ndims,
                             int *array_of_gsizes,
                             int *array_of_distribs,
                             int *array_of_dargs,
                             int *array_of_psizes,
                             int order, MPI_Datatype oldtype, MPI_Datatype * newtype)
{
    int mpi_errno = MPI_SUCCESS;
    MPI_Datatype type_old, type_new = MPI_DATATYPE_NULL, types[3];
    int procs, tmp_rank, i, tmp_size, blklens[3];
    int *coords = NULL;
    MPI_Aint *st_offsets = NULL;
    MPI_Aint orig_extent, disps[3];

    MPIR_Datatype_get_extent_macro(oldtype, orig_extent);

    /* calculate position in Cartesian grid as MPI would (row-major
     * ordering) */
    coords = (int *) DLOOP_Malloc(ndims * sizeof(int), MPL_MEM_DATATYPE);
    procs = size;
    tmp_rank = rank;
    for (i = 0; i < ndims; i++) {
        procs = procs / array_of_psizes[i];
        coords[i] = tmp_rank / procs;
        tmp_rank = tmp_rank % procs;
    }

    st_offsets = (MPI_Aint *) DLOOP_Malloc(ndims * sizeof(MPI_Aint), MPL_MEM_DATATYPE);
    type_old = oldtype;

    if (order == MPI_ORDER_FORTRAN) {
        /* dimension 0 changes fastest */
        for (i = 0; i < ndims; i++) {
            switch (array_of_distribs[i]) {
                case MPI_DISTRIBUTE_BLOCK:
                    mpi_errno = MPII_Type_block(array_of_gsizes, i, ndims,
                                                array_of_psizes[i],
                                                coords[i], array_of_dargs[i],
                                                order, orig_extent,
                                                type_old, &type_new, st_offsets + i);
                    if (mpi_errno)
                        MPIR_ERR_POP(mpi_errno);
                    break;
                case MPI_DISTRIBUTE_CYCLIC:
                    mpi_errno = MPII_Type_cyclic(array_of_gsizes, i, ndims,
                                                 array_of_psizes[i], coords[i],
                                                 array_of_dargs[i], order,
                                                 orig_extent, type_old, &type_new, st_offsets + i);
                    if (mpi_errno)
                        MPIR_ERR_POP(mpi_errno);
                    break;
                case MPI_DISTRIBUTE_NONE:
                    /* treat it as a block distribution on 1 process */
                    mpi_errno = MPII_Type_block(array_of_gsizes, i, ndims, 1, 0,
                                                MPI_DISTRIBUTE_DFLT_DARG, order,
                                                orig_extent, type_old, &type_new, st_offsets + i);
                    if (mpi_errno)
                        MPIR_ERR_POP(mpi_errno);
                    break;
            }
            /* the first type_old is the caller's oldtype; only the
             * intermediate types created here are freed */
            if (i)
                MPIR_Type_free_impl(&type_old);
            type_old = type_new;
        }

        /* add displacement and UB */
        disps[1] = st_offsets[0];
        tmp_size = 1;
        for (i = 1; i < ndims; i++) {
            tmp_size *= array_of_gsizes[i - 1];
            disps[1] += ((MPI_Aint) tmp_size) * st_offsets[i];
        }
        /* rest done below for both Fortran and C order */
    }

    else {      /* order == MPI_ORDER_C */

        /* dimension ndims-1 changes fastest */
        for (i = ndims - 1; i >= 0; i--) {
            switch (array_of_distribs[i]) {
                case MPI_DISTRIBUTE_BLOCK:
                    mpi_errno = MPII_Type_block(array_of_gsizes, i, ndims, array_of_psizes[i],
                                                coords[i], array_of_dargs[i], order,
                                                orig_extent, type_old, &type_new, st_offsets + i);
                    if (mpi_errno)
                        MPIR_ERR_POP(mpi_errno);
                    break;
                case MPI_DISTRIBUTE_CYCLIC:
                    mpi_errno = MPII_Type_cyclic(array_of_gsizes, i, ndims,
                                                 array_of_psizes[i], coords[i],
                                                 array_of_dargs[i], order,
                                                 orig_extent, type_old, &type_new, st_offsets + i);
                    if (mpi_errno)
                        MPIR_ERR_POP(mpi_errno);
                    break;
                case MPI_DISTRIBUTE_NONE:
                    /* treat it as a block distribution on 1 process */
                    mpi_errno = MPII_Type_block(array_of_gsizes, i, ndims, array_of_psizes[i],
                                                coords[i], MPI_DISTRIBUTE_DFLT_DARG, order,
                                                orig_extent, type_old, &type_new, st_offsets + i);
                    if (mpi_errno)
                        MPIR_ERR_POP(mpi_errno);
                    break;
            }
            /* as above: never free the caller's oldtype */
            if (i != ndims - 1)
                MPIR_Type_free_impl(&type_old);
            type_old = type_new;
        }

        /* add displacement and UB */
        disps[1] = st_offsets[ndims - 1];
        tmp_size = 1;
        for (i = ndims - 2; i >= 0; i--) {
            tmp_size *= array_of_gsizes[i + 1];
            disps[1] += ((MPI_Aint) tmp_size) * st_offsets[i];
        }
    }

    disps[1] *= orig_extent;

    disps[2] = orig_extent;
    for (i = 0; i < ndims; i++)
        disps[2] *= (MPI_Aint) (array_of_gsizes[i]);

    disps[0] = 0;
    blklens[0] = blklens[1] = blklens[2] = 1;
    types[0] = MPI_LB;
    types[1] = type_new;
    types[2] = MPI_UB;

    mpi_errno = MPIR_Type_struct_impl(3, blklens, disps, types, newtype);

    /* the per-dimension type is no longer needed whether or not the
     * struct was created; free it before acting on the error code so it
     * is not leaked on failure */
    MPIR_Type_free_impl(&type_new);

    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

  fn_exit:
    /* common exit for success and failure: release the scratch arrays */
    if (st_offsets != NULL)
        DLOOP_Free(st_offsets);
    if (coords != NULL)
        DLOOP_Free(coords);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}