/*
 * MPIDU_Type_get_envelope - report how a datatype was constructed.
 *
 * Input:
 *   datatype      - handle of the datatype to query
 *
 * Output:
 *   num_integers  - number of integer arguments recorded for the type
 *   num_addresses - number of address arguments recorded for the type
 *   num_datatypes - number of datatype arguments recorded for the type
 *   combiner      - combiner recorded for the type
 *
 * Builtin handles and the five pair types tested below report
 * MPI_COMBINER_NAMED with all three counts set to zero; every other type
 * reports the values stored in its contents structure.
 *
 * Always returns MPI_SUCCESS.
 */
int MPIDU_Type_get_envelope(MPI_Datatype datatype,
                            int *num_integers,
                            int *num_addresses,
                            int *num_datatypes,
                            int *combiner)
{
    MPIDU_Datatype *type_obj;

    /* Derived type: answer comes straight from the stored contents. */
    if (HANDLE_GET_KIND(datatype) != HANDLE_KIND_BUILTIN &&
        datatype != MPI_FLOAT_INT &&
        datatype != MPI_DOUBLE_INT &&
        datatype != MPI_LONG_INT &&
        datatype != MPI_SHORT_INT &&
        datatype != MPI_LONG_DOUBLE_INT) {
        MPIDU_Datatype_get_ptr(datatype, type_obj);

        *combiner      = type_obj->contents->combiner;
        *num_integers  = type_obj->contents->nr_ints;
        *num_addresses = type_obj->contents->nr_aints;
        *num_datatypes = type_obj->contents->nr_types;

        return MPI_SUCCESS;
    }

    /* Named type: no constructor arguments to report. */
    *combiner      = MPI_COMBINER_NAMED;
    *num_integers  = 0;
    *num_addresses = 0;
    *num_datatypes = 0;

    return MPI_SUCCESS;
}
Ejemplo n.º 2
0
/*
 * MPIDI_Datatype_printf - write one row of the datatype summary table to the
 * debug log.
 *
 *   type         - datatype handle to describe
 *   depth        - value printed in the "depth" column
 *   displacement - value printed in the "disp" column
 *   blocklength  - value printed in the "blklen" column
 *   header       - when equal to 1, the table header is printed first
 *
 * The whole body is compiled only when MPL_USE_DBG_LOGGING is defined;
 * otherwise the function does nothing.
 */
void MPIDI_Datatype_printf(MPI_Datatype type,
                           int depth,
                           MPI_Aint displacement,
                           int blocklength,
                           int header)
{
#ifdef MPL_USE_DBG_LOGGING
    char *type_name;
    MPI_Aint type_size;
    MPI_Aint true_extent, full_extent;
    MPI_Aint true_lb, true_ub, lb, ub, sticky_lb, sticky_ub;

    if (HANDLE_GET_KIND(type) != HANDLE_KIND_BUILTIN) {
        MPIDU_Datatype *type_ptr;

        /* derived type: name comes from the combiner, stickiness from the object */
        MPIDU_Datatype_get_ptr(type, type_ptr);
        type_name = MPIDU_Datatype_combiner_to_string(type_ptr->contents->combiner);
        sticky_lb = type_ptr->has_sticky_lb;
        sticky_ub = type_ptr->has_sticky_ub;
    }
    else {
        /* builtin type: only MPI_LB / MPI_UB are flagged as sticky */
        type_name = MPIDU_Datatype_builtin_to_string(type);
        sticky_lb = (type == MPI_LB) ? 1 : 0;
        sticky_ub = (type == MPI_UB) ? 1 : 0;
    }

    MPIDU_Datatype_get_size_macro(type, type_size);

    MPIR_Type_get_true_extent_impl(type, &true_lb, &true_extent);
    true_ub = true_extent + true_lb;

    MPIR_Type_get_extent_impl(type, &lb, &full_extent);
    ub = full_extent + lb;

    if (header == 1) {
        /*               012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 */
        MPL_DBG_OUT(MPIR_DBG_DATATYPE,"------------------------------------------------------------------------------------------------------------------------------------------\n");
        MPL_DBG_OUT(MPIR_DBG_DATATYPE,"depth                   type         size       extent      true_lb      true_ub           lb(s)           ub(s)         disp       blklen\n");
        MPL_DBG_OUT(MPIR_DBG_DATATYPE,"------------------------------------------------------------------------------------------------------------------------------------------\n");
    }

    MPL_DBG_OUT_FMT(MPIR_DBG_DATATYPE,(MPL_DBG_FDEST,"%5d  %21s  %11d  " MPI_AINT_FMT_DEC_SPEC "  " MPI_AINT_FMT_DEC_SPEC "  " MPI_AINT_FMT_DEC_SPEC "  " MPI_AINT_FMT_DEC_SPEC "(" MPI_AINT_FMT_DEC_SPEC ")  " MPI_AINT_FMT_DEC_SPEC "(" MPI_AINT_FMT_DEC_SPEC ")  " MPI_AINT_FMT_DEC_SPEC "  %11d",
                    depth,
                    type_name,
                    (int) type_size,
                    (MPI_Aint) full_extent,
                    (MPI_Aint) true_lb,
                    (MPI_Aint) true_ub,
                    (MPI_Aint) lb,
                    (MPI_Aint) sticky_lb,
                    (MPI_Aint) ub,
                    (MPI_Aint) sticky_ub,
                    (MPI_Aint) displacement,
                    (int) blocklength));
#endif
    return;
}
Ejemplo n.º 3
0
/*
 * MPIDU_Datatype_debug - describe a datatype in the debug log.
 *
 * MPICH2 must be configured with the logging option enabled
 * (--enable-g=log) for these routines to print anything; when it is, they
 * honour the same options as the rest of the logging code, including
 * print-to-file and control by class (DATATYPE).
 *
 *   type     - datatype handle to describe
 *   array_ct - passed through to MPIDI_Datatype_contents_printf
 */
void MPIDU_Datatype_debug(MPI_Datatype type,
                          int array_ct)
{
    MPIDU_Datatype *dtp ATTRIBUTE((unused));
    int is_builtin = (HANDLE_GET_KIND(type) == HANDLE_KIND_BUILTIN);

    /* A NULL type can show up in a number of different ways, including a
     * build without Fortran support.
     */
    if (type == MPI_DATATYPE_NULL) {
        MPL_DBG_OUT_FMT(MPIR_DBG_DATATYPE,
                        (MPL_DBG_FDEST,
                         "# MPIU_Datatype_debug: MPI_Datatype = MPI_DATATYPE_NULL"));
        return;
    }

    MPL_DBG_OUT_FMT(MPIR_DBG_DATATYPE,(MPL_DBG_FDEST,
                    "# MPIU_Datatype_debug: MPI_Datatype = 0x%0x (%s)", type,
                    (is_builtin) ? MPIDU_Datatype_builtin_to_string(type) :
                    "derived"));

    /* Only derived types carry a datatype object, contents and dataloop. */
    if (!is_builtin) {
        MPIDU_Datatype_get_ptr(type, dtp);
        MPIR_Assert(dtp != NULL);

        MPL_DBG_OUT_FMT(MPIR_DBG_DATATYPE,(MPL_DBG_FDEST,
                        "# Size = " MPI_AINT_FMT_DEC_SPEC ", Extent = " MPI_AINT_FMT_DEC_SPEC ", LB = " MPI_AINT_FMT_DEC_SPEC "%s, UB = " MPI_AINT_FMT_DEC_SPEC "%s, Extent = " MPI_AINT_FMT_DEC_SPEC ", Element Size = " MPI_AINT_FMT_DEC_SPEC " (%s), %s",
                        (MPI_Aint) dtp->size,
                        (MPI_Aint) dtp->extent,
                        (MPI_Aint) dtp->lb,
                        (dtp->has_sticky_lb) ? "(sticky)" : "",
                        (MPI_Aint) dtp->ub,
                        (dtp->has_sticky_ub) ? "(sticky)" : "",
                        (MPI_Aint) dtp->extent,
                        (MPI_Aint) dtp->builtin_element_size,
                        dtp->builtin_element_size == -1 ? "multiple types" :
                        MPIDU_Datatype_builtin_to_string(dtp->basic_type),
                        dtp->is_contig ? "is N contig" : "is not N contig"));

        MPL_DBG_OUT(MPIR_DBG_DATATYPE,"# Contents:");
        MPIDI_Datatype_contents_printf(type, 0, array_ct);

        MPL_DBG_OUT(MPIR_DBG_DATATYPE,"# Dataloop:");
        MPIDI_Datatype_dot_printf(type, 0, 1);
    }
}
Ejemplo n.º 4
0
/* --BEGIN ERROR HANDLING-- */
/*
 * MPIDI_Datatype_dot_printf - pass the dataloop of a derived datatype to
 * MPIDI_Dataloop_dot_printf.
 *
 *   type   - datatype handle whose dataloop is printed
 *   depth  - forwarded unchanged to MPIDI_Dataloop_dot_printf
 *   header - forwarded unchanged to MPIDI_Dataloop_dot_printf
 *
 * A builtin type has no datatype object to look up, so it is only reported
 * with a debug message.
 */
void MPIDI_Datatype_dot_printf(MPI_Datatype type,
                               int depth,
                               int header)
{
    MPIDU_Datatype *type_obj;

    if (HANDLE_GET_KIND(type) == HANDLE_KIND_BUILTIN) {
        MPL_DBG_OUT(MPIR_DBG_DATATYPE,
                    "MPIDI_Datatype_dot_printf: type is a basic");
        return;
    }

    MPIDU_Datatype_get_ptr(type, type_obj);
    MPIDI_Dataloop_dot_printf(type_obj->dataloop, depth, header);
}
/*
 * MPIDU_Datatype_free_contents - release the contents structure of a
 * datatype and drop the references it holds on other derived datatypes.
 *
 *   dtp - datatype whose contents are freed; dtp->contents is dereferenced
 *         and must therefore be non-NULL on entry. It is set to NULL on
 *         return.
 *
 * The array of datatype handles is stored directly after the (padded)
 * MPIDU_Datatype_contents structure. The padding computed here must match
 * the padding used by MPIDU_Datatype_set_contents when the region was
 * laid out, including the HAVE_MAX_STRUCT_ALIGNMENT cap; otherwise the
 * handles would be read from the wrong offset.
 */
void MPIDU_Datatype_free_contents(MPIDU_Datatype *dtp)
{
    int i, struct_sz = sizeof(MPIDU_Datatype_contents);
    int align_sz = 8, epsilon;
    MPIDU_Datatype *old_dtp;
    MPI_Datatype *array_of_types;

    /* keep in sync with the alignment rule in MPIDU_Datatype_set_contents */
#ifdef HAVE_MAX_STRUCT_ALIGNMENT
    if (align_sz > HAVE_MAX_STRUCT_ALIGNMENT) {
        align_sz = HAVE_MAX_STRUCT_ALIGNMENT;
    }
#endif

    if ((epsilon = struct_sz % align_sz)) {
        struct_sz += align_sz - epsilon;
    }

    /* note: relies on types being first after structure */
    array_of_types = (MPI_Datatype *) ((char *) dtp->contents + struct_sz);

    /* builtin handles carry no reference count; only derived types are released */
    for (i = 0; i < dtp->contents->nr_types; i++) {
        if (HANDLE_GET_KIND(array_of_types[i]) != HANDLE_KIND_BUILTIN) {
            MPIDU_Datatype_get_ptr(array_of_types[i], old_dtp);
            MPIDU_Datatype_release(old_dtp);
        }
    }

    MPL_free(dtp->contents);
    dtp->contents = NULL;
}
Ejemplo n.º 6
0
/*@
  MPIDU_Type_vector - create a vector datatype

Input Parameters:
+ count - number of blocks in vector
. blocklength - number of elements in each block
. stride - distance from beginning of one block to the next (see next
  parameter for units)
. strideinbytes - if nonzero, then stride is in bytes, otherwise stride
  is in terms of extent of oldtype
- oldtype - type (using handle) of datatype on which vector is based

Output Parameters:
. newtype - handle of new vector datatype

  Notes:
  A count of zero is delegated to MPIDU_Type_zerolen. All products of
  count and blocklength are evaluated in MPI_Aint so that large vectors do
  not overflow the int arithmetic of the two arguments.

  Return Value:
  0 on success, MPI error code on failure.
@*/
int MPIDU_Type_vector(int count,
                      int blocklength,
                      MPI_Aint stride,
                      int strideinbytes,
                      MPI_Datatype oldtype,
                      MPI_Datatype *newtype)
{
    int mpi_errno = MPI_SUCCESS;
    int is_builtin, old_is_contig;
    MPI_Aint el_sz, old_sz;
    MPI_Datatype el_type;
    MPI_Aint old_lb, old_ub, old_extent, old_true_lb, old_true_ub, eff_stride;
    /* total number of oldtype instances, widened before multiplying */
    MPI_Aint old_ct = (MPI_Aint) count * (MPI_Aint) blocklength;

    MPIDU_Datatype *new_dtp;

    if (count == 0) return MPIDU_Type_zerolen(newtype);

    /* allocate new datatype object and handle */
    new_dtp = (MPIDU_Datatype *) MPIR_Handle_obj_alloc(&MPIDU_Datatype_mem);
    if (!new_dtp) {
        /* --BEGIN ERROR HANDLING-- */
        mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                         "MPIDU_Type_vector", __LINE__,
                                         MPI_ERR_OTHER, "**nomem", 0);
        return mpi_errno;
        /* --END ERROR HANDLING-- */
    }

    /* handle is filled in by MPIR_Handle_obj_alloc() */
    MPIR_Object_set_ref(new_dtp, 1);
    new_dtp->is_permanent = 0;
    new_dtp->is_committed = 0;
    new_dtp->attributes   = NULL;
    new_dtp->cache_id     = 0;
    new_dtp->name[0]      = 0;
    new_dtp->contents     = NULL;

    new_dtp->dataloop       = NULL;
    new_dtp->dataloop_size  = -1;
    new_dtp->dataloop_depth = -1;
    new_dtp->hetero_dloop       = NULL;
    new_dtp->hetero_dloop_size  = -1;
    new_dtp->hetero_dloop_depth = -1;

    is_builtin = (HANDLE_GET_KIND(oldtype) == HANDLE_KIND_BUILTIN);

    if (is_builtin) {
        el_sz   = (MPI_Aint) MPIDU_Datatype_get_basic_size(oldtype);
        el_type = oldtype;

        old_lb        = 0;
        old_true_lb   = 0;
        old_ub        = el_sz;
        old_true_ub   = el_sz;
        old_sz        = el_sz;
        old_extent    = el_sz;
        old_is_contig = 1;

        new_dtp->size           = old_ct * el_sz;
        new_dtp->has_sticky_lb  = 0;
        new_dtp->has_sticky_ub  = 0;

        new_dtp->alignsize    = el_sz; /* ??? */
        new_dtp->n_builtin_elements   = old_ct;
        new_dtp->builtin_element_size = el_sz;
        new_dtp->basic_type       = el_type;

        new_dtp->max_contig_blocks = count;

        eff_stride = (strideinbytes) ? stride : (stride * el_sz);
    }
    else /* user-defined base type (oldtype) */ {
        MPIDU_Datatype *old_dtp;

        MPIDU_Datatype_get_ptr(oldtype, old_dtp);
        el_sz   = old_dtp->builtin_element_size;
        el_type = old_dtp->basic_type;

        old_lb        = old_dtp->lb;
        old_true_lb   = old_dtp->true_lb;
        old_ub        = old_dtp->ub;
        old_true_ub   = old_dtp->true_ub;
        old_sz        = old_dtp->size;
        old_extent    = old_dtp->extent;
        old_is_contig = old_dtp->is_contig;

        /* old_ct is already MPI_Aint, so none of these products is
         * evaluated in int (the original multiplied count * blocklength
         * as int first). */
        new_dtp->size           = old_ct * old_dtp->size;
        new_dtp->has_sticky_lb  = old_dtp->has_sticky_lb;
        new_dtp->has_sticky_ub  = old_dtp->has_sticky_ub;

        new_dtp->alignsize    = old_dtp->alignsize;
        new_dtp->n_builtin_elements   = old_ct * old_dtp->n_builtin_elements;
        new_dtp->builtin_element_size = el_sz;
        new_dtp->basic_type       = el_type;

        new_dtp->max_contig_blocks = old_ct * old_dtp->max_contig_blocks;

        eff_stride = (strideinbytes) ? stride : (stride * old_dtp->extent);
    }

    MPIDU_DATATYPE_VECTOR_LB_UB((MPI_Aint) count,
                                eff_stride,
                                (MPI_Aint) blocklength,
                                old_lb,
                                old_ub,
                                old_extent,
                                new_dtp->lb,
                                new_dtp->ub);
    new_dtp->true_lb = new_dtp->lb + (old_true_lb - old_lb);
    new_dtp->true_ub = new_dtp->ub + (old_true_ub - old_ub);
    new_dtp->extent  = new_dtp->ub - new_dtp->lb;

    /* new type is only contig for N types if old one was, and
     * size and extent of new type are equivalent, and stride is
     * equal to blocklength * size of old type.
     */
    if ((MPI_Aint)(new_dtp->size) == new_dtp->extent &&
        eff_stride == (MPI_Aint) blocklength * old_sz &&
        old_is_contig)
    {
        new_dtp->is_contig = 1;
        new_dtp->max_contig_blocks = 1;
    }
    else {
        new_dtp->is_contig = 0;
    }

    *newtype = new_dtp->handle;

    MPL_DBG_MSG_P(MPIR_DBG_DATATYPE,VERBOSE,"vector type %x created.",
                  new_dtp->handle);

    return mpi_errno;
}
/*@
  MPIDU_Datatype_set_contents - record the constructor arguments of a
                               datatype so that MPI_Type_get_contents can
                               return them later.

  The information is kept in one allocation laid out as
      [ contents struct | types | ints | aints ]
  where the struct, the types and the ints are each padded up to the
  alignment in use (8, or HAVE_MAX_STRUCT_ALIGNMENT when that is smaller).
  Every derived datatype listed in array_of_types gains one reference.

  Returns MPI_SUCCESS on success, MPI error code on error.
@*/
int MPIDU_Datatype_set_contents(MPIDU_Datatype *new_dtp,
                                int combiner,
                                int nr_ints,
                                int nr_aints,
                                int nr_types,
                                int array_of_ints[],
                                const MPI_Aint array_of_aints[],
                                const MPI_Datatype array_of_types[])
{
    int idx, remainder, total_sz;
    int align_sz = 8;
    int struct_sz = sizeof(MPIDU_Datatype_contents);
    int types_sz  = nr_types * sizeof(MPI_Datatype);
    int ints_sz   = nr_ints * sizeof(int);
    int aints_sz  = nr_aints * sizeof(MPI_Aint);
    MPIDU_Datatype_contents *cp;
    MPIDU_Datatype *ref_dtp;
    char *cursor;

#ifdef HAVE_MAX_STRUCT_ALIGNMENT
    if (align_sz > HAVE_MAX_STRUCT_ALIGNMENT) {
        align_sz = HAVE_MAX_STRUCT_ALIGNMENT;
    }
#endif

    /* Round the struct, types and ints regions up to the alignment.
     * The aints come last in the region, so they need no padding.
     */
    remainder = struct_sz % align_sz;
    if (remainder != 0) {
        struct_sz += align_sz - remainder;
    }
    remainder = types_sz % align_sz;
    if (remainder != 0) {
        types_sz += align_sz - remainder;
    }
    remainder = ints_sz % align_sz;
    if (remainder != 0) {
        ints_sz += align_sz - remainder;
    }

    total_sz = struct_sz + types_sz + ints_sz + aints_sz;

    cp = (MPIDU_Datatype_contents *) MPL_malloc(total_sz);
    /* --BEGIN ERROR HANDLING-- */
    if (cp == NULL) {
        return MPIR_Err_create_code(MPI_SUCCESS,
                                    MPIR_ERR_RECOVERABLE,
                                    "MPIDU_Datatype_set_contents",
                                    __LINE__,
                                    MPI_ERR_OTHER,
                                    "**nomem",
                                    0);
    }
    /* --END ERROR HANDLING-- */

    cp->combiner = combiner;
    cp->nr_ints  = nr_ints;
    cp->nr_aints = nr_aints;
    cp->nr_types = nr_types;

    /* Walk a cursor through the trailing regions: types, then ints, then
     * aints. The cursor advances by the padded size of each region even
     * when nothing is copied into it.
     */
    cursor = ((char *) cp) + struct_sz;
    /* Fortran90 combiner types do not have a "base" type */
    if (nr_types > 0) {
        MPIR_Memcpy(cursor, array_of_types, nr_types * sizeof(MPI_Datatype));
    }

    cursor += types_sz;
    if (nr_ints > 0) {
        MPIR_Memcpy(cursor, array_of_ints, nr_ints * sizeof(int));
    }

    cursor += ints_sz;
    if (nr_aints > 0) {
        MPIR_Memcpy(cursor, array_of_aints, nr_aints * sizeof(MPI_Aint));
    }

    new_dtp->contents = cp;

    /* take a reference on every derived type recorded above */
    for (idx = 0; idx < nr_types; idx++) {
        if (HANDLE_GET_KIND(array_of_types[idx]) == HANDLE_KIND_BUILTIN) {
            continue;
        }
        MPIDU_Datatype_get_ptr(array_of_types[idx], ref_dtp);
        MPIDU_Datatype_add_ref(ref_dtp);
    }

    return MPI_SUCCESS;
}
Ejemplo n.º 8
0
/*
 * MPIDU_Type_indexed - create an indexed datatype.
 *
 * Input:
 *   count              - number of blocks
 *   blocklength_array  - number of oldtype instances in each block; every
 *                        entry is asserted to be non-negative
 *   displacement_array - per-block displacements; read as an array of
 *                        MPI_Aint when dispinbytes is nonzero, otherwise as
 *                        an array of int that is scaled by the extent of
 *                        oldtype
 *   dispinbytes        - selects the interpretation of displacement_array
 *   oldtype            - handle of the datatype the blocks are made of
 *
 * Output:
 *   newtype            - handle of the new datatype
 *
 * A count of zero, or blocklengths that are all zero, yields the result of
 * MPIDU_Type_zerolen.
 *
 * Returns MPI_SUCCESS on success, or an MPI error code ("**nomem") when
 * either the datatype object or the temporary blocklength array cannot be
 * allocated.
 */
int MPIDU_Type_indexed(int count,
                       const int *blocklength_array,
                       const void *displacement_array,
                       int dispinbytes,
                       MPI_Datatype oldtype,
                       MPI_Datatype *newtype)
{
    int mpi_errno = MPI_SUCCESS;
    int is_builtin, old_is_contig;
    int i;
    MPI_Aint contig_count;
    MPI_Aint el_sz, el_ct, old_ct, old_sz;
    MPI_Aint old_lb, old_ub, old_extent, old_true_lb, old_true_ub;
    MPI_Aint min_lb = 0, max_ub = 0, eff_disp;
    MPI_Datatype el_type;

    MPIDU_Datatype *new_dtp;

    if (count == 0) return MPIDU_Type_zerolen(newtype);

    /* sanity check that blocklens are all non-negative */
    for (i = 0; i < count; ++i) {
        DLOOP_Assert(blocklength_array[i] >= 0);
    }

    /* allocate new datatype object and handle */
    new_dtp = (MPIDU_Datatype *) MPIR_Handle_obj_alloc(&MPIDU_Datatype_mem);
    /* --BEGIN ERROR HANDLING-- */
    if (!new_dtp)
    {
        mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
                                         MPIR_ERR_RECOVERABLE,
                                         "MPIDU_Type_indexed",
                                         __LINE__,
                                         MPI_ERR_OTHER,
                                         "**nomem",
                                         0);
        return mpi_errno;
    }
    /* --END ERROR HANDLING-- */

    /* handle is filled in by MPIR_Handle_obj_alloc() */
    MPIR_Object_set_ref(new_dtp, 1);
    new_dtp->is_permanent = 0;
    new_dtp->is_committed = 0;
    new_dtp->attributes   = NULL;
    new_dtp->cache_id     = 0;
    new_dtp->name[0]      = 0;
    new_dtp->contents     = NULL;

    new_dtp->dataloop       = NULL;
    new_dtp->dataloop_size  = -1;
    new_dtp->dataloop_depth = -1;
    new_dtp->hetero_dloop       = NULL;
    new_dtp->hetero_dloop_size  = -1;
    new_dtp->hetero_dloop_depth = -1;

    is_builtin = (HANDLE_GET_KIND(oldtype) == HANDLE_KIND_BUILTIN);

    if (is_builtin)
    {
        /* builtins are handled differently than user-defined types because
         * they have no associated dataloop or datatype structure.
         */
        el_sz      = MPIDU_Datatype_get_basic_size(oldtype);
        old_sz     = el_sz;
        el_ct      = 1;
        el_type    = oldtype;

        old_lb        = 0;
        old_true_lb   = 0;
        old_ub        = (MPI_Aint) el_sz;
        old_true_ub   = (MPI_Aint) el_sz;
        old_extent    = (MPI_Aint) el_sz;
        old_is_contig = 1;

        new_dtp->has_sticky_ub = 0;
        new_dtp->has_sticky_lb = 0;

        MPIR_Assign_trunc(new_dtp->alignsize, el_sz, MPI_Aint);
        new_dtp->builtin_element_size = el_sz;
        new_dtp->basic_type       = el_type;

        new_dtp->max_contig_blocks = count;
    }
    else
    {
        /* user-defined base type (oldtype) */
        MPIDU_Datatype *old_dtp;

        MPIDU_Datatype_get_ptr(oldtype, old_dtp);

        /* Ensure that "builtin_element_size" fits into an int datatype. */
        MPIR_Ensure_Aint_fits_in_int(old_dtp->builtin_element_size);

        el_sz   = old_dtp->builtin_element_size;
        old_sz  = old_dtp->size;
        el_ct   = old_dtp->n_builtin_elements;
        el_type = old_dtp->basic_type;

        old_lb        = old_dtp->lb;
        old_true_lb   = old_dtp->true_lb;
        old_ub        = old_dtp->ub;
        old_true_ub   = old_dtp->true_ub;
        old_extent    = old_dtp->extent;
        old_is_contig = old_dtp->is_contig;

        new_dtp->has_sticky_lb = old_dtp->has_sticky_lb;
        new_dtp->has_sticky_ub = old_dtp->has_sticky_ub;
        /* inherit the alignment of the base type, as MPIDU_Type_vector
         * does; previously this field was left unset on this path */
        new_dtp->alignsize     = old_dtp->alignsize;
        new_dtp->builtin_element_size  = (MPI_Aint) el_sz;
        new_dtp->basic_type        = el_type;

        new_dtp->max_contig_blocks = 0;
        for (i = 0; i < count; i++) {
            new_dtp->max_contig_blocks
                += old_dtp->max_contig_blocks
                    * ((MPI_Aint) blocklength_array[i]);
        }
    }

    /* find the first nonzero blocklength element */
    i = 0;
    while (i < count && blocklength_array[i] == 0) i++;

    if (i == count) {
        /* every block is empty: discard the object and return a zero-length type */
        MPIR_Handle_obj_free(&MPIDU_Datatype_mem, new_dtp);
        return MPIDU_Type_zerolen(newtype);
    }

    /* priming for loop */
    old_ct = blocklength_array[i];
    eff_disp = (dispinbytes) ? ((const MPI_Aint *) displacement_array)[i] :
        (((MPI_Aint) ((const int *) displacement_array)[i]) * old_extent);

    MPIDU_DATATYPE_BLOCK_LB_UB((MPI_Aint) blocklength_array[i],
                               eff_disp,
                               old_lb,
                               old_ub,
                               old_extent,
                               min_lb,
                               max_ub);

    /* determine min lb, max ub, and count of old types in remaining
     * nonzero size blocks
     */
    for (i++; i < count; i++)
    {
        MPI_Aint tmp_lb, tmp_ub;

        if (blocklength_array[i] > 0) {
            old_ct += blocklength_array[i]; /* add more oldtypes */

            eff_disp = (dispinbytes) ? ((const MPI_Aint *) displacement_array)[i] :
                (((MPI_Aint) ((const int *) displacement_array)[i]) * old_extent);

            /* calculate ub and lb for this block */
            MPIDU_DATATYPE_BLOCK_LB_UB((MPI_Aint)(blocklength_array[i]),
                                       eff_disp,
                                       old_lb,
                                       old_ub,
                                       old_extent,
                                       tmp_lb,
                                       tmp_ub);

            if (tmp_lb < min_lb) min_lb = tmp_lb;
            if (tmp_ub > max_ub) max_ub = tmp_ub;
        }
    }

    new_dtp->size = old_ct * old_sz;

    new_dtp->lb      = min_lb;
    new_dtp->ub      = max_ub;
    new_dtp->true_lb = min_lb + (old_true_lb - old_lb);
    new_dtp->true_ub = max_ub + (old_true_ub - old_ub);
    new_dtp->extent  = max_ub - min_lb;

    new_dtp->n_builtin_elements = old_ct * el_ct;

    /* new type is only contig for N types if it's all one big
     * block, its size and extent are the same, and the old type
     * was also contiguous.
     */
    new_dtp->is_contig = 0;
    if (old_is_contig)
    {
        /* MPIDU_Type_indexed_count_contig is handed MPI_Aint blocklengths,
         * so widen the int array into a temporary copy */
        MPI_Aint *blklens = MPL_malloc(count * sizeof(MPI_Aint));

        /* --BEGIN ERROR HANDLING-- */
        if (blklens == NULL)
        {
            MPIR_Handle_obj_free(&MPIDU_Datatype_mem, new_dtp);
            mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
                                             MPIR_ERR_RECOVERABLE,
                                             "MPIDU_Type_indexed",
                                             __LINE__,
                                             MPI_ERR_OTHER,
                                             "**nomem",
                                             0);
            return mpi_errno;
        }
        /* --END ERROR HANDLING-- */

        for (i = 0; i < count; i++) {
            blklens[i] = blocklength_array[i];
        }
        contig_count = MPIDU_Type_indexed_count_contig(count,
                                                       blklens,
                                                       displacement_array,
                                                       dispinbytes,
                                                       old_extent);
        new_dtp->max_contig_blocks = contig_count;
        if ((contig_count == 1) &&
            ((MPI_Aint) new_dtp->size == new_dtp->extent))
        {
            new_dtp->is_contig = 1;
        }
        MPL_free(blklens);
    }

    *newtype = new_dtp->handle;
    return mpi_errno;
}
Ejemplo n.º 9
0
/*
 * MPIDI_Datatype_contents_printf - recursively print the stored constructor
 * arguments ("contents") of a datatype to the debug log.
 *
 *   type   - datatype handle to describe
 *   depth  - nesting level; used for indentation via
 *            MPIDI_Datatype_depth_spacing and incremented for child types
 *   acount - upper bound on the number of array entries printed for the
 *            indexed, hindexed, struct and subarray combiners
 *
 * Builtin types print only their name. Derived types without contents
 * print "<NULL>". For other derived types, copies of the ints, aints and
 * types arrays are fetched into temporary buffers before printing.
 *
 * NOTE(review): __mpidi_datatype_free_and_return is defined outside this
 * block; presumably it releases the temporary ints/aints/types buffers and
 * returns - confirm against its definition.
 */
void MPIDI_Datatype_contents_printf(MPI_Datatype type,
                                    int depth,
                                    int acount)
{
    int i;
    MPIDU_Datatype *dtp;
    MPIDU_Datatype_contents *cp;

    MPI_Aint *aints = NULL;
    MPI_Datatype *types = NULL;
    int *ints = NULL;

    if (HANDLE_GET_KIND(type) == HANDLE_KIND_BUILTIN) {
        MPL_DBG_OUT_FMT(MPIR_DBG_DATATYPE,(MPL_DBG_FDEST,"# %stype: %s\n",
                        MPIDI_Datatype_depth_spacing(depth),
                        MPIDU_Datatype_builtin_to_string(type)));
        return;
    }

    MPIDU_Datatype_get_ptr(type, dtp);
    cp = dtp->contents;

    if (cp == NULL) {
        MPL_DBG_OUT_FMT(MPIR_DBG_DATATYPE,(MPL_DBG_FDEST,"# <NULL>\n"));
        return;
    }

    /* fetch private copies of whichever argument arrays are present */
    if (cp->nr_ints > 0)
    {
        ints = (int *) MPL_malloc(cp->nr_ints * sizeof(int));
        MPIDI_Datatype_get_contents_ints(cp, ints);
    }

    if (cp->nr_aints > 0) {
        aints = (MPI_Aint *) MPL_malloc(cp->nr_aints * sizeof(MPI_Aint));
        MPIDI_Datatype_get_contents_aints(cp, aints);
    }

    if (cp->nr_types > 0) {
        types = (MPI_Datatype *) MPL_malloc(cp->nr_types * sizeof(MPI_Datatype));
        MPIDI_Datatype_get_contents_types(cp, types);
    }


    MPL_DBG_OUT_FMT(MPIR_DBG_DATATYPE,(MPL_DBG_FDEST,"# %scombiner: %s",
                    MPIDI_Datatype_depth_spacing(depth),
                    MPIDU_Datatype_combiner_to_string(cp->combiner)));

    switch (cp->combiner) {
        case MPI_COMBINER_NAMED:
        case MPI_COMBINER_DUP:
            __mpidi_datatype_free_and_return;
        case MPI_COMBINER_RESIZED:
            /* not done */
            __mpidi_datatype_free_and_return;
        case MPI_COMBINER_CONTIGUOUS:
            MPL_DBG_OUT_FMT(MPIR_DBG_DATATYPE,(MPL_DBG_FDEST,"# %scontig ct = %d\n",
                            MPIDI_Datatype_depth_spacing(depth),
                            *ints));
            MPIDI_Datatype_contents_printf(*types,
                                           depth + 1,
                                           acount);
            __mpidi_datatype_free_and_return;
        case MPI_COMBINER_VECTOR:
            MPL_DBG_OUT_FMT(MPIR_DBG_DATATYPE,(MPL_DBG_FDEST,
                            "# %svector ct = %d, blk = %d, str = %d\n",
                            MPIDI_Datatype_depth_spacing(depth),
                            ints[0],
                            ints[1],
                            ints[2]));
            MPIDI_Datatype_contents_printf(*types,
                                           depth + 1,
                                           acount);
            __mpidi_datatype_free_and_return;
        case MPI_COMBINER_HVECTOR:
            MPL_DBG_OUT_FMT(MPIR_DBG_DATATYPE,(MPL_DBG_FDEST,
                            "# %shvector ct = %d, blk = %d, str = " MPI_AINT_FMT_DEC_SPEC "\n",
                            MPIDI_Datatype_depth_spacing(depth),
                            ints[0],
                            ints[1],
                            (MPI_Aint) aints[0]));
            MPIDI_Datatype_contents_printf(*types,
                                           depth + 1,
                                           acount);
            __mpidi_datatype_free_and_return;
        case MPI_COMBINER_INDEXED:
            MPL_DBG_OUT_FMT(MPIR_DBG_DATATYPE,(MPL_DBG_FDEST,"# %sindexed ct = %d:",
                            MPIDI_Datatype_depth_spacing(depth),
                            ints[0]));
            /* ints = { count, blocklengths..., displacements... }; the
             * displacement of block i sits nr_ints/2 entries after its
             * blocklength */
            for (i=0; i < acount && i < ints[0]; i++) {
                MPL_DBG_OUT_FMT(MPIR_DBG_DATATYPE,(MPL_DBG_FDEST,
                                "# %s  indexed [%d]: blk = %d, disp = %d\n",
                                MPIDI_Datatype_depth_spacing(depth),
                                i,
                                ints[i+1],
                                ints[i+(cp->nr_ints/2)+1]));
                MPIDI_Datatype_contents_printf(*types,
                                               depth + 1,
                                               acount);
            }
            __mpidi_datatype_free_and_return;
        case MPI_COMBINER_HINDEXED:
            MPL_DBG_OUT_FMT(MPIR_DBG_DATATYPE,(MPL_DBG_FDEST,"# %shindexed ct = %d:",
                            MPIDI_Datatype_depth_spacing(depth),
                            ints[0]));
            for (i=0; i < acount && i < ints[0]; i++) {
                MPL_DBG_OUT_FMT(MPIR_DBG_DATATYPE,(MPL_DBG_FDEST,
                                "# %s  hindexed [%d]: blk = %d, disp = " MPI_AINT_FMT_DEC_SPEC "\n",
                                MPIDI_Datatype_depth_spacing(depth),
                                i,
                                (int) ints[i+1],
                                (MPI_Aint) aints[i]));
                MPIDI_Datatype_contents_printf(*types,
                                               depth + 1,
                                               acount);
            }
            __mpidi_datatype_free_and_return;
        case MPI_COMBINER_STRUCT:
            MPL_DBG_OUT_FMT(MPIR_DBG_DATATYPE,(MPL_DBG_FDEST,"# %sstruct ct = %d:",
                            MPIDI_Datatype_depth_spacing(depth),
                            (int) ints[0]));
            for (i=0; i < acount && i < ints[0]; i++) {
                MPL_DBG_OUT_FMT(MPIR_DBG_DATATYPE,(MPL_DBG_FDEST,
                                "# %s  struct[%d]: blk = %d, disp = " MPI_AINT_FMT_DEC_SPEC "\n",
                                MPIDI_Datatype_depth_spacing(depth),
                                i,
                                (int) ints[i+1],
                                (MPI_Aint) aints[i]));
                MPIDI_Datatype_contents_printf(types[i],
                                               depth + 1,
                                               acount);
            }
            __mpidi_datatype_free_and_return;
        case MPI_COMBINER_SUBARRAY:
            MPL_DBG_OUT_FMT(MPIR_DBG_DATATYPE, (MPL_DBG_FDEST,"# %ssubarray ct = %d:",
                            MPIDI_Datatype_depth_spacing(depth),
                            (int) ints[0]));
            /* ints = { ndims, sizes[ndims], subsizes[ndims], starts[ndims], ... },
             * so entry i of each array is at 1 + k*ndims + i for k = 0, 1, 2.
             * The starts index previously omitted "i" and always printed
             * starts[0]. */
            for (i=0; i< acount && i < ints[0]; i++) {
                MPL_DBG_OUT_FMT(MPIR_DBG_DATATYPE,(MPL_DBG_FDEST,
                                "# %s  sizes[%d] = %d subsizes[%d] = %d starts[%d] = %d\n",
                                MPIDI_Datatype_depth_spacing(depth),
                                i, (int)ints[i+1],
                                i, (int)ints[i+ ints[0]+1],
                                i, (int)ints[i+ 2*ints[0]+1]));
            }
            MPIDI_Datatype_contents_printf(*types,
                                           depth + 1,
                                           acount);
            __mpidi_datatype_free_and_return;

        default:
            MPL_DBG_OUT_FMT(MPIR_DBG_DATATYPE,(MPL_DBG_FDEST,"# %sunhandled combiner",
                            MPIDI_Datatype_depth_spacing(depth)));
            __mpidi_datatype_free_and_return;
    }
}
Ejemplo n.º 10
0
/*
 * MPIDI_CH3I_Get_accumulate - issue a get-accumulate RMA operation on a window.
 *
 *   origin_addr/origin_count/origin_datatype - data combined into the target
 *                                              (not sized when op is MPI_NO_OP)
 *   result_addr/result_count/result_datatype - buffer description recorded for
 *                                              the result
 *   target_rank, target_disp                 - target process and displacement
 *                                              (scaled by the target's disp_unit)
 *   target_count/target_datatype             - layout of the target data
 *   op                                       - accumulate operation
 *   win_ptr                                  - window the operation is issued on
 *   ureq                                     - optional user request; stored in
 *                                              the queued operation, or completed
 *                                              here on the local/shared path
 *
 * Behaviour visible in this function:
 *   - an access_state of MPIDI_RMA_NONE is reported as MPI_ERR_RMA_SYNC;
 *   - a target of MPI_PROC_NULL or a zero-sized target returns immediately;
 *   - when the target is this rank, the window flavor is
 *     MPI_WIN_FLAVOR_SHARED, or shared memory is allocated and both ranks
 *     report the same node_id, the operation is carried out through
 *     MPIDI_CH3I_Shm_get_acc_op;
 *   - otherwise an operation record is obtained, filled in, enqueued, and
 *     progress is attempted on the target.
 *
 * Returns MPI_SUCCESS or the error code produced by a failing step.
 */
int MPIDI_CH3I_Get_accumulate(const void *origin_addr, int origin_count,
                              MPI_Datatype origin_datatype, void *result_addr, int result_count,
                              MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp,
                              int target_count, MPI_Datatype target_datatype, MPI_Op op,
                              MPIR_Win * win_ptr, MPIR_Request * ureq)
{
    int mpi_errno = MPI_SUCCESS;
    intptr_t orig_data_sz, target_data_sz;
    int rank;
    int dt_contig ATTRIBUTE((unused));
    MPI_Aint dt_true_lb ATTRIBUTE((unused));
    MPIDU_Datatype*dtp;
    MPIDI_VC_t *orig_vc = NULL, *target_vc = NULL;
    int made_progress = 0;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_GET_ACCUMULATE);

    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3I_GET_ACCUMULATE);

    /* RMA calls are only legal inside an access epoch */
    MPIR_ERR_CHKANDJUMP(win_ptr->states.access_state == MPIDI_RMA_NONE,
                        mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");

    if (target_rank == MPI_PROC_NULL) {
        goto fn_exit;
    }

    /* only target_data_sz is used below; the other outputs are ignored here */
    MPIDI_Datatype_get_info(target_count, target_datatype, dt_contig, target_data_sz, dtp,
                            dt_true_lb);

    if (target_data_sz == 0) {
        goto fn_exit;
    }

    rank = win_ptr->comm_ptr->rank;

    if (win_ptr->shm_allocated == TRUE && target_rank != rank &&
        win_ptr->create_flavor != MPI_WIN_FLAVOR_SHARED) {
        /* check if target is local and shared memory is allocated on window,
         * if so, we directly perform this operation on shared memory region. */

        /* FIXME: Here we decide whether to perform SHM operations by checking if origin and target are on
         * the same node. However, in ch3:sock, even if origin and target are on the same node, they are
         * not within the same SHM region. Here we filter out ch3:sock by checking shm_allocated flag first,
         * which is only set to TRUE when SHM region is allocated in nemesis.
         * In future we need to figure out a way to check if origin and target are in the same "SHM comm".
         */
        MPIDI_Comm_get_vc(win_ptr->comm_ptr, rank, &orig_vc);
        MPIDI_Comm_get_vc(win_ptr->comm_ptr, target_rank, &target_vc);
    }

    /* Do != rank first (most likely branch?) */
    /* The node_id comparison is only evaluated when the first two tests fail
     * and shm_allocated is TRUE, which is exactly the case in which both VCs
     * were looked up above. */
    if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED ||
        (win_ptr->shm_allocated == TRUE && orig_vc->node_id == target_vc->node_id)) {
        mpi_errno = MPIDI_CH3I_Shm_get_acc_op(origin_addr, origin_count, origin_datatype,
                                              result_addr, result_count, result_datatype,
                                              target_rank, target_disp, target_count,
                                              target_datatype, op, win_ptr);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        if (ureq) {
            /* Complete user request and release the ch3 ref */
            mpi_errno = MPID_Request_complete(ureq);
            if (mpi_errno != MPI_SUCCESS) {
                MPIR_ERR_POP(mpi_errno);
            }
        }
    }
    else {
        MPIDI_RMA_Op_t *op_ptr = NULL;
        MPIDI_CH3_Pkt_get_accum_t *get_accum_pkt;
        MPI_Aint origin_type_size;
        MPI_Aint target_type_size;
        int use_immed_pkt = FALSE, i;
        int is_origin_contig, is_target_contig, is_result_contig;
        MPI_Aint stream_elem_count, stream_unit_count;
        MPI_Aint predefined_dtp_size, predefined_dtp_count, predefined_dtp_extent;
        MPIDU_Datatype*origin_dtp = NULL, *target_dtp = NULL, *result_dtp = NULL;
        int is_empty_origin = FALSE;

        /* Judge if origin buffer is empty */
        if (op == MPI_NO_OP)
            is_empty_origin = TRUE;

        /* Append the operation to the window's RMA ops queue */
        mpi_errno = MPIDI_CH3I_Win_get_op(win_ptr, &op_ptr);
        if (mpi_errno != MPI_SUCCESS)
            MPIR_ERR_POP(mpi_errno);

        /* TODO: Can we use the MPIDI_RMA_ACC_CONTIG optimization? */

        MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_set);

        /******************** Setting operation struct areas ***********************/

        op_ptr->origin_addr = (void *) origin_addr;
        op_ptr->origin_count = origin_count;
        op_ptr->origin_datatype = origin_datatype;
        op_ptr->result_addr = result_addr;
        op_ptr->result_count = result_count;
        op_ptr->result_datatype = result_datatype;
        op_ptr->target_rank = target_rank;

        /* Remember user request */
        op_ptr->ureq = ureq;

        /* if origin, result or target datatypes are derived, look up their
         * datatype objects; the reference counts are incremented once per
         * stream unit further below */
        if (is_empty_origin == FALSE && !MPIR_DATATYPE_IS_PREDEFINED(origin_datatype)) {
            MPIDU_Datatype_get_ptr(origin_datatype, origin_dtp);
        }
        if (!MPIR_DATATYPE_IS_PREDEFINED(result_datatype)) {
            MPIDU_Datatype_get_ptr(result_datatype, result_dtp);
        }
        if (!MPIR_DATATYPE_IS_PREDEFINED(target_datatype)) {
            MPIDU_Datatype_get_ptr(target_datatype, target_dtp);
        }

        if (is_empty_origin == FALSE) {
            MPIDU_Datatype_get_size_macro(origin_datatype, origin_type_size);
            MPIR_Assign_trunc(orig_data_sz, origin_count * origin_type_size, intptr_t);
        }
        else {
            /* If origin buffer is empty, set origin data size to 0 */
            orig_data_sz = 0;
        }

        MPIDU_Datatype_get_size_macro(target_datatype, target_type_size);

        /* Get size and count for predefined datatype elements */
        if (MPIR_DATATYPE_IS_PREDEFINED(target_datatype)) {
            predefined_dtp_size = target_type_size;
            predefined_dtp_count = target_count;
            MPIDU_Datatype_get_extent_macro(target_datatype, predefined_dtp_extent);
        }
        else {
            /* derived target: size/count/extent are taken from its basic type */
            MPIR_Assert(target_dtp->basic_type != MPI_DATATYPE_NULL);
            MPIDU_Datatype_get_size_macro(target_dtp->basic_type, predefined_dtp_size);
            predefined_dtp_count = target_data_sz / predefined_dtp_size;
            MPIDU_Datatype_get_extent_macro(target_dtp->basic_type, predefined_dtp_extent);
        }
        MPIR_Assert(predefined_dtp_count > 0 && predefined_dtp_size > 0 &&
                    predefined_dtp_extent > 0);

        /* Calculate number of predefined elements in each stream unit, and
         * total number of stream units. */
        stream_elem_count = MPIDI_CH3U_Acc_stream_size / predefined_dtp_extent;
        stream_unit_count = (predefined_dtp_count - 1) / stream_elem_count + 1;
        MPIR_Assert(stream_elem_count > 0 && stream_unit_count > 0);

        /* take one reference per stream unit on every derived datatype involved */
        for (i = 0; i < stream_unit_count; i++) {
            if (origin_dtp != NULL) {
                MPIDU_Datatype_add_ref(origin_dtp);
            }
            if (target_dtp != NULL) {
                MPIDU_Datatype_add_ref(target_dtp);
            }
            if (result_dtp != NULL) {
                MPIDU_Datatype_add_ref(result_dtp);
            }
        }

        if (is_empty_origin == FALSE) {
            MPIDU_Datatype_is_contig(origin_datatype, &is_origin_contig);
        }
        else {
            /* If origin buffer is empty, mark origin data as contig data */
            is_origin_contig = 1;
        }
        MPIDU_Datatype_is_contig(target_datatype, &is_target_contig);
        MPIDU_Datatype_is_contig(result_datatype, &is_result_contig);

        /* Judge if we can use IMMED data packet */
        if ((is_empty_origin == TRUE || MPIR_DATATYPE_IS_PREDEFINED(origin_datatype)) &&
            MPIR_DATATYPE_IS_PREDEFINED(result_datatype) &&
            MPIR_DATATYPE_IS_PREDEFINED(target_datatype) &&
            is_origin_contig && is_target_contig && is_result_contig) {
            if (target_data_sz <= MPIDI_RMA_IMMED_BYTES)
                use_immed_pkt = TRUE;
        }

        /* Judge if this operation is a piggyback candidate */
        if ((is_empty_origin == TRUE || MPIR_DATATYPE_IS_PREDEFINED(origin_datatype)) &&
            MPIR_DATATYPE_IS_PREDEFINED(result_datatype) &&
            MPIR_DATATYPE_IS_PREDEFINED(target_datatype)) {
            /* FIXME: currently we only piggyback LOCK flag with op using predefined datatypes
             * for origin, target and result data. We should extend this optimization to derived
             * datatypes as well. */
            if (orig_data_sz <= MPIR_CVAR_CH3_RMA_OP_PIGGYBACK_LOCK_DATA_SIZE)
                op_ptr->piggyback_lock_candidate = 1;
        }

        /************** Setting packet struct areas in operation ****************/

        get_accum_pkt = &(op_ptr->pkt.get_accum);

        if (use_immed_pkt) {
            MPIDI_Pkt_init(get_accum_pkt, MPIDI_CH3_PKT_GET_ACCUM_IMMED);
        }
        else {
            MPIDI_Pkt_init(get_accum_pkt, MPIDI_CH3_PKT_GET_ACCUM);
        }

        /* target address = target's window base + disp_unit * target_disp */
        get_accum_pkt->addr = (char *) win_ptr->basic_info_table[target_rank].base_addr +
            win_ptr->basic_info_table[target_rank].disp_unit * target_disp;
        get_accum_pkt->count = target_count;
        get_accum_pkt->datatype = target_datatype;
        get_accum_pkt->info.dataloop_size = 0;
        get_accum_pkt->op = op;
        get_accum_pkt->target_win_handle = win_ptr->basic_info_table[target_rank].win_handle;
        get_accum_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
        if (use_immed_pkt) {
            /* IMMED packets carry the origin data inside the packet itself */
            void *src = (void *) origin_addr, *dest = (void *) (get_accum_pkt->info.data);
            mpi_errno = immed_copy(src, dest, orig_data_sz);
            if (mpi_errno != MPI_SUCCESS)
                MPIR_ERR_POP(mpi_errno);
        }

        MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);

        mpi_errno = MPIDI_CH3I_Win_enqueue_op(win_ptr, op_ptr);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);

        mpi_errno = MPIDI_CH3I_RMA_Make_progress_target(win_ptr, target_rank, &made_progress);
        if (mpi_errno != MPI_SUCCESS)
            MPIR_ERR_POP(mpi_errno);

        /* when a non-negative threshold is configured, keep poking the
         * progress engine until the active request count drops below it */
        if (MPIR_CVAR_CH3_RMA_ACTIVE_REQ_THRESHOLD >= 0 &&
            MPIDI_CH3I_RMA_Active_req_cnt >= MPIR_CVAR_CH3_RMA_ACTIVE_REQ_THRESHOLD) {
            while (MPIDI_CH3I_RMA_Active_req_cnt >= MPIR_CVAR_CH3_RMA_ACTIVE_REQ_THRESHOLD) {
                mpi_errno = wait_progress_engine();
                if (mpi_errno != MPI_SUCCESS)
                    MPIR_ERR_POP(mpi_errno);
            }
        }
    }

  fn_exit:
    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3I_GET_ACCUMULATE);
    return mpi_errno;

    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
Ejemplo n.º 11
0
/*
 * MPIDI_CH3I_Put - perform or queue a put of origin data into a target window.
 *
 * origin_addr / origin_count / origin_datatype describe the data to send;
 * target_rank / target_disp / target_count / target_datatype describe where it
 * lands in the window of the target; win_ptr is the window; ureq is an
 * optional user request (may be NULL).
 *
 * Behavior visible in this routine:
 *   - An access_state of MPIDI_RMA_NONE is reported through
 *     MPIR_ERR_CHKANDJUMP with MPI_ERR_RMA_SYNC ("**rmasync").
 *   - A target of MPI_PROC_NULL, or an origin size of zero bytes, returns
 *     right away; ureq is not touched on those two paths.
 *   - If the target is this process, the window flavor is
 *     MPI_WIN_FLAVOR_SHARED, or shared memory is allocated and both VCs carry
 *     the same node_id, the put is handed to MPIDI_CH3I_Shm_put_op and ureq
 *     (when non-NULL) is completed via MPID_Request_complete.
 *   - Otherwise an RMA op is obtained from the window, filled in, enqueued,
 *     and progress is poked for the target.
 *
 * Returns mpi_errno: MPI_SUCCESS, or whatever error was raised on the way.
 */
int MPIDI_CH3I_Put(const void *origin_addr, int origin_count, MPI_Datatype
                   origin_datatype, int target_rank, MPI_Aint target_disp,
                   int target_count, MPI_Datatype target_datatype, MPIR_Win * win_ptr,
                   MPIR_Request * ureq)
{
    int mpi_errno = MPI_SUCCESS;
    /* dt_contig and dt_true_lb are only written by MPIDI_Datatype_get_info */
    int dt_contig ATTRIBUTE((unused)), my_rank;
    MPI_Aint dt_true_lb ATTRIBUTE((unused));
    MPIDU_Datatype *dtype_ptr;
    intptr_t data_sz;
    MPIDI_VC_t *orig_vc = NULL, *target_vc = NULL;
    MPIDI_RMA_Op_t *op_ptr = NULL;
    MPIDI_CH3_Pkt_put_t *put_pkt = NULL;
    int is_origin_contig, is_target_contig;
    int origin_predef, target_predef, both_predef;
    int use_immed_pkt = FALSE;
    int is_local_op;
    int made_progress = 0;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_CH3I_PUT);

    MPIR_FUNC_VERBOSE_RMA_ENTER(MPID_STATE_MPIDI_CH3I_PUT);

    MPIR_ERR_CHKANDJUMP(win_ptr->states.access_state == MPIDI_RMA_NONE,
                        mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");

    /* Nothing to do for the null process. */
    if (target_rank == MPI_PROC_NULL) {
        goto fn_exit;
    }

    MPIDI_Datatype_get_info(origin_count, origin_datatype, dt_contig, data_sz, dtype_ptr,
                            dt_true_lb);

    /* Nothing to do for an empty transfer. */
    if (data_sz == 0) {
        goto fn_exit;
    }

    my_rank = win_ptr->comm_ptr->rank;

    /* Decide whether the put can be carried out locally. */
    is_local_op = (target_rank == my_rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED);
    if (!is_local_op && win_ptr->shm_allocated == TRUE) {
        /* Shared memory is allocated on the window: the put is also local
         * when origin and target live on the same node.
         *
         * FIXME: node equality is used as the test for "same SHM region".
         * In ch3:sock two processes on one node do not share an SHM region;
         * that case is filtered out by looking at shm_allocated first, which
         * is only TRUE when nemesis allocated the SHM region.  Eventually
         * this should check whether origin and target are in the same
         * "SHM comm". */
        MPIDI_Comm_get_vc(win_ptr->comm_ptr, my_rank, &orig_vc);
        MPIDI_Comm_get_vc(win_ptr->comm_ptr, target_rank, &target_vc);
        is_local_op = (orig_vc->node_id == target_vc->node_id);
    }

    if (is_local_op) {
        /* Local path: do the put right here. */
        mpi_errno = MPIDI_CH3I_Shm_put_op(origin_addr, origin_count, origin_datatype, target_rank,
                                          target_disp, target_count, target_datatype, win_ptr);
        if (mpi_errno) {
            MPIR_ERR_POP(mpi_errno);
        }

        if (ureq) {
            /* Mark the user request complete and drop the ch3 reference. */
            mpi_errno = MPID_Request_complete(ureq);
            if (mpi_errno != MPI_SUCCESS) {
                MPIR_ERR_POP(mpi_errno);
            }
        }

        goto fn_exit;
    }

    /* Remote path: build an operation and queue it on the window. */
    mpi_errno = MPIDI_CH3I_Win_get_op(win_ptr, &op_ptr);
    if (mpi_errno != MPI_SUCCESS) {
        MPIR_ERR_POP(mpi_errno);
    }

    MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_set);

    /* ---- operation record ---- */

    /* FIXME: For contig and very short operations, use a streamlined op */
    op_ptr->origin_addr = (void *) origin_addr;
    op_ptr->origin_count = origin_count;
    op_ptr->origin_datatype = origin_datatype;
    op_ptr->target_rank = target_rank;
    op_ptr->ureq = ureq;        /* keep the user request with the op */

    origin_predef = MPIR_DATATYPE_IS_PREDEFINED(origin_datatype);
    target_predef = MPIR_DATATYPE_IS_PREDEFINED(target_datatype);
    both_predef = origin_predef && target_predef;

    /* Derived datatypes get an extra reference while the op refers to them. */
    if (!origin_predef) {
        MPIDU_Datatype_get_ptr(origin_datatype, dtype_ptr);
        MPIDU_Datatype_add_ref(dtype_ptr);
    }
    if (!target_predef) {
        MPIDU_Datatype_get_ptr(target_datatype, dtype_ptr);
        MPIDU_Datatype_add_ref(dtype_ptr);
    }

    MPIDU_Datatype_is_contig(origin_datatype, &is_origin_contig);
    MPIDU_Datatype_is_contig(target_datatype, &is_target_contig);

    /* An IMMED packet is used only for predefined, contiguous types on both
     * sides whose payload is at most MPIDI_RMA_IMMED_BYTES. */
    if (both_predef && is_origin_contig && is_target_contig && data_sz <= MPIDI_RMA_IMMED_BYTES) {
        use_immed_pkt = TRUE;
    }

    /* FIXME: the LOCK flag is currently piggybacked only on ops that use
     * predefined datatypes for both origin and target; this optimization
     * should be extended to derived datatypes as well. */
    if (both_predef && data_sz <= MPIR_CVAR_CH3_RMA_OP_PIGGYBACK_LOCK_DATA_SIZE) {
        op_ptr->piggyback_lock_candidate = 1;
    }

    /* ---- packet embedded in the operation ---- */

    put_pkt = &(op_ptr->pkt.put);

    if (use_immed_pkt) {
        MPIDI_Pkt_init(put_pkt, MPIDI_CH3_PKT_PUT_IMMED);
    }
    else {
        MPIDI_Pkt_init(put_pkt, MPIDI_CH3_PKT_PUT);
    }

    put_pkt->source_win_handle = win_ptr->handle;
    put_pkt->target_win_handle = win_ptr->basic_info_table[target_rank].win_handle;
    /* target address = target's window base + displacement scaled by its disp_unit */
    put_pkt->addr = (char *) win_ptr->basic_info_table[target_rank].base_addr +
        win_ptr->basic_info_table[target_rank].disp_unit * target_disp;
    put_pkt->count = target_count;
    put_pkt->datatype = target_datatype;
    put_pkt->flags = MPIDI_CH3_PKT_FLAG_NONE;
    put_pkt->info.dataloop_size = 0;

    if (use_immed_pkt) {
        /* NOTE(review): info.data presumably shares storage with
         * info.dataloop_size, so this copy must stay after the store
         * above -- confirm against the packet definition. */
        mpi_errno = immed_copy((void *) origin_addr, (void *) (put_pkt->info.data), data_sz);
        if (mpi_errno != MPI_SUCCESS) {
            MPIR_ERR_POP(mpi_errno);
        }
    }

    MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);

    mpi_errno = MPIDI_CH3I_Win_enqueue_op(win_ptr, op_ptr);
    if (mpi_errno) {
        MPIR_ERR_POP(mpi_errno);
    }

    mpi_errno = MPIDI_CH3I_RMA_Make_progress_target(win_ptr, target_rank, &made_progress);
    if (mpi_errno != MPI_SUCCESS) {
        MPIR_ERR_POP(mpi_errno);
    }

    /* With a non-negative threshold configured, keep driving the progress
     * engine until the active request count falls below it. */
    if (MPIR_CVAR_CH3_RMA_ACTIVE_REQ_THRESHOLD >= 0) {
        while (MPIDI_CH3I_RMA_Active_req_cnt >= MPIR_CVAR_CH3_RMA_ACTIVE_REQ_THRESHOLD) {
            mpi_errno = wait_progress_engine();
            if (mpi_errno != MPI_SUCCESS) {
                MPIR_ERR_POP(mpi_errno);
            }
        }
    }

  fn_exit:
    MPIR_FUNC_VERBOSE_RMA_EXIT(MPID_STATE_MPIDI_CH3I_PUT);
    return mpi_errno;

    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}