/*@
  Dataloop_create_contiguous - create the dataloop representation for a
  contiguous datatype

  Input Parameters:
+ DLOOP_Count icount - number of contiguous copies of oldtype
. DLOOP_Type oldtype - base type being replicated
- int flag           - dataloop variant selector (e.g. DLOOP_DATALOOP_ALL_BYTES)

  Output Parameters:
+ DLOOP_Dataloop **dlp_p - new dataloop
. DLOOP_Size *dlsz_p     - size in bytes of the new dataloop
- int *dldepth_p         - depth of the new dataloop

.N Errors
.N Returns 0 on success, -1 on failure.
@*/
int PREPEND_PREFIX(Dataloop_create_contiguous)(DLOOP_Count icount,
                                               DLOOP_Type oldtype,
                                               DLOOP_Dataloop **dlp_p,
                                               DLOOP_Size *dlsz_p,
                                               int *dldepth_p,
                                               int flag)
{
    DLOOP_Count count;
    int is_builtin, apply_contig_coalescing = 0;
    int new_loop_depth;
    DLOOP_Size new_loop_sz;
    DLOOP_Dataloop *new_dlp;

    count = icount;

    /* builtin types have no dataloop of their own */
    is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1;

    if (is_builtin) {
        new_loop_depth = 1;
    }
    else {
        int old_loop_depth = 0;
        DLOOP_Offset old_size = 0, old_extent = 0;
        DLOOP_Dataloop *old_loop_ptr;

        DLOOP_Handle_get_loopdepth_macro(oldtype, old_loop_depth, flag);
        DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr, flag);
        DLOOP_Handle_get_size_macro(oldtype, old_size);
        DLOOP_Handle_get_extent_macro(oldtype, old_extent);

        /* if we have a simple combination of contigs, coalesce:
         * a contig of a dense (size == extent) contig collapses into a
         * single contig loop with a multiplied count, keeping depth flat */
        if (((old_loop_ptr->kind & DLOOP_KIND_MASK) == DLOOP_KIND_CONTIG) &&
            (old_size == old_extent))
        {
            /* will just copy contig and multiply count */
            apply_contig_coalescing = 1;
            new_loop_depth           = old_loop_depth;
        }
        else {
            new_loop_depth = old_loop_depth + 1;
        }
    }

    if (is_builtin) {
        DLOOP_Offset basic_sz = 0;

        PREPEND_PREFIX(Dataloop_alloc)(DLOOP_KIND_CONTIG,
                                       count,
                                       &new_dlp,
                                       &new_loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!new_dlp) return -1;
        /* --END ERROR HANDLING-- */

        DLOOP_Handle_get_size_macro(oldtype, basic_sz);
        new_dlp->kind = DLOOP_KIND_CONTIG | DLOOP_FINAL_MASK;

        if (flag == DLOOP_DATALOOP_ALL_BYTES) {
            /* collapse the builtin element into raw bytes; count becomes
             * a byte count */
            count             *= basic_sz;
            new_dlp->el_size   = 1;
            new_dlp->el_extent = 1;
            new_dlp->el_type   = MPI_BYTE;
        }
        else {
            new_dlp->el_size   = basic_sz;
            new_dlp->el_extent = new_dlp->el_size;
            new_dlp->el_type   = oldtype;
        }

        new_dlp->loop_params.c_t.count = count;
    }
    else {
        /* user-defined base type (oldtype) */
        DLOOP_Dataloop *old_loop_ptr;
        MPI_Aint old_loop_sz = 0;

        DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr, flag);
        DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz, flag);

        if (apply_contig_coalescing) {
            /* make a copy of the old loop and multiply the count */
            PREPEND_PREFIX(Dataloop_dup)(old_loop_ptr,
                                         old_loop_sz,
                                         &new_dlp);
            /* --BEGIN ERROR HANDLING-- */
            if (!new_dlp) return -1;
            /* --END ERROR HANDLING-- */

            new_dlp->loop_params.c_t.count *= count;

            new_loop_sz = old_loop_sz;
            /* depth is unchanged from the old loop */
            DLOOP_Handle_get_loopdepth_macro(oldtype, new_loop_depth, flag);
        }
        else {
            /* allocate space for new loop including copy of old */
            PREPEND_PREFIX(Dataloop_alloc_and_copy)(DLOOP_KIND_CONTIG,
                                                    count,
                                                    old_loop_ptr,
                                                    old_loop_sz,
                                                    &new_dlp,
                                                    &new_loop_sz);
            /* --BEGIN ERROR HANDLING-- */
            if (!new_dlp) return -1;
            /* --END ERROR HANDLING-- */

            new_dlp->kind = DLOOP_KIND_CONTIG;
            DLOOP_Handle_get_size_macro(oldtype, new_dlp->el_size);
            DLOOP_Handle_get_extent_macro(oldtype, new_dlp->el_extent);
            DLOOP_Handle_get_basic_type_macro(oldtype, new_dlp->el_type);

            new_dlp->loop_params.c_t.count = count;
        }
    }

    *dlp_p     = new_dlp;
    *dlsz_p    = new_loop_sz;
    *dldepth_p = new_loop_depth;

    return 0;
}
/*@ Dataloop_create_blockindexed - create blockindexed dataloop Arguments: + DLOOP_Count count . void *displacement_array (array of either MPI_Aints or ints) . int displacement_in_bytes (boolean) . MPI_Datatype old_type . DLOOP_Dataloop **output_dataloop_ptr . int output_dataloop_size . int output_dataloop_depth - int flag .N Errors .N Returns 0 on success, -1 on failure. @*/ int MPIR_Dataloop_create_blockindexed(DLOOP_Count icount, DLOOP_Count iblklen, const void *disp_array, int dispinbytes, DLOOP_Type oldtype, DLOOP_Dataloop ** dlp_p, DLOOP_Size * dlsz_p, int *dldepth_p, int flag) { int err, is_builtin, is_vectorizable = 1; int i, old_loop_depth; DLOOP_Size new_loop_sz; DLOOP_Count contig_count, count, blklen; DLOOP_Offset old_extent, eff_disp0, eff_disp1, last_stride; DLOOP_Dataloop *new_dlp; count = (DLOOP_Count) icount; /* avoid subsequent casting */ blklen = (DLOOP_Count) iblklen; /* if count or blklen are zero, handle with contig code, call it a int */ if (count == 0 || blklen == 0) { err = MPIR_Dataloop_create_contiguous(0, MPI_INT, dlp_p, dlsz_p, dldepth_p, flag); return err; } is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1; if (is_builtin) { DLOOP_Handle_get_size_macro(oldtype, old_extent); old_loop_depth = 0; } else { DLOOP_Handle_get_extent_macro(oldtype, old_extent); DLOOP_Handle_get_loopdepth_macro(oldtype, old_loop_depth); } contig_count = MPIR_Type_blockindexed_count_contig(count, blklen, disp_array, dispinbytes, old_extent); /* optimization: * * if contig_count == 1 and block starts at displacement 0, * store it as a contiguous rather than a blockindexed dataloop. */ if ((contig_count == 1) && ((!dispinbytes && ((int *) disp_array)[0] == 0) || (dispinbytes && ((MPI_Aint *) disp_array)[0] == 0))) { err = MPIR_Dataloop_create_contiguous(icount * iblklen, oldtype, dlp_p, dlsz_p, dldepth_p, flag); return err; } /* optimization: * * if contig_count == 1 store it as a blockindexed with one * element rather than as a lot of individual blocks. 
*/ if (contig_count == 1) { /* adjust count and blklen and drop through */ blklen *= count; count = 1; iblklen *= icount; icount = 1; } /* optimization: * * if displacements start at zero and result in a fixed stride, * store it as a vector rather than a blockindexed dataloop. */ eff_disp0 = (dispinbytes) ? ((DLOOP_Offset) ((MPI_Aint *) disp_array)[0]) : (((DLOOP_Offset) ((int *) disp_array)[0]) * old_extent); if (count > 1 && eff_disp0 == (DLOOP_Offset) 0) { eff_disp1 = (dispinbytes) ? ((DLOOP_Offset) ((MPI_Aint *) disp_array)[1]) : (((DLOOP_Offset) ((int *) disp_array)[1]) * old_extent); last_stride = eff_disp1 - eff_disp0; for (i = 2; i < count; i++) { eff_disp0 = eff_disp1; eff_disp1 = (dispinbytes) ? ((DLOOP_Offset) ((MPI_Aint *) disp_array)[i]) : (((DLOOP_Offset) ((int *) disp_array)[i]) * old_extent); if (eff_disp1 - eff_disp0 != last_stride) { is_vectorizable = 0; break; } } if (is_vectorizable) { err = MPIR_Dataloop_create_vector(count, blklen, last_stride, 1, /* strideinbytes */ oldtype, dlp_p, dlsz_p, dldepth_p, flag); return err; } } /* TODO: optimization: * * if displacements result in a fixed stride, but first displacement * is not zero, store it as a blockindexed (blklen == 1) of a vector. */ /* TODO: optimization: * * if a blockindexed of a contig, absorb the contig into the blocklen * parameter and keep the same overall depth */ /* otherwise storing as a blockindexed dataloop */ /* Q: HOW CAN WE TELL IF IT IS WORTH IT TO STORE AS AN * INDEXED WITH FEWER CONTIG BLOCKS (IF CONTIG_COUNT IS SMALL)? 
*/ if (is_builtin) { MPIR_Dataloop_alloc(DLOOP_KIND_BLOCKINDEXED, count, &new_dlp, &new_loop_sz); /* --BEGIN ERROR HANDLING-- */ if (!new_dlp) return -1; /* --END ERROR HANDLING-- */ new_dlp->kind = DLOOP_KIND_BLOCKINDEXED | DLOOP_FINAL_MASK; if (flag == DLOOP_DATALOOP_ALL_BYTES) { blklen *= old_extent; new_dlp->el_size = 1; new_dlp->el_extent = 1; new_dlp->el_type = MPI_BYTE; } else { new_dlp->el_size = old_extent; new_dlp->el_extent = old_extent; new_dlp->el_type = oldtype; } } else { DLOOP_Dataloop *old_loop_ptr = NULL; MPI_Aint old_loop_sz = 0; DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr); DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz); MPIR_Dataloop_alloc_and_copy(DLOOP_KIND_BLOCKINDEXED, count, old_loop_ptr, old_loop_sz, &new_dlp, &new_loop_sz); /* --BEGIN ERROR HANDLING-- */ if (!new_dlp) return -1; /* --END ERROR HANDLING-- */ new_dlp->kind = DLOOP_KIND_BLOCKINDEXED; DLOOP_Handle_get_size_macro(oldtype, new_dlp->el_size); DLOOP_Handle_get_extent_macro(oldtype, new_dlp->el_extent); DLOOP_Handle_get_basic_type_macro(oldtype, new_dlp->el_type); } new_dlp->loop_params.bi_t.count = count; new_dlp->loop_params.bi_t.blocksize = blklen; /* copy in displacement parameters * * regardless of dispinbytes, we store displacements in bytes in loop. */ DLOOP_Type_blockindexed_array_copy(count, disp_array, new_dlp->loop_params.bi_t.offset_array, dispinbytes, old_extent); *dlp_p = new_dlp; *dlsz_p = new_loop_sz; *dldepth_p = old_loop_depth + 1; return 0; }
/*@
  Dataloop_create_vector - create the dataloop representation for a
  vector datatype

  Input Parameters:
+ int icount        - number of blocks
. int iblocklength  - length of each block (elements of oldtype)
. MPI_Aint astride  - stride between block starts
. int strideinbytes - boolean; stride given in bytes vs. elements
. MPI_Datatype oldtype - base type
- int flag          - dataloop variant selector

  Output Parameters:
+ DLOOP_Dataloop **dlp_p - new dataloop
. int *dlsz_p            - size in bytes of the new dataloop
- int *dldepth_p         - depth of the new dataloop

  Returns 0 on success, -1 on failure.
@*/
int PREPEND_PREFIX(Dataloop_create_vector)(int icount,
                                           int iblocklength,
                                           MPI_Aint astride,
                                           int strideinbytes,
                                           DLOOP_Type oldtype,
                                           DLOOP_Dataloop **dlp_p,
                                           int *dlsz_p,
                                           int *dldepth_p,
                                           int flag)
{
    int err, is_builtin;
    int new_loop_sz, new_loop_depth;
    DLOOP_Count count, blocklength;
    DLOOP_Offset stride;
    DLOOP_Dataloop *new_dlp;

    count       = (DLOOP_Count) icount; /* avoid subsequent casting */
    blocklength = (DLOOP_Count) iblocklength;
    stride      = (DLOOP_Offset) astride;

    /* if count or blocklength are zero, handle with contig code,
     * call it a int
     */
    if (count == 0 || blocklength == 0) {
        err = PREPEND_PREFIX(Dataloop_create_contiguous)(0,
                                                         MPI_INT,
                                                         dlp_p,
                                                         dlsz_p,
                                                         dldepth_p,
                                                         flag);
        return err;
    }

    /* optimization:
     *
     * if count == 1, store as a contiguous rather than a vector dataloop.
     */
    if (count == 1) {
        err = PREPEND_PREFIX(Dataloop_create_contiguous)(iblocklength,
                                                         oldtype,
                                                         dlp_p,
                                                         dlsz_p,
                                                         dldepth_p,
                                                         flag);
        return err;
    }

    is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1;

    /* NOTE(review): the new_loop_sz values computed in this first
     * if/else are dead stores -- Dataloop_alloc / Dataloop_alloc_and_copy
     * below overwrite new_loop_sz unconditionally. Only new_loop_depth
     * computed here survives. Left as-is pending confirmation that the
     * getter macros are side-effect free. */
    if (is_builtin) {
        new_loop_sz    = sizeof(DLOOP_Dataloop);
        new_loop_depth = 1;
    }
    else {
        int old_loop_sz = 0, old_loop_depth = 0;

        DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz, flag);
        DLOOP_Handle_get_loopdepth_macro(oldtype, old_loop_depth, flag);

        /* TODO: ACCOUNT FOR PADDING IN LOOP_SZ HERE */
        new_loop_sz    = sizeof(DLOOP_Dataloop) + old_loop_sz;
        new_loop_depth = old_loop_depth + 1;
    }

    if (is_builtin) {
        DLOOP_Offset basic_sz = 0;

        PREPEND_PREFIX(Dataloop_alloc)(DLOOP_KIND_VECTOR,
                                       count,
                                       &new_dlp,
                                       &new_loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!new_dlp) return -1;
        /* --END ERROR HANDLING-- */

        DLOOP_Handle_get_size_macro(oldtype, basic_sz);
        new_dlp->kind = DLOOP_KIND_VECTOR | DLOOP_FINAL_MASK;

        if (flag == DLOOP_DATALOOP_ALL_BYTES) {
            blocklength       *= basic_sz;
            new_dlp->el_size   = 1;
            new_dlp->el_extent = 1;
            new_dlp->el_type   = MPI_BYTE;

            if(!strideinbytes)
                /* the stride was specified in units of oldtype, now
                   that we're using bytes, rather than oldtype, we
                   need to update stride. */
                stride *= basic_sz;
        }
        else {
            new_dlp->el_size   = basic_sz;
            new_dlp->el_extent = new_dlp->el_size;
            new_dlp->el_type   = oldtype;
        }
    }
    else {
        /* user-defined base type (oldtype) */
        DLOOP_Dataloop *old_loop_ptr;
        int old_loop_sz = 0;

        DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr, flag);
        DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz, flag);

        PREPEND_PREFIX(Dataloop_alloc_and_copy)(DLOOP_KIND_VECTOR,
                                                count,
                                                old_loop_ptr,
                                                old_loop_sz,
                                                &new_dlp,
                                                &new_loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!new_dlp) return -1;
        /* --END ERROR HANDLING-- */

        new_dlp->kind = DLOOP_KIND_VECTOR;
        DLOOP_Handle_get_size_macro(oldtype, new_dlp->el_size);
        DLOOP_Handle_get_extent_macro(oldtype, new_dlp->el_extent);
        DLOOP_Handle_get_basic_type_macro(oldtype, new_dlp->el_type);
    }

    /* vector-specific members
     *
     * stride stored in dataloop is always in bytes for local rep of type
     */
    new_dlp->loop_params.v_t.count     = count;
    new_dlp->loop_params.v_t.blocksize = blocklength;
    new_dlp->loop_params.v_t.stride    =
        (strideinbytes) ? stride : stride * new_dlp->el_extent;

    *dlp_p     = new_dlp;
    *dlsz_p    = new_loop_sz;
    *dldepth_p = new_loop_depth;

    return 0;
}
/* Dataloop_create_indexed - create the dataloop representation for an
 * indexed datatype (per-block lengths and displacements).
 *
 * Falls back to simpler loop kinds (contig, blockindexed, and -- via
 * blockindexed -- vector) whenever the index pattern permits.
 *
 * Returns MPI_SUCCESS on success, -1 on allocation failure.
 */
int PREPEND_PREFIX(Dataloop_create_indexed)(DLOOP_Count icount,
                                            const DLOOP_Size *blocklength_array,
                                            const void *displacement_array,
                                            int dispinbytes,
                                            MPI_Datatype oldtype,
                                            DLOOP_Dataloop **dlp_p,
                                            DLOOP_Size *dlsz_p,
                                            int *dldepth_p,
                                            int flag)
{
    int err, is_builtin;
    int old_loop_depth;
    MPI_Aint i;
    DLOOP_Size new_loop_sz, blksz;
    DLOOP_Count first;
    DLOOP_Count old_type_count = 0, contig_count, count;
    DLOOP_Offset old_extent;
    struct DLOOP_Dataloop *new_dlp;

    count = (DLOOP_Count) icount; /* avoid subsequent casting */

    /* if count is zero, handle with contig code, call it an int */
    if (count == 0) {
        err = PREPEND_PREFIX(Dataloop_create_contiguous)(0,
                                                         MPI_INT,
                                                         dlp_p,
                                                         dlsz_p,
                                                         dldepth_p,
                                                         flag);
        return err;
    }

    /* Skip any initial zero-length blocks */
    for (first = 0; first < count; first++)
        if ((DLOOP_Count) blocklength_array[first])
            break;

    is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1;

    if (is_builtin) {
        DLOOP_Handle_get_extent_macro(oldtype, old_extent);
        old_loop_depth = 0;
    }
    else {
        DLOOP_Handle_get_extent_macro(oldtype, old_extent);
        DLOOP_Handle_get_loopdepth_macro(oldtype, old_loop_depth, flag);
    }

    /* total number of oldtype elements described by the index */
    for (i=first; i < count; i++) {
        old_type_count += (DLOOP_Count) blocklength_array[i];
    }

    contig_count = PREPEND_PREFIX(Type_indexed_count_contig)(count,
                                                             blocklength_array,
                                                             displacement_array,
                                                             dispinbytes,
                                                             old_extent);

    /* if contig_count is zero (no data), handle with contig code */
    if (contig_count == 0) {
        err = PREPEND_PREFIX(Dataloop_create_contiguous)(0,
                                                         MPI_INT,
                                                         dlp_p,
                                                         dlsz_p,
                                                         dldepth_p,
                                                         flag);
        return err;
    }

    /* optimization:
     *
     * if contig_count == 1 and block starts at displacement 0,
     * store it as a contiguous rather than an indexed dataloop.
     */
    if ((contig_count == 1) &&
        ((!dispinbytes && ((int *) displacement_array)[first] == 0) ||
         (dispinbytes && ((MPI_Aint *) displacement_array)[first] == 0)))
    {
        err = PREPEND_PREFIX(Dataloop_create_contiguous)(old_type_count,
                                                         oldtype,
                                                         dlp_p,
                                                         dlsz_p,
                                                         dldepth_p,
                                                         flag);
        return err;
    }

    /* optimization:
     *
     * if contig_count == 1 (and displacement != 0), store this as
     * a single element blockindexed rather than a lot of individual
     * blocks.
     */
    if (contig_count == 1) {
        const void *disp_arr_tmp; /* no ternary assignment to avoid clang warnings */
        if (dispinbytes)
            disp_arr_tmp = &(((const MPI_Aint *)displacement_array)[first]);
        else
            disp_arr_tmp = &(((const int *)displacement_array)[first]);
        err = PREPEND_PREFIX(Dataloop_create_blockindexed)(1,
                                                           old_type_count,
                                                           disp_arr_tmp,
                                                           dispinbytes,
                                                           oldtype,
                                                           dlp_p,
                                                           dlsz_p,
                                                           dldepth_p,
                                                           flag);
        return err;
    }

    /* optimization:
     *
     * if block length is the same for all blocks, store it as a
     * blockindexed rather than an indexed dataloop.
     *
     * Implementation trick: if any later block differs, decrement blksz
     * so the equality test below fails even when some block happened to
     * equal blksz - 1... NOTE(review): if blocklength_array[i] ==
     * blocklength_array[first] - 1 this still distinguishes correctly,
     * because blksz is only decremented once and then compared against
     * the untouched blocklength_array[first].
     */
    blksz = blocklength_array[first];
    for (i = first+1; i < count; i++) {
        if (blocklength_array[i] != blksz) {
            blksz--; /* sentinel: force mismatch below */
            break;
        }
    }
    if (blksz == blocklength_array[first]) {
        const void *disp_arr_tmp; /* no ternary assignment to avoid clang warnings */
        if (dispinbytes)
            disp_arr_tmp = &(((const MPI_Aint *)displacement_array)[first]);
        else
            disp_arr_tmp = &(((const int *)displacement_array)[first]);
        err = PREPEND_PREFIX(Dataloop_create_blockindexed)(icount-first,
                                                           blksz,
                                                           disp_arr_tmp,
                                                           dispinbytes,
                                                           oldtype,
                                                           dlp_p,
                                                           dlsz_p,
                                                           dldepth_p,
                                                           flag);
        return err;
    }

    /* note: blockindexed looks for the vector optimization */

    /* TODO: optimization:
     *
     * if an indexed of a contig, absorb the contig into the blocklen array
     * and keep the same overall depth
     */

    /* otherwise storing as an indexed dataloop */

    if (is_builtin) {
        PREPEND_PREFIX(Dataloop_alloc)(DLOOP_KIND_INDEXED,
                                       count,
                                       &new_dlp,
                                       &new_loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!new_dlp) return -1;
        /* --END ERROR HANDLING-- */

        new_dlp->kind = DLOOP_KIND_INDEXED | DLOOP_FINAL_MASK;

        if (flag == DLOOP_DATALOOP_ALL_BYTES) {
            /* blocklengths are modified below */
            new_dlp->el_size   = 1;
            new_dlp->el_extent = 1;
            new_dlp->el_type   = MPI_BYTE;
        }
        else {
            new_dlp->el_size   = old_extent;
            new_dlp->el_extent = old_extent;
            new_dlp->el_type   = oldtype;
        }
    }
    else {
        DLOOP_Dataloop *old_loop_ptr = NULL;
        MPI_Aint old_loop_sz = 0;

        DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr, flag);
        DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz, flag);

        /* only contig_count blocks are stored after run coalescing */
        PREPEND_PREFIX(Dataloop_alloc_and_copy)(DLOOP_KIND_INDEXED,
                                                contig_count,
                                                old_loop_ptr,
                                                old_loop_sz,
                                                &new_dlp,
                                                &new_loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!new_dlp) return -1;
        /* --END ERROR HANDLING-- */

        new_dlp->kind = DLOOP_KIND_INDEXED;
        DLOOP_Handle_get_size_macro(oldtype, new_dlp->el_size);
        DLOOP_Handle_get_extent_macro(oldtype, new_dlp->el_extent);
        DLOOP_Handle_get_basic_type_macro(oldtype, new_dlp->el_type);
    }

    new_dlp->loop_params.i_t.count        = contig_count;
    new_dlp->loop_params.i_t.total_blocks = old_type_count;

    /* copy in blocklength and displacement parameters (in that order)
     *
     * regardless of dispinbytes, we store displacements in bytes in loop.
     */
    DLOOP_Type_indexed_array_copy(count,
                                  contig_count,
                                  blocklength_array,
                                  displacement_array,
                                  new_dlp->loop_params.i_t.blocksize_array,
                                  new_dlp->loop_params.i_t.offset_array,
                                  dispinbytes,
                                  old_extent);

    if (is_builtin && (flag == DLOOP_DATALOOP_ALL_BYTES)) {
        DLOOP_Count *tmp_blklen_array = new_dlp->loop_params.i_t.blocksize_array;

        for (i=0; i < contig_count; i++) {
            /* increase block lengths so they are in bytes */
            tmp_blklen_array[i] *= old_extent;
        }

        new_dlp->loop_params.i_t.total_blocks *= old_extent;
    }

    *dlp_p     = new_dlp;
    *dlsz_p    = new_loop_sz;
    *dldepth_p = old_loop_depth + 1;

    /* NOTE(review): other creators in this file return literal 0; both are
     * equivalent since MPI_SUCCESS == 0 */
    return MPI_SUCCESS;
}
/* Dataloop_create - top-level recursive dataloop builder.
 *
 * Decodes `type` via its envelope/contents, recursively ensures dataloops
 * exist for the constituent type(s), then dispatches on the combiner to
 * the appropriate creator above. Results are returned through dlp_p,
 * dlsz_p, and dldepth_p; already-built dataloops are returned as-is.
 */
void PREPEND_PREFIX(Dataloop_create)(MPI_Datatype type,
                                     DLOOP_Dataloop **dlp_p,
                                     int *dlsz_p,
                                     int *dldepth_p,
                                     int flag)
{
    int i;
    int err;

    int nr_ints, nr_aints, nr_types, combiner;
    MPI_Datatype *types;
    int *ints;
    MPI_Aint *aints;

    DLOOP_Dataloop *old_dlp;
    int old_dlsz, old_dldepth;

    int dummy1, dummy2, dummy3, type0_combiner, ndims;
    MPI_Datatype tmptype;

    MPI_Aint stride;
    MPI_Aint *disps;

    MPIR_Type_get_envelope_impl(type, &nr_ints, &nr_aints, &nr_types, &combiner);

    /* some named types do need dataloops; handle separately. */
    if (combiner == MPI_COMBINER_NAMED) {
        DLOOP_Dataloop_create_named(type, dlp_p, dlsz_p, dldepth_p, flag);
        return;
    }
    else if (combiner == MPI_COMBINER_F90_REAL ||
             combiner == MPI_COMBINER_F90_COMPLEX ||
             combiner == MPI_COMBINER_F90_INTEGER)
    {
        /* F90 parameterized types: treat as a contig of 1 of the
         * underlying basic type */
        MPI_Datatype f90basetype;
        DLOOP_Handle_get_basic_type_macro(type, f90basetype);
        PREPEND_PREFIX(Dataloop_create_contiguous)(1 /* count */,
                                                   f90basetype,
                                                   dlp_p, dlsz_p,
                                                   dldepth_p,
                                                   flag);
        return;
    }

    /* Q: should we also check for "hasloop", or is the COMBINER
     *    check above enough to weed out everything that wouldn't
     *    have a loop?
     */
    DLOOP_Handle_get_loopptr_macro(type, old_dlp, flag);
    if (old_dlp != NULL) {
        /* dataloop already created; just return it. */
        *dlp_p = old_dlp;
        DLOOP_Handle_get_loopsize_macro(type, *dlsz_p, flag);
        DLOOP_Handle_get_loopdepth_macro(type, *dldepth_p, flag);
        return;
    }

    PREPEND_PREFIX(Type_access_contents)(type, &ints, &aints, &types);

    /* first check for zero count on types where that makes sense */
    switch(combiner) {
        case MPI_COMBINER_CONTIGUOUS:
        case MPI_COMBINER_VECTOR:
        case MPI_COMBINER_HVECTOR_INTEGER:
        case MPI_COMBINER_HVECTOR:
        case MPI_COMBINER_INDEXED_BLOCK:
        case MPI_COMBINER_HINDEXED_BLOCK:
        case MPI_COMBINER_INDEXED:
        case MPI_COMBINER_HINDEXED_INTEGER:
        case MPI_COMBINER_HINDEXED:
        case MPI_COMBINER_STRUCT_INTEGER:
        case MPI_COMBINER_STRUCT:
            /* ints[0] is the count argument for all these combiners */
            if (ints[0] == 0) {
                PREPEND_PREFIX(Dataloop_create_contiguous)(0, MPI_INT, dlp_p,
                                                           dlsz_p, dldepth_p,
                                                           flag);
                goto clean_exit;
            }
            break;
        default:
            break;
    }

    /* recurse, processing types "below" this one before processing
     * this one, if those type don't already have dataloops.
     *
     * note: in the struct case below we'll handle any additional
     *       types "below" the current one.
     */
    MPIR_Type_get_envelope_impl(types[0], &dummy1, &dummy2, &dummy3, &type0_combiner);
    if (type0_combiner != MPI_COMBINER_NAMED)
    {
        DLOOP_Handle_get_loopptr_macro(types[0], old_dlp, flag);
        if (old_dlp == NULL)
        {
            /* no dataloop already present; create and store one */
            PREPEND_PREFIX(Dataloop_create)(types[0],
                                            &old_dlp,
                                            &old_dlsz,
                                            &old_dldepth,
                                            flag);

            DLOOP_Handle_set_loopptr_macro(types[0], old_dlp, flag);
            DLOOP_Handle_set_loopsize_macro(types[0], old_dlsz, flag);
            DLOOP_Handle_set_loopdepth_macro(types[0], old_dldepth, flag);
        }
        else {
            DLOOP_Handle_get_loopsize_macro(types[0], old_dlsz, flag);
            DLOOP_Handle_get_loopdepth_macro(types[0], old_dldepth, flag);
        }
    }

    switch(combiner)
    {
        case MPI_COMBINER_DUP:
            /* derived base type: share its loop via dup; named base:
             * a 1-element contig suffices */
            if (type0_combiner != MPI_COMBINER_NAMED) {
                PREPEND_PREFIX(Dataloop_dup)(old_dlp, old_dlsz, dlp_p);
                *dlsz_p    = old_dlsz;
                *dldepth_p = old_dldepth;
            }
            else {
                PREPEND_PREFIX(Dataloop_create_contiguous)(1,
                                                           types[0],
                                                           dlp_p, dlsz_p,
                                                           dldepth_p,
                                                           flag);
            }
            break;
        case MPI_COMBINER_RESIZED:
            /* same as DUP, but for a named base the new extent
             * (aints[1]) is recorded on the element */
            if (type0_combiner != MPI_COMBINER_NAMED) {
                PREPEND_PREFIX(Dataloop_dup)(old_dlp, old_dlsz, dlp_p);
                *dlsz_p    = old_dlsz;
                *dldepth_p = old_dldepth;
            }
            else {
                PREPEND_PREFIX(Dataloop_create_contiguous)(1,
                                                           types[0],
                                                           dlp_p, dlsz_p,
                                                           dldepth_p,
                                                           flag);
                (*dlp_p)->el_extent = aints[1]; /* extent */
            }
            break;
        case MPI_COMBINER_CONTIGUOUS:
            PREPEND_PREFIX(Dataloop_create_contiguous)(ints[0] /* count */,
                                                       types[0] /* oldtype */,
                                                       dlp_p, dlsz_p,
                                                       dldepth_p,
                                                       flag);
            break;
        case MPI_COMBINER_VECTOR:
            PREPEND_PREFIX(Dataloop_create_vector)(ints[0] /* count */,
                                                   ints[1] /* blklen */,
                                                   ints[2] /* stride */,
                                                   0 /* stride not bytes */,
                                                   types[0] /* oldtype */,
                                                   dlp_p, dlsz_p, dldepth_p,
                                                   flag);
            break;
        case MPI_COMBINER_HVECTOR_INTEGER:
        case MPI_COMBINER_HVECTOR:
            /* fortran hvector has integer stride in bytes */
            if (combiner == MPI_COMBINER_HVECTOR_INTEGER) {
                stride = (MPI_Aint) ints[2];
            }
            else {
                stride = aints[0];
            }

            PREPEND_PREFIX(Dataloop_create_vector)(ints[0] /* count */,
                                                   ints[1] /* blklen */,
                                                   stride,
                                                   1 /* stride in bytes */,
                                                   types[0] /* oldtype */,
                                                   dlp_p, dlsz_p, dldepth_p,
                                                   flag);
            break;
        case MPI_COMBINER_INDEXED_BLOCK:
            PREPEND_PREFIX(Dataloop_create_blockindexed)(ints[0] /* count */,
                                                         ints[1] /* blklen */,
                                                         &ints[2] /* disps */,
                                                         0 /* disp not bytes */,
                                                         types[0] /* oldtype */,
                                                         dlp_p, dlsz_p,
                                                         dldepth_p,
                                                         flag);
            break;
        case MPI_COMBINER_HINDEXED_BLOCK:
            /* widen the byte displacements into a temporary MPI_Aint
             * array for the blockindexed creator */
            disps = (MPI_Aint *) DLOOP_Malloc(ints[0] * sizeof(MPI_Aint));
            for (i = 0; i < ints[0]; i++)
                disps[i] = aints[i];
            PREPEND_PREFIX(Dataloop_create_blockindexed)(ints[0] /* count */,
                                                         ints[1] /* blklen */,
                                                         disps /* disps */,
                                                         1 /* disp is bytes */,
                                                         types[0] /* oldtype */,
                                                         dlp_p, dlsz_p,
                                                         dldepth_p,
                                                         flag);
            DLOOP_Free(disps);
            break;
        case MPI_COMBINER_INDEXED:
            /* contents layout: ints = {count, blklens..., disps...} */
            PREPEND_PREFIX(Dataloop_create_indexed)(ints[0] /* count */,
                                                    &ints[1] /* blklens */,
                                                    &ints[ints[0]+1] /* disp */,
                                                    0 /* disp not in bytes */,
                                                    types[0] /* oldtype */,
                                                    dlp_p, dlsz_p, dldepth_p,
                                                    flag);
            break;
        case MPI_COMBINER_HINDEXED_INTEGER:
        case MPI_COMBINER_HINDEXED:
            if (combiner == MPI_COMBINER_HINDEXED_INTEGER) {
                /* fortran variant: integer byte displacements must be
                 * widened into a temporary MPI_Aint array */
                disps = (MPI_Aint *) DLOOP_Malloc(ints[0] * sizeof(MPI_Aint));

                for (i=0; i < ints[0]; i++) {
                    disps[i] = (MPI_Aint) ints[ints[0] + 1 + i];
                }
            }
            else {
                disps = aints;
            }

            PREPEND_PREFIX(Dataloop_create_indexed)(ints[0] /* count */,
                                                    &ints[1] /* blklens */,
                                                    disps,
                                                    1 /* disp in bytes */,
                                                    types[0] /* oldtype */,
                                                    dlp_p, dlsz_p, dldepth_p,
                                                    flag);

            if (combiner == MPI_COMBINER_HINDEXED_INTEGER) {
                DLOOP_Free(disps);
            }

            break;
        case MPI_COMBINER_STRUCT_INTEGER:
        case MPI_COMBINER_STRUCT:
            /* types[0] was handled above; ensure dataloops exist for the
             * remaining member types before building the struct loop */
            for (i = 1; i < ints[0]; i++) {
                int type_combiner;
                MPIR_Type_get_envelope_impl(types[i], &dummy1, &dummy2, &dummy3, &type_combiner);

                if (type_combiner != MPI_COMBINER_NAMED) {
                    DLOOP_Handle_get_loopptr_macro(types[i], old_dlp, flag);
                    if (old_dlp == NULL)
                    {
                        PREPEND_PREFIX(Dataloop_create)(types[i],
                                                        &old_dlp,
                                                        &old_dlsz,
                                                        &old_dldepth,
                                                        flag);

                        DLOOP_Handle_set_loopptr_macro(types[i], old_dlp,
                                                       flag);
                        DLOOP_Handle_set_loopsize_macro(types[i], old_dlsz,
                                                        flag);
                        DLOOP_Handle_set_loopdepth_macro(types[i], old_dldepth,
                                                         flag);
                    }
                }
            }
            if (combiner == MPI_COMBINER_STRUCT_INTEGER) {
                /* fortran variant: widen integer byte displacements */
                disps = (MPI_Aint *) DLOOP_Malloc(ints[0] * sizeof(MPI_Aint));

                for (i=0; i < ints[0]; i++) {
                    disps[i] = (MPI_Aint) ints[ints[0] + 1 + i];
                }
            }
            else {
                disps = aints;
            }
            err = PREPEND_PREFIX(Dataloop_create_struct)(ints[0] /* count */,
                                                         &ints[1] /* blklens */,
                                                         disps,
                                                         types /* oldtype array */,
                                                         dlp_p, dlsz_p,
                                                         dldepth_p,
                                                         flag);
            /* TODO if/when this function returns error codes, propagate this failure instead */
            DLOOP_Assert(0 == err);
            /* if (err) return err; */

            if (combiner == MPI_COMBINER_STRUCT_INTEGER) {
                DLOOP_Free(disps);
            }
            break;
        case MPI_COMBINER_SUBARRAY:
            /* convert to an equivalent derived type, build its loop,
             * then free the temporary */
            ndims = ints[0];
            PREPEND_PREFIX(Type_convert_subarray)(ndims,
                                                  &ints[1] /* sizes */,
                                                  &ints[1+ndims] /* subsizes */,
                                                  &ints[1+2*ndims] /* starts */,
                                                  ints[1+3*ndims] /* order */,
                                                  types[0],
                                                  &tmptype);

            PREPEND_PREFIX(Dataloop_create)(tmptype,
                                            dlp_p,
                                            dlsz_p,
                                            dldepth_p,
                                            flag);

            MPIR_Type_free_impl(&tmptype);
            break;
        case MPI_COMBINER_DARRAY:
            /* same approach as SUBARRAY: convert, recurse, free */
            ndims = ints[2];
            PREPEND_PREFIX(Type_convert_darray)(ints[0] /* size */,
                                                ints[1] /* rank */,
                                                ndims,
                                                &ints[3] /* gsizes */,
                                                &ints[3+ndims] /*distribs */,
                                                &ints[3+2*ndims] /* dargs */,
                                                &ints[3+3*ndims] /* psizes */,
                                                ints[3+4*ndims] /* order */,
                                                types[0],
                                                &tmptype);

            PREPEND_PREFIX(Dataloop_create)(tmptype,
                                            dlp_p,
                                            dlsz_p,
                                            dldepth_p,
                                            flag);

            MPIR_Type_free_impl(&tmptype);
            break;
        default:
            DLOOP_Assert(0);
            break;
    }

 clean_exit:
    PREPEND_PREFIX(Type_release_contents)(type, &ints, &aints, &types);

    /* for now we just leave the intermediate dataloops in place.
     * could remove them to save space if we wanted.
     */

    return;
}