/*@ Dataloop_create_struct - create the dataloop representation for a struct datatype Input Parameters: + count - number of blocks in vector . blklens - number of elements in each block . disps - offsets of blocks from start of type in bytes - oldtypes - types (using handle) of datatypes on which vector is based Output Parameters: + dlp_p - pointer to address in which to place pointer to new dataloop - dlsz_p - pointer to address in which to place size of new dataloop Return Value: 0 on success, -1 on failure. Notes: This function relies on others, like Dataloop_create_indexed, to create types in some cases. This call (like all the rest) takes int blklens and MPI_Aint displacements, so it's possible to overflow when working with a particularly large struct type in some cases. This isn't detected or corrected in this code at this time. @*/ int MPIR_Dataloop_create_struct(DLOOP_Count count, const int *blklens, const MPI_Aint *disps, const DLOOP_Type *oldtypes, DLOOP_Dataloop **dlp_p, MPI_Aint *dlsz_p, int *dldepth_p, int flag) { int err, i, nr_basics = 0, nr_derived = 0, type_pos = 0; DLOOP_Type first_basic = MPI_DATATYPE_NULL, first_derived = MPI_DATATYPE_NULL; /* variables used in general case only */ int loop_idx, new_loop_depth; int old_loop_depth = 0; MPI_Aint new_loop_sz, old_loop_sz = 0; DLOOP_Dataloop *new_dlp, *curpos; /* if count is zero, handle with contig code, call it a int */ if (count == 0) { err = MPIR_Dataloop_create_contiguous(0, MPI_INT, dlp_p, dlsz_p, dldepth_p, flag); return err; } /* browse the old types and characterize */ for (i=0; i < count; i++) { /* ignore type elements with a zero blklen */ if (blklens[i] == 0) continue; if (oldtypes[i] != MPI_LB && oldtypes[i] != MPI_UB) { int is_builtin; is_builtin = (DLOOP_Handle_hasloop_macro(oldtypes[i])) ? 
0 : 1; if (is_builtin) { if (nr_basics == 0) { first_basic = oldtypes[i]; type_pos = i; } else if (oldtypes[i] != first_basic) { first_basic = MPI_DATATYPE_NULL; } nr_basics++; } else /* derived type */ { if (nr_derived == 0) { first_derived = oldtypes[i]; type_pos = i; } else if (oldtypes[i] != first_derived) { first_derived = MPI_DATATYPE_NULL; } nr_derived++; } } } /* note on optimizations: * * because LB, UB, and extent calculations are handled as part of * the Datatype, we can safely ignore them in all our calculations * here. */ /* optimization: * * if there were only MPI_LBs and MPI_UBs in the struct type, * treat it as a zero-element contiguous (just as count == 0). */ if (nr_basics == 0 && nr_derived == 0) { err = MPIR_Dataloop_create_contiguous(0, MPI_INT, dlp_p, dlsz_p, dldepth_p, flag); return err; } /* optimization: * * if there is only one unique instance of a type in the struct, treat it * as a blockindexed type. * * notes: * * if the displacement happens to be zero, the blockindexed code will * optimize this into a contig. */ if (nr_basics + nr_derived == 1) { /* type_pos is index to only real type in array */ err = MPIR_Dataloop_create_blockindexed (1, /* count */ blklens[type_pos], &disps[type_pos], 1, /* displacement in bytes */ oldtypes[type_pos], dlp_p, dlsz_p, dldepth_p, flag); return err; } /* optimization: * * if there only one unique type (more than one instance) in the * struct, treat it as an indexed type. * * notes: * * this will apply to a single type with an LB/UB, as those * are handled elsewhere. * */ if (((nr_derived == 0) && (first_basic != MPI_DATATYPE_NULL)) || ((nr_basics == 0) && (first_derived != MPI_DATATYPE_NULL))) { return DLOOP_Dataloop_create_unique_type_struct(count, blklens, disps, oldtypes, type_pos, dlp_p, dlsz_p, dldepth_p, flag); } /* optimization: * * if there are no derived types and caller indicated either a * homogeneous system or the "all bytes" conversion, convert * everything to bytes and use an indexed type. 
*/ if (nr_derived == 0 && ((flag == DLOOP_DATALOOP_HOMOGENEOUS) || (flag == DLOOP_DATALOOP_ALL_BYTES))) { return DLOOP_Dataloop_create_basic_all_bytes_struct(count, blklens, disps, oldtypes, dlp_p, dlsz_p, dldepth_p, flag); } /* optimization: * * if caller asked for homogeneous or all bytes representation, * flatten the type and store it as an indexed type so that * there are no branches in the dataloop tree. */ if ((flag == DLOOP_DATALOOP_HOMOGENEOUS) || (flag == DLOOP_DATALOOP_ALL_BYTES)) { return DLOOP_Dataloop_create_flattened_struct(count, blklens, disps, oldtypes, dlp_p, dlsz_p, dldepth_p, flag); } /* scan through types and gather derived type info */ for (i=0; i < count; i++) { /* ignore type elements with a zero blklen */ if (blklens[i] == 0) continue; if (DLOOP_Handle_hasloop_macro(oldtypes[i])) { int tmp_loop_depth; MPI_Aint tmp_loop_sz; DLOOP_Handle_get_loopdepth_macro(oldtypes[i], tmp_loop_depth, flag); DLOOP_Handle_get_loopsize_macro(oldtypes[i], tmp_loop_sz, flag); if (tmp_loop_depth > old_loop_depth) { old_loop_depth = tmp_loop_depth; } old_loop_sz += tmp_loop_sz; } } /* general case below: 2 or more distinct types that are either * basics or derived, and for which we want to preserve the types * themselves. */ if (nr_basics > 0) { /* basics introduce an extra level of depth, so if our new depth * must be at least 2 if there are basics. */ new_loop_depth = ((old_loop_depth+1) > 2) ? 
(old_loop_depth+1) : 2; } else { new_loop_depth = old_loop_depth + 1; } MPIR_Dataloop_struct_alloc((DLOOP_Count) nr_basics + nr_derived, old_loop_sz, nr_basics, &curpos, &new_dlp, &new_loop_sz); /* --BEGIN ERROR HANDLING-- */ if (!new_dlp) { return DLOOP_Dataloop_create_struct_memory_error(); } /* --END ERROR HANDLING-- */ new_dlp->kind = DLOOP_KIND_STRUCT; new_dlp->el_size = -1; /* not valid for struct */ new_dlp->el_extent = -1; /* not valid for struct; see el_extent_array */ new_dlp->el_type = MPI_DATATYPE_NULL; /* not valid for struct */ new_dlp->loop_params.s_t.count = (DLOOP_Count) nr_basics + nr_derived; /* note: curpos points to first byte in "old dataloop" region of * newly allocated space. */ for (i=0, loop_idx = 0; i < count; i++) { int is_builtin; /* ignore type elements with a zero blklen */ if (blklens[i] == 0) continue; is_builtin = (DLOOP_Handle_hasloop_macro(oldtypes[i])) ? 0 : 1; if (is_builtin) { DLOOP_Dataloop *dummy_dlp; int dummy_depth; MPI_Aint dummy_sz; /* LBs and UBs already taken care of -- skip them */ if (oldtypes[i] == MPI_LB || oldtypes[i] == MPI_UB) { continue; } /* build a contig dataloop for this basic and point to that * * optimization: * * push the count (blklen) from the struct down into the * contig so we can process more at the leaf. 
*/ err = MPIR_Dataloop_create_contiguous(blklens[i], oldtypes[i], &dummy_dlp, &dummy_sz, &dummy_depth, flag); /* --BEGIN ERROR HANDLING-- */ if (err) { /* TODO: FREE ALLOCATED RESOURCES */ return -1; } /* --END ERROR HANDLING-- */ /* copy the new contig loop into place in the struct memory * region */ MPIR_Dataloop_copy(curpos, dummy_dlp, dummy_sz); new_dlp->loop_params.s_t.dataloop_array[loop_idx] = curpos; curpos = (DLOOP_Dataloop *) ((char *) curpos + dummy_sz); /* we stored the block size in the contig -- use 1 here */ new_dlp->loop_params.s_t.blocksize_array[loop_idx] = 1; new_dlp->loop_params.s_t.el_extent_array[loop_idx] = ((DLOOP_Offset) blklens[i]) * dummy_dlp->el_extent; MPIR_Dataloop_free(&dummy_dlp); } else { DLOOP_Dataloop *old_loop_ptr; DLOOP_Offset old_extent; DLOOP_Handle_get_loopptr_macro(oldtypes[i], old_loop_ptr, flag); DLOOP_Handle_get_loopsize_macro(oldtypes[i], old_loop_sz, flag); DLOOP_Handle_get_extent_macro(oldtypes[i], old_extent); MPIR_Dataloop_copy(curpos, old_loop_ptr, old_loop_sz); new_dlp->loop_params.s_t.dataloop_array[loop_idx] = curpos; curpos = (DLOOP_Dataloop *) ((char *) curpos + old_loop_sz); new_dlp->loop_params.s_t.blocksize_array[loop_idx] = (DLOOP_Count) blklens[i]; new_dlp->loop_params.s_t.el_extent_array[loop_idx] = old_extent; } new_dlp->loop_params.s_t.offset_array[loop_idx] = (DLOOP_Offset) disps[i]; loop_idx++; } *dlp_p = new_dlp; *dlsz_p = new_loop_sz; *dldepth_p = new_loop_depth; return 0; }
/*@ Dataloop_create_blockindexed - create blockindexed dataloop Arguments: + DLOOP_Count count . void *displacement_array (array of either MPI_Aints or ints) . int displacement_in_bytes (boolean) . MPI_Datatype old_type . DLOOP_Dataloop **output_dataloop_ptr . int output_dataloop_size . int output_dataloop_depth - int flag .N Errors .N Returns 0 on success, -1 on failure. @*/ int MPIR_Dataloop_create_blockindexed(DLOOP_Count icount, DLOOP_Count iblklen, const void *disp_array, int dispinbytes, DLOOP_Type oldtype, DLOOP_Dataloop ** dlp_p, DLOOP_Size * dlsz_p, int *dldepth_p, int flag) { int err, is_builtin, is_vectorizable = 1; int i, old_loop_depth; DLOOP_Size new_loop_sz; DLOOP_Count contig_count, count, blklen; DLOOP_Offset old_extent, eff_disp0, eff_disp1, last_stride; DLOOP_Dataloop *new_dlp; count = (DLOOP_Count) icount; /* avoid subsequent casting */ blklen = (DLOOP_Count) iblklen; /* if count or blklen are zero, handle with contig code, call it a int */ if (count == 0 || blklen == 0) { err = MPIR_Dataloop_create_contiguous(0, MPI_INT, dlp_p, dlsz_p, dldepth_p, flag); return err; } is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1; if (is_builtin) { DLOOP_Handle_get_size_macro(oldtype, old_extent); old_loop_depth = 0; } else { DLOOP_Handle_get_extent_macro(oldtype, old_extent); DLOOP_Handle_get_loopdepth_macro(oldtype, old_loop_depth); } contig_count = MPIR_Type_blockindexed_count_contig(count, blklen, disp_array, dispinbytes, old_extent); /* optimization: * * if contig_count == 1 and block starts at displacement 0, * store it as a contiguous rather than a blockindexed dataloop. */ if ((contig_count == 1) && ((!dispinbytes && ((int *) disp_array)[0] == 0) || (dispinbytes && ((MPI_Aint *) disp_array)[0] == 0))) { err = MPIR_Dataloop_create_contiguous(icount * iblklen, oldtype, dlp_p, dlsz_p, dldepth_p, flag); return err; } /* optimization: * * if contig_count == 1 store it as a blockindexed with one * element rather than as a lot of individual blocks. 
*/ if (contig_count == 1) { /* adjust count and blklen and drop through */ blklen *= count; count = 1; iblklen *= icount; icount = 1; } /* optimization: * * if displacements start at zero and result in a fixed stride, * store it as a vector rather than a blockindexed dataloop. */ eff_disp0 = (dispinbytes) ? ((DLOOP_Offset) ((MPI_Aint *) disp_array)[0]) : (((DLOOP_Offset) ((int *) disp_array)[0]) * old_extent); if (count > 1 && eff_disp0 == (DLOOP_Offset) 0) { eff_disp1 = (dispinbytes) ? ((DLOOP_Offset) ((MPI_Aint *) disp_array)[1]) : (((DLOOP_Offset) ((int *) disp_array)[1]) * old_extent); last_stride = eff_disp1 - eff_disp0; for (i = 2; i < count; i++) { eff_disp0 = eff_disp1; eff_disp1 = (dispinbytes) ? ((DLOOP_Offset) ((MPI_Aint *) disp_array)[i]) : (((DLOOP_Offset) ((int *) disp_array)[i]) * old_extent); if (eff_disp1 - eff_disp0 != last_stride) { is_vectorizable = 0; break; } } if (is_vectorizable) { err = MPIR_Dataloop_create_vector(count, blklen, last_stride, 1, /* strideinbytes */ oldtype, dlp_p, dlsz_p, dldepth_p, flag); return err; } } /* TODO: optimization: * * if displacements result in a fixed stride, but first displacement * is not zero, store it as a blockindexed (blklen == 1) of a vector. */ /* TODO: optimization: * * if a blockindexed of a contig, absorb the contig into the blocklen * parameter and keep the same overall depth */ /* otherwise storing as a blockindexed dataloop */ /* Q: HOW CAN WE TELL IF IT IS WORTH IT TO STORE AS AN * INDEXED WITH FEWER CONTIG BLOCKS (IF CONTIG_COUNT IS SMALL)? 
*/ if (is_builtin) { MPIR_Dataloop_alloc(DLOOP_KIND_BLOCKINDEXED, count, &new_dlp, &new_loop_sz); /* --BEGIN ERROR HANDLING-- */ if (!new_dlp) return -1; /* --END ERROR HANDLING-- */ new_dlp->kind = DLOOP_KIND_BLOCKINDEXED | DLOOP_FINAL_MASK; if (flag == DLOOP_DATALOOP_ALL_BYTES) { blklen *= old_extent; new_dlp->el_size = 1; new_dlp->el_extent = 1; new_dlp->el_type = MPI_BYTE; } else { new_dlp->el_size = old_extent; new_dlp->el_extent = old_extent; new_dlp->el_type = oldtype; } } else { DLOOP_Dataloop *old_loop_ptr = NULL; MPI_Aint old_loop_sz = 0; DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr); DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz); MPIR_Dataloop_alloc_and_copy(DLOOP_KIND_BLOCKINDEXED, count, old_loop_ptr, old_loop_sz, &new_dlp, &new_loop_sz); /* --BEGIN ERROR HANDLING-- */ if (!new_dlp) return -1; /* --END ERROR HANDLING-- */ new_dlp->kind = DLOOP_KIND_BLOCKINDEXED; DLOOP_Handle_get_size_macro(oldtype, new_dlp->el_size); DLOOP_Handle_get_extent_macro(oldtype, new_dlp->el_extent); DLOOP_Handle_get_basic_type_macro(oldtype, new_dlp->el_type); } new_dlp->loop_params.bi_t.count = count; new_dlp->loop_params.bi_t.blocksize = blklen; /* copy in displacement parameters * * regardless of dispinbytes, we store displacements in bytes in loop. */ DLOOP_Type_blockindexed_array_copy(count, disp_array, new_dlp->loop_params.bi_t.offset_array, dispinbytes, old_extent); *dlp_p = new_dlp; *dlsz_p = new_loop_sz; *dldepth_p = old_loop_depth + 1; return 0; }
/*
 * DLOOP_Dataloop_create_flattened_struct - build the dataloop for a struct
 * of arbitrary types by flattening it into (length, displacement) pairs in
 * bytes and storing the result as an indexed dataloop of MPI_BYTE.
 *
 * The work is done in two passes over the input arrays:
 *   1. count the contiguous blocks each entry contributes, so the
 *      temporary arrays can be sized;
 *   2. flatten each contributing entry into those arrays using the
 *      segment code.
 *
 * count, blklens, disps and oldtypes describe the struct; dlp_p, dlsz_p
 * and dldepth_p receive the results produced by the dataloop constructor
 * that is finally called; flag is forwarded to those constructors.
 *
 * Returns the value of the final constructor call, or the error reported
 * by the allocation / segment-initialization step that failed. All
 * temporaries (segment and scratch arrays) are released on every path.
 */
static int DLOOP_Dataloop_create_flattened_struct(DLOOP_Count count,
                                                  const int *blklens,
                                                  const MPI_Aint *disps,
                                                  const DLOOP_Type *oldtypes,
                                                  DLOOP_Dataloop **dlp_p,
                                                  MPI_Aint *dlsz_p,
                                                  int *dldepth_p,
                                                  int flag)
{
    /* arbitrary types, convert to bytes and use indexed */
    int i, err, nr_blks = 0;
    DLOOP_Size *tmp_blklens;
    MPI_Aint *tmp_disps;        /* since we're calling another fn that takes
                                 * this type as an input parameter */
    DLOOP_Offset bytes;
    DLOOP_Segment *segp;

    int first_ind;
    DLOOP_Size last_ind;

    segp = MPIR_Segment_alloc();
    /* --BEGIN ERROR HANDLING-- */
    if (!segp) {
        return DLOOP_Dataloop_create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */

    /* use segment code once to count contiguous regions */
    for (i = 0; i < count; i++) {
        int is_basic;

        /* ignore type elements with a zero blklen */
        if (blklens[i] == 0)
            continue;

        is_basic = (DLOOP_Handle_hasloop_macro(oldtypes[i])) ? 0 : 1;

        if (is_basic && (oldtypes[i] != MPI_LB && oldtypes[i] != MPI_UB)) {
            nr_blks++;
        } else {        /* derived type; get a count of contig blocks */

            DLOOP_Count tmp_nr_blks, sz;

            DLOOP_Handle_get_size_macro(oldtypes[i], sz);

            /* if the derived type has some data to contribute,
             * add to flattened representation */
            if (sz > 0) {
                err = MPIR_Segment_init(NULL, (DLOOP_Count) blklens[i], oldtypes[i], segp, flag);
                /* --BEGIN ERROR HANDLING-- */
                if (err) {
                    /* do not leak the segment on failure */
                    MPIR_Segment_free(segp);
                    return err;
                }
                /* --END ERROR HANDLING-- */

                bytes = SEGMENT_IGNORE_LAST;

                MPIR_Segment_count_contig_blocks(segp, 0, &bytes, &tmp_nr_blks);

                nr_blks += tmp_nr_blks;
            }
        }
    }

    /* it's possible for us to get to this point only to realize that
     * there isn't any data in this type. in that case do what we always
     * do: store a simple contig of zero ints and call it done.
     */
    if (nr_blks == 0) {
        MPIR_Segment_free(segp);
        err = MPIR_Dataloop_create_contiguous(0, MPI_INT, dlp_p, dlsz_p, dldepth_p, flag);
        return err;
    }

    nr_blks += 2;       /* safety measure */

    tmp_blklens = (DLOOP_Size *) DLOOP_Malloc(nr_blks * sizeof(DLOOP_Size), MPL_MEM_DATATYPE);
    /* --BEGIN ERROR HANDLING-- */
    if (!tmp_blklens) {
        MPIR_Segment_free(segp);
        return DLOOP_Dataloop_create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */

    tmp_disps = (MPI_Aint *) DLOOP_Malloc(nr_blks * sizeof(MPI_Aint), MPL_MEM_DATATYPE);
    /* --BEGIN ERROR HANDLING-- */
    if (!tmp_disps) {
        DLOOP_Free(tmp_blklens);
        MPIR_Segment_free(segp);
        return DLOOP_Dataloop_create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */

    /* use segment code again to flatten the type */
    first_ind = 0;
    for (i = 0; i < count; i++) {
        int is_basic;
        DLOOP_Count sz = -1;

        is_basic = (DLOOP_Handle_hasloop_macro(oldtypes[i])) ? 0 : 1;
        if (!is_basic)
            DLOOP_Handle_get_size_macro(oldtypes[i], sz);

        /* we're going to use the segment code to flatten the type.
         * we put in our displacement as the buffer location, and use
         * the blocklength as the count value to get N contiguous copies
         * of the type.
         *
         * Note that we're going to get back values in bytes, so that will
         * be our new element type.
         */
        if (oldtypes[i] != MPI_UB &&
            oldtypes[i] != MPI_LB && blklens[i] != 0 && (is_basic || sz > 0)) {
            err = MPIR_Segment_init((char *) DLOOP_OFFSET_CAST_TO_VOID_PTR disps[i],
                                    (DLOOP_Count) blklens[i], oldtypes[i], segp,
                                    0 /* homogeneous */);
            /* --BEGIN ERROR HANDLING-- */
            if (err) {
                /* release every temporary acquired so far */
                DLOOP_Free(tmp_blklens);
                DLOOP_Free(tmp_disps);
                MPIR_Segment_free(segp);
                return err;
            }
            /* --END ERROR HANDLING-- */

            /* on input last_ind is the room left in the scratch arrays;
             * it is added to first_ind after the flatten call */
            last_ind = nr_blks - first_ind;
            bytes = SEGMENT_IGNORE_LAST;
            MPIR_Segment_mpi_flatten(segp,
                                     0, &bytes,
                                     &tmp_blklens[first_ind], &tmp_disps[first_ind], &last_ind);
            /* err is not updated by the flatten call, so there is
             * nothing further to check here */
            first_ind += last_ind;
        }
    }
    nr_blks = first_ind;

#if 0
    if (MPL_DBG_SELECTED(MPIR_DBG_DATATYPE,VERBOSE)) {
        MPL_DBG_OUT(MPIR_DBG_DATATYPE,"--- start of flattened type ---");
        for (i=0; i < nr_blks; i++) {
            MPL_DBG_OUT_FMT(MPIR_DBG_DATATYPE,(MPL_DBG_FDEST,
                            "a[%d] = (%d, " DLOOP_OFFSET_FMT_DEC_SPEC ")", i,
                            tmp_blklens[i], tmp_disps[i]));
        }
        MPL_DBG_OUT(MPIR_DBG_DATATYPE,"--- end of flattened type ---");
    }
#endif

    MPIR_Segment_free(segp);

    err = MPIR_Dataloop_create_indexed(nr_blks, tmp_blklens, tmp_disps, 1,      /* disp in bytes */
                                       MPI_BYTE, dlp_p, dlsz_p, dldepth_p, flag);

    DLOOP_Free(tmp_blklens);
    DLOOP_Free(tmp_disps);

    return err;
}