/* DLOOP_Leaf_contig_count_block
 *
 * Leaf handler used while counting the number of contiguous regions in a
 * type.  Each call describes one contiguous run of *blocks_p elements of
 * el_type starting at offset rel_off; the run is either merged into the
 * previous region (if byte-adjacent) or counted as a new one.
 *
 * Note: bufp is only a base offset common to every region, so it cancels
 * out of any adjacency computation and is ignored here.
 *
 * Always returns 0 (continue processing).
 */
static int DLOOP_Leaf_contig_count_block(DLOOP_Offset *blocks_p,
                                         DLOOP_Type el_type,
                                         DLOOP_Offset rel_off,
                                         DLOOP_Buffer bufp ATTRIBUTE((unused)),
                                         void *v_paramp)
{
    struct PREPEND_PREFIX(contig_blocks_params) *paramp = v_paramp;
    DLOOP_Offset elem_sz, region_bytes;

    DLOOP_Assert(*blocks_p > 0);

    DLOOP_Handle_get_size_macro(el_type, elem_sz);
    region_bytes = *blocks_p * elem_sz;

#ifdef MPID_SP_VERBOSE
    MPIU_dbg_printf("contig count block: count = %d, buf+off = %d, lastloc = " DLOOP_OFFSET_FMT_DEC_SPEC "\n",
                    (int) paramp->count,
                    (int) ((char *) bufp + rel_off),
                    paramp->last_loc);
#endif

    if (paramp->count > 0 && rel_off == paramp->last_loc) {
        /* this run starts exactly where the previous one ended: extend it */
        paramp->last_loc += region_bytes;
    }
    else {
        /* disjoint from the previous run: open a new region */
        paramp->last_loc = rel_off + region_bytes;
        paramp->count++;
    }

    return 0;
}
/* DLOOP_Leaf_contig_mpi_flatten
 *
 * Leaf handler used when flattening a type into an MPI-style
 * (blocklength, displacement) list.  Each call contributes one contiguous
 * run of *blocks_p elements of el_type at (bufp + rel_off); the run is
 * appended to paramp's disps[]/blklens[] arrays, coalescing with the
 * previous entry when byte-adjacent.
 *
 * Returns 0 to continue, or 1 (with *blocks_p set to 0) when the output
 * arrays are full and this run could not be coalesced, telling the
 * segment-manipulation driver to stop.
 */
static int DLOOP_Leaf_contig_mpi_flatten(DLOOP_Offset *blocks_p,
                                         DLOOP_Type el_type,
                                         DLOOP_Offset rel_off,
                                         void *bufp,
                                         void *v_paramp)
{
    int last_idx;                 /* index of the most recently filled entry */
    DLOOP_Offset size;            /* byte size of this contiguous run */
    DLOOP_Offset el_size;
    char *last_end = NULL;        /* one past the last byte of the previous entry */
    struct PREPEND_PREFIX(mpi_flatten_params) *paramp = v_paramp;

    DLOOP_Handle_get_size_macro(el_type, el_size);
    size = *blocks_p * el_size;

    last_idx = paramp->index - 1;
    if (last_idx >= 0) {
        /* Since disps can be negative, we cannot use
         * DLOOP_Ensure_Offset_fits_in_pointer to verify that disps +
         * blklens fits in a pointer.  Just let it truncate, if the
         * sizeof a pointer is less than the sizeof an MPI_Aint.
         */
        last_end = (char*) DLOOP_OFFSET_CAST_TO_VOID_PTR
            (paramp->disps[last_idx] + ((DLOOP_Offset) paramp->blklens[last_idx]));
    }

    /* Since bufp can be a displacement and can be negative, we cannot
     * use DLOOP_Ensure_Offset_fits_in_pointer to ensure the sum fits in
     * a pointer.  Just let it truncate.
     */
    if ((last_idx == paramp->length-1) && (last_end != ((char *) bufp + rel_off))) {
        /* we have used up all our entries, and this region doesn't fit on
         * the end of the last one.  setting blocks to 0 tells manipulation
         * function that we are done (and that we didn't process any blocks).
         */
        *blocks_p = 0;
        return 1;
    }
    else if (last_idx >= 0 && (last_end == ((char *) bufp + rel_off))) {
        /* add this size to the last vector rather than using up another one */
        paramp->blklens[last_idx] += size;
    }
    else {
        /* Since bufp can be a displacement and can be negative, we cannot use
         * DLOOP_VOID_PTR_CAST_TO_OFFSET to cast the sum to a pointer.  Just let it
         * sign extend.
         */
        paramp->disps[last_idx+1] = DLOOP_PTR_DISP_CAST_TO_OFFSET bufp + rel_off;
        paramp->blklens[last_idx+1] = size;
        paramp->index++;
    }

    return 0;
}
/*@
  Dataloop_create_contiguous - create the dataloop representation for a
  contiguous datatype

  Input Parameters:
+ icount - number of contiguous copies of oldtype
. oldtype - base datatype (builtin or derived)
- flag - dataloop creation flag (e.g. DLOOP_DATALOOP_ALL_BYTES)

  Output Parameters:
+ dlp_p - pointer to the new dataloop
. dlsz_p - size in bytes of the new dataloop
- dldepth_p - depth of the new dataloop

.N Errors
.N Returns 0 on success, -1 on failure.
@*/
int PREPEND_PREFIX(Dataloop_create_contiguous)(DLOOP_Count icount,
                                               DLOOP_Type oldtype,
                                               DLOOP_Dataloop **dlp_p,
                                               DLOOP_Size *dlsz_p,
                                               int *dldepth_p,
                                               int flag)
{
    DLOOP_Count count;
    int is_builtin, apply_contig_coalescing = 0;
    int new_loop_depth;
    DLOOP_Size new_loop_sz;
    DLOOP_Dataloop *new_dlp;

    count = icount;

    is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1;

    if (is_builtin) {
        new_loop_depth = 1;
    }
    else {
        int old_loop_depth = 0;
        DLOOP_Offset old_size = 0, old_extent = 0;
        DLOOP_Dataloop *old_loop_ptr;

        DLOOP_Handle_get_loopdepth_macro(oldtype, old_loop_depth, flag);
        DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr, flag);
        DLOOP_Handle_get_size_macro(oldtype, old_size);
        DLOOP_Handle_get_extent_macro(oldtype, old_extent);

        /* if we have a simple combination of contigs, coalesce: a contig
         * of a gap-free contig (size == extent) is itself contiguous, so
         * we can reuse the old loop with a multiplied count instead of
         * nesting another level. */
        if (((old_loop_ptr->kind & DLOOP_KIND_MASK) == DLOOP_KIND_CONTIG) &&
            (old_size == old_extent))
        {
            /* will just copy contig and multiply count */
            apply_contig_coalescing = 1;
            new_loop_depth          = old_loop_depth;
        }
        else {
            new_loop_depth = old_loop_depth + 1;
        }
    }

    if (is_builtin) {
        DLOOP_Offset basic_sz = 0;

        PREPEND_PREFIX(Dataloop_alloc)(DLOOP_KIND_CONTIG,
                                       count,
                                       &new_dlp,
                                       &new_loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!new_dlp) return -1;
        /* --END ERROR HANDLING-- */

        DLOOP_Handle_get_size_macro(oldtype, basic_sz);

        new_dlp->kind = DLOOP_KIND_CONTIG | DLOOP_FINAL_MASK;

        if (flag == DLOOP_DATALOOP_ALL_BYTES) {
            /* fold the element size into the count and describe the loop
             * in terms of MPI_BYTE */
            count             *= basic_sz;
            new_dlp->el_size   = 1;
            new_dlp->el_extent = 1;
            new_dlp->el_type   = MPI_BYTE;
        }
        else {
            new_dlp->el_size   = basic_sz;
            new_dlp->el_extent = new_dlp->el_size;
            new_dlp->el_type   = oldtype;
        }

        new_dlp->loop_params.c_t.count = count;
    }
    else {
        /* user-defined base type (oldtype) */
        DLOOP_Dataloop *old_loop_ptr;
        MPI_Aint old_loop_sz = 0;

        DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr, flag);
        DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz, flag);

        if (apply_contig_coalescing) {
            /* make a copy of the old loop and multiply the count */
            PREPEND_PREFIX(Dataloop_dup)(old_loop_ptr, old_loop_sz, &new_dlp);
            /* --BEGIN ERROR HANDLING-- */
            if (!new_dlp) return -1;
            /* --END ERROR HANDLING-- */

            new_dlp->loop_params.c_t.count *= count;

            new_loop_sz = old_loop_sz;
            DLOOP_Handle_get_loopdepth_macro(oldtype, new_loop_depth, flag);
        }
        else {
            /* allocate space for new loop including copy of old */
            PREPEND_PREFIX(Dataloop_alloc_and_copy)(DLOOP_KIND_CONTIG,
                                                    count,
                                                    old_loop_ptr,
                                                    old_loop_sz,
                                                    &new_dlp,
                                                    &new_loop_sz);
            /* --BEGIN ERROR HANDLING-- */
            if (!new_dlp) return -1;
            /* --END ERROR HANDLING-- */

            new_dlp->kind = DLOOP_KIND_CONTIG;
            DLOOP_Handle_get_size_macro(oldtype, new_dlp->el_size);
            DLOOP_Handle_get_extent_macro(oldtype, new_dlp->el_extent);
            DLOOP_Handle_get_basic_type_macro(oldtype, new_dlp->el_type);

            new_dlp->loop_params.c_t.count = count;
        }
    }

    *dlp_p     = new_dlp;
    *dlsz_p    = new_loop_sz;
    *dldepth_p = new_loop_depth;

    return 0;
}
/*@
  Dataloop_create_blockindexed - create blockindexed dataloop

  Arguments:
+ icount - number of blocks
. iblklen - number of elements in each block
. disp_array - displacements (array of MPI_Aints if dispinbytes, else ints)
. dispinbytes - boolean; displacements are in bytes rather than extents
. oldtype - base MPI datatype
. dlp_p - [out] new dataloop
. dlsz_p - [out] size of new dataloop
. dldepth_p - [out] depth of new dataloop
- flag - dataloop creation flag

.N Errors
.N Returns 0 on success, -1 on failure.
@*/
int MPIR_Dataloop_create_blockindexed(DLOOP_Count icount,
                                      DLOOP_Count iblklen,
                                      const void *disp_array,
                                      int dispinbytes,
                                      DLOOP_Type oldtype,
                                      DLOOP_Dataloop ** dlp_p,
                                      DLOOP_Size * dlsz_p,
                                      int *dldepth_p,
                                      int flag)
{
    int err, is_builtin, is_vectorizable = 1;
    int i, old_loop_depth;
    DLOOP_Size new_loop_sz;
    DLOOP_Count contig_count, count, blklen;
    DLOOP_Offset old_extent, eff_disp0, eff_disp1, last_stride;
    DLOOP_Dataloop *new_dlp;

    count = (DLOOP_Count) icount;       /* avoid subsequent casting */
    blklen = (DLOOP_Count) iblklen;

    /* if count or blklen are zero, handle with contig code, call it a int */
    if (count == 0 || blklen == 0) {
        err = MPIR_Dataloop_create_contiguous(0, MPI_INT, dlp_p, dlsz_p, dldepth_p, flag);
        return err;
    }

    is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1;

    if (is_builtin) {
        /* for builtins size == extent, so the size macro suffices here */
        DLOOP_Handle_get_size_macro(oldtype, old_extent);
        old_loop_depth = 0;
    } else {
        DLOOP_Handle_get_extent_macro(oldtype, old_extent);
        /* NOTE(review): this macro is called without the trailing `flag`
         * argument here, while sibling functions in this file pass it —
         * confirm which macro arity this build actually defines. */
        DLOOP_Handle_get_loopdepth_macro(oldtype, old_loop_depth);
    }

    contig_count = MPIR_Type_blockindexed_count_contig(count, blklen, disp_array, dispinbytes, old_extent);

    /* optimization:
     *
     * if contig_count == 1 and block starts at displacement 0,
     * store it as a contiguous rather than a blockindexed dataloop.
     */
    if ((contig_count == 1) &&
        ((!dispinbytes && ((int *) disp_array)[0] == 0) ||
         (dispinbytes && ((MPI_Aint *) disp_array)[0] == 0))) {
        err = MPIR_Dataloop_create_contiguous(icount * iblklen, oldtype, dlp_p, dlsz_p, dldepth_p, flag);
        return err;
    }

    /* optimization:
     *
     * if contig_count == 1 store it as a blockindexed with one
     * element rather than as a lot of individual blocks.
     */
    if (contig_count == 1) {
        /* adjust count and blklen and drop through */
        blklen *= count;
        count = 1;
        iblklen *= icount;
        icount = 1;
    }

    /* optimization:
     *
     * if displacements start at zero and result in a fixed stride,
     * store it as a vector rather than a blockindexed dataloop.
     */
    eff_disp0 = (dispinbytes) ? ((DLOOP_Offset) ((MPI_Aint *) disp_array)[0])
                              : (((DLOOP_Offset) ((int *) disp_array)[0]) * old_extent);

    if (count > 1 && eff_disp0 == (DLOOP_Offset) 0) {
        eff_disp1 = (dispinbytes) ? ((DLOOP_Offset) ((MPI_Aint *) disp_array)[1])
                                  : (((DLOOP_Offset) ((int *) disp_array)[1]) * old_extent);
        last_stride = eff_disp1 - eff_disp0;

        /* verify every successive pair of displacements has the same stride */
        for (i = 2; i < count; i++) {
            eff_disp0 = eff_disp1;
            eff_disp1 = (dispinbytes) ? ((DLOOP_Offset) ((MPI_Aint *) disp_array)[i])
                                      : (((DLOOP_Offset) ((int *) disp_array)[i]) * old_extent);
            if (eff_disp1 - eff_disp0 != last_stride) {
                is_vectorizable = 0;
                break;
            }
        }
        if (is_vectorizable) {
            err = MPIR_Dataloop_create_vector(count, blklen, last_stride, 1, /* strideinbytes */
                                              oldtype, dlp_p, dlsz_p, dldepth_p, flag);
            return err;
        }
    }

    /* TODO: optimization:
     *
     * if displacements result in a fixed stride, but first displacement
     * is not zero, store it as a blockindexed (blklen == 1) of a vector.
     */

    /* TODO: optimization:
     *
     * if a blockindexed of a contig, absorb the contig into the blocklen
     * parameter and keep the same overall depth
     */

    /* otherwise storing as a blockindexed dataloop */

    /* Q: HOW CAN WE TELL IF IT IS WORTH IT TO STORE AS AN
     * INDEXED WITH FEWER CONTIG BLOCKS (IF CONTIG_COUNT IS SMALL)?
     */
    if (is_builtin) {
        MPIR_Dataloop_alloc(DLOOP_KIND_BLOCKINDEXED, count, &new_dlp, &new_loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!new_dlp)
            return -1;
        /* --END ERROR HANDLING-- */

        new_dlp->kind = DLOOP_KIND_BLOCKINDEXED | DLOOP_FINAL_MASK;

        if (flag == DLOOP_DATALOOP_ALL_BYTES) {
            /* describe the loop in terms of MPI_BYTE */
            blklen *= old_extent;
            new_dlp->el_size = 1;
            new_dlp->el_extent = 1;
            new_dlp->el_type = MPI_BYTE;
        } else {
            new_dlp->el_size = old_extent;
            new_dlp->el_extent = old_extent;
            new_dlp->el_type = oldtype;
        }
    } else {
        DLOOP_Dataloop *old_loop_ptr = NULL;
        MPI_Aint old_loop_sz = 0;

        DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr);
        DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz);

        MPIR_Dataloop_alloc_and_copy(DLOOP_KIND_BLOCKINDEXED,
                                     count, old_loop_ptr, old_loop_sz, &new_dlp, &new_loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!new_dlp)
            return -1;
        /* --END ERROR HANDLING-- */

        new_dlp->kind = DLOOP_KIND_BLOCKINDEXED;
        DLOOP_Handle_get_size_macro(oldtype, new_dlp->el_size);
        DLOOP_Handle_get_extent_macro(oldtype, new_dlp->el_extent);
        DLOOP_Handle_get_basic_type_macro(oldtype, new_dlp->el_type);
    }

    new_dlp->loop_params.bi_t.count = count;
    new_dlp->loop_params.bi_t.blocksize = blklen;

    /* copy in displacement parameters
     *
     * regardless of dispinbytes, we store displacements in bytes in loop.
     */
    DLOOP_Type_blockindexed_array_copy(count,
                                       disp_array,
                                       new_dlp->loop_params.bi_t.offset_array, dispinbytes, old_extent);

    *dlp_p = new_dlp;
    *dlsz_p = new_loop_sz;
    *dldepth_p = old_loop_depth + 1;

    return 0;
}
/* Dataloop_create_indexed - create an indexed dataloop
 *
 * Tries a ladder of optimizations before storing a full indexed loop:
 * contig (single block at disp 0), single-element blockindexed (single
 * block at nonzero disp), blockindexed (all block lengths equal; that
 * path in turn looks for the vector case).
 *
 * Returns MPI_SUCCESS on success, an error code on failure.
 */
int PREPEND_PREFIX(Dataloop_create_indexed)(DLOOP_Count icount,
                                            const DLOOP_Size *blocklength_array,
                                            const void *displacement_array,
                                            int dispinbytes,
                                            MPI_Datatype oldtype,
                                            DLOOP_Dataloop **dlp_p,
                                            DLOOP_Size *dlsz_p,
                                            int *dldepth_p,
                                            int flag)
{
    int err, is_builtin;
    int old_loop_depth;
    MPI_Aint i;
    DLOOP_Size new_loop_sz, blksz;
    DLOOP_Count first;               /* index of first nonzero-length block */
    DLOOP_Count old_type_count = 0, contig_count, count;
    DLOOP_Offset old_extent;
    struct DLOOP_Dataloop *new_dlp;

    count = (DLOOP_Count) icount; /* avoid subsequent casting */

    /* if count is zero, handle with contig code, call it an int */
    if (count == 0) {
        err = PREPEND_PREFIX(Dataloop_create_contiguous)(0,
                                                         MPI_INT,
                                                         dlp_p,
                                                         dlsz_p,
                                                         dldepth_p,
                                                         flag);
        return err;
    }

    /* Skip any initial zero-length blocks */
    for (first = 0; first < count; first++)
        if ((DLOOP_Count) blocklength_array[first])
            break;

    is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1;

    if (is_builtin) {
        DLOOP_Handle_get_extent_macro(oldtype, old_extent);
        old_loop_depth = 0;
    }
    else {
        DLOOP_Handle_get_extent_macro(oldtype, old_extent);
        DLOOP_Handle_get_loopdepth_macro(oldtype, old_loop_depth, flag);
    }

    /* total number of oldtype elements described by the type */
    for (i=first; i < count; i++) {
        old_type_count += (DLOOP_Count) blocklength_array[i];
    }

    contig_count = PREPEND_PREFIX(Type_indexed_count_contig)(count,
                                                             blocklength_array,
                                                             displacement_array,
                                                             dispinbytes,
                                                             old_extent);

    /* if contig_count is zero (no data), handle with contig code */
    if (contig_count == 0) {
        err = PREPEND_PREFIX(Dataloop_create_contiguous)(0,
                                                         MPI_INT,
                                                         dlp_p,
                                                         dlsz_p,
                                                         dldepth_p,
                                                         flag);
        return err;
    }

    /* optimization:
     *
     * if contig_count == 1 and block starts at displacement 0,
     * store it as a contiguous rather than an indexed dataloop.
     */
    if ((contig_count == 1) &&
        ((!dispinbytes && ((int *) displacement_array)[first] == 0) ||
         (dispinbytes && ((MPI_Aint *) displacement_array)[first] == 0)))
    {
        err = PREPEND_PREFIX(Dataloop_create_contiguous)(old_type_count,
                                                         oldtype,
                                                         dlp_p,
                                                         dlsz_p,
                                                         dldepth_p,
                                                         flag);
        return err;
    }

    /* optimization:
     *
     * if contig_count == 1 (and displacement != 0), store this as
     * a single element blockindexed rather than a lot of individual
     * blocks.
     */
    if (contig_count == 1) {
        const void *disp_arr_tmp; /* no ternary assignment to avoid clang warnings */
        if (dispinbytes)
            disp_arr_tmp = &(((const MPI_Aint *)displacement_array)[first]);
        else
            disp_arr_tmp = &(((const int *)displacement_array)[first]);
        err = PREPEND_PREFIX(Dataloop_create_blockindexed)(1,
                                                           old_type_count,
                                                           disp_arr_tmp,
                                                           dispinbytes,
                                                           oldtype,
                                                           dlp_p,
                                                           dlsz_p,
                                                           dldepth_p,
                                                           flag);
        return err;
    }

    /* optimization:
     *
     * if block length is the same for all blocks, store it as a
     * blockindexed rather than an indexed dataloop.
     */
    blksz = blocklength_array[first];
    for (i = first+1; i < count; i++) {
        if (blocklength_array[i] != blksz) {
            /* decrement blksz so the equality test below fails: a cheap
             * "not uniform" sentinel (blocklength_array[first] > 0 here,
             * since zero-length leading blocks were skipped) */
            blksz--;
            break;
        }
    }
    if (blksz == blocklength_array[first]) {
        const void *disp_arr_tmp; /* no ternary assignment to avoid clang warnings */
        if (dispinbytes)
            disp_arr_tmp = &(((const MPI_Aint *)displacement_array)[first]);
        else
            disp_arr_tmp = &(((const int *)displacement_array)[first]);
        err = PREPEND_PREFIX(Dataloop_create_blockindexed)(icount-first,
                                                           blksz,
                                                           disp_arr_tmp,
                                                           dispinbytes,
                                                           oldtype,
                                                           dlp_p,
                                                           dlsz_p,
                                                           dldepth_p,
                                                           flag);
        return err;
    }

    /* note: blockindexed looks for the vector optimization */

    /* TODO: optimization:
     *
     * if an indexed of a contig, absorb the contig into the blocklen array
     * and keep the same overall depth
     */

    /* otherwise storing as an indexed dataloop */

    if (is_builtin) {
        PREPEND_PREFIX(Dataloop_alloc)(DLOOP_KIND_INDEXED,
                                       count,
                                       &new_dlp,
                                       &new_loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!new_dlp) return -1;
        /* --END ERROR HANDLING-- */

        new_dlp->kind = DLOOP_KIND_INDEXED | DLOOP_FINAL_MASK;

        if (flag == DLOOP_DATALOOP_ALL_BYTES) {
            /* blocklengths are modified below */
            new_dlp->el_size   = 1;
            new_dlp->el_extent = 1;
            new_dlp->el_type   = MPI_BYTE;
        }
        else {
            new_dlp->el_size   = old_extent;
            new_dlp->el_extent = old_extent;
            new_dlp->el_type   = oldtype;
        }
    }
    else {
        DLOOP_Dataloop *old_loop_ptr = NULL;
        MPI_Aint old_loop_sz = 0;

        DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr, flag);
        DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz, flag);

        /* only contig_count blocks are stored: adjacent blocks have been
         * coalesced by the array-copy helper below */
        PREPEND_PREFIX(Dataloop_alloc_and_copy)(DLOOP_KIND_INDEXED,
                                                contig_count,
                                                old_loop_ptr,
                                                old_loop_sz,
                                                &new_dlp,
                                                &new_loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!new_dlp) return -1;
        /* --END ERROR HANDLING-- */

        new_dlp->kind = DLOOP_KIND_INDEXED;
        DLOOP_Handle_get_size_macro(oldtype, new_dlp->el_size);
        DLOOP_Handle_get_extent_macro(oldtype, new_dlp->el_extent);
        DLOOP_Handle_get_basic_type_macro(oldtype, new_dlp->el_type);
    }

    new_dlp->loop_params.i_t.count        = contig_count;
    new_dlp->loop_params.i_t.total_blocks = old_type_count;

    /* copy in blocklength and displacement parameters (in that order)
     *
     * regardless of dispinbytes, we store displacements in bytes in loop.
     */
    DLOOP_Type_indexed_array_copy(count,
                                  contig_count,
                                  blocklength_array,
                                  displacement_array,
                                  new_dlp->loop_params.i_t.blocksize_array,
                                  new_dlp->loop_params.i_t.offset_array,
                                  dispinbytes,
                                  old_extent);

    if (is_builtin && (flag == DLOOP_DATALOOP_ALL_BYTES)) {
        DLOOP_Count *tmp_blklen_array = new_dlp->loop_params.i_t.blocksize_array;

        for (i=0; i < contig_count; i++) {
            /* increase block lengths so they are in bytes */
            tmp_blklen_array[i] *= old_extent;
        }

        new_dlp->loop_params.i_t.total_blocks *= old_extent;
    }

    *dlp_p     = new_dlp;
    *dlsz_p    = new_loop_sz;
    *dldepth_p = old_loop_depth + 1;

    return MPI_SUCCESS;
}
/* DLOOP_Leaf_index_mpi_flatten
 *
 * Leaf handler for flattening an indexed piece of a type into MPI-style
 * (blocklength, displacement) arrays.  Iterates over up to `count` blocks
 * described by blockarray[]/offsetarray[], consuming at most *blocks_p
 * elements in total; each block is appended to paramp's arrays, coalescing
 * with the previous entry when byte-adjacent.
 *
 * Returns 0 when all requested blocks were processed, or 1 when the output
 * arrays filled up first; in the latter case *blocks_p is reduced to the
 * number of elements actually consumed.
 */
static int DLOOP_Leaf_index_mpi_flatten(DLOOP_Offset *blocks_p,
                                        DLOOP_Count count,
                                        DLOOP_Count *blockarray,
                                        DLOOP_Offset *offsetarray,
                                        DLOOP_Type el_type,
                                        DLOOP_Offset rel_off,
                                        void *bufp,
                                        void *v_paramp)
{
    int i;
    DLOOP_Size size, blocks_left;
    DLOOP_Offset el_size;
    struct PREPEND_PREFIX(mpi_flatten_params) *paramp = v_paramp;

    DLOOP_Handle_get_size_macro(el_type, el_size);
    blocks_left = *blocks_p;

    for (i=0; i < count && blocks_left > 0; i++) {
        int last_idx;
        char *last_end = NULL;

        if (blocks_left > blockarray[i]) {
            size = blockarray[i] * el_size;
            blocks_left -= blockarray[i];
        }
        else {
            /* last pass */
            size = blocks_left * el_size;
            blocks_left = 0;
        }

        last_idx = paramp->index - 1;
        if (last_idx >= 0) {
            /* Since disps can be negative, we cannot use
             * DLOOP_Ensure_Offset_fits_in_pointer to verify that disps +
             * blklens fits in a pointer.  Nor can we use
             * DLOOP_OFFSET_CAST_TO_VOID_PTR to cast the sum to a pointer.
             * Just let it truncate, if the sizeof a pointer is less
             * than the sizeof an MPI_Aint.
             */
            last_end = (char *) DLOOP_OFFSET_CAST_TO_VOID_PTR
                (paramp->disps[last_idx] + (MPI_Aint)(paramp->blklens[last_idx]));
        }

        /* Since bufp can be a displacement and can be negative, we
         * cannot use DLOOP_Ensure_Offset_fits_in_pointer to ensure the
         * sum fits in a pointer.  Just let it truncate.
         */
        if ((last_idx == paramp->length-1) &&
            (last_end != ((char *) bufp + rel_off + offsetarray[i])))
        {
            /* we have used up all our entries, and this one doesn't fit on
             * the end of the last one.  Report back how many elements we
             * actually consumed: subtract the unconsumed remainder plus the
             * current (unstored) block from the requested total. */
            *blocks_p -= (blocks_left + (size / el_size));
            return 1;
        }
        else if (last_idx >= 0 &&
                 (last_end == ((char *) bufp + rel_off + offsetarray[i])))
        {
            /* add this size to the last vector rather than using up new one */
            paramp->blklens[last_idx] += size;
        }
        else {
            /* Since bufp can be a displacement and can be negative, we cannot
             * use DLOOP_VOID_PTR_CAST_TO_OFFSET to cast the sum to a pointer.
             * Just let it sign extend.
             */
            paramp->disps[last_idx+1]   = DLOOP_PTR_DISP_CAST_TO_OFFSET bufp + rel_off + offsetarray[i];
            paramp->blklens[last_idx+1] = size; /* these blocks are in bytes */
            paramp->index++;
        }
    }

    /* if we get here then we processed ALL the blocks; don't need to update
     * blocks_p
     */
    DLOOP_Assert(blocks_left == 0);
    return 0;
}
/* DLOOP_Leaf_vector_mpi_flatten
 *
 * Leaf handler for flattening a vector piece of a type into MPI-style
 * (blocklength, displacement) arrays.
 *
 * Input Parameters:
 * blocks_p - [inout] pointer to a count of blocks (total, for all noncontiguous pieces)
 * count    - # of noncontiguous regions
 * blksz    - size of each noncontiguous region
 * stride   - distance in bytes from start of one region to start of next
 * el_type  - elemental type (e.g. MPI_INT)
 * ...
 *
 * Note: this is only called when the starting position is at the beginning
 * of a whole block in a vector type.
 *
 * Returns 0 when all requested blocks were processed, or 1 when the output
 * arrays filled up first; then *blocks_p is reduced to the number of
 * elements actually consumed.
 *
 * TODO: MAKE THIS CODE SMARTER, USING THE SAME GENERAL APPROACH AS IN THE
 * COUNT BLOCK CODE ABOVE.
 */
static int DLOOP_Leaf_vector_mpi_flatten(DLOOP_Offset *blocks_p,
                                         DLOOP_Count count,
                                         DLOOP_Count blksz,
                                         DLOOP_Offset stride,
                                         DLOOP_Type el_type,
                                         DLOOP_Offset rel_off, /* offset into buffer */
                                         void *bufp, /* start of buffer */
                                         void *v_paramp)
{
    int i;
    DLOOP_Size size, blocks_left;
    DLOOP_Offset el_size;
    struct PREPEND_PREFIX(mpi_flatten_params) *paramp = v_paramp;

    DLOOP_Handle_get_size_macro(el_type, el_size);
    blocks_left = *blocks_p;

    for (i=0; i < count && blocks_left > 0; i++) {
        int last_idx;
        char *last_end = NULL;

        if (blocks_left > blksz) {
            size = blksz * el_size;
            blocks_left -= blksz;
        }
        else {
            /* last pass */
            size = blocks_left * el_size;
            blocks_left = 0;
        }

        last_idx = paramp->index - 1;
        if (last_idx >= 0) {
            /* Since disps can be negative, we cannot use
             * DLOOP_Ensure_Offset_fits_in_pointer to verify that disps +
             * blklens fits in a pointer.  Nor can we use
             * DLOOP_OFFSET_CAST_TO_VOID_PTR to cast the sum to a pointer.
             * Just let it truncate, if the sizeof a pointer is less
             * than the sizeof an MPI_Aint.
             */
            last_end = (char *) DLOOP_OFFSET_CAST_TO_VOID_PTR
                (paramp->disps[last_idx] + (MPI_Aint)(paramp->blklens[last_idx]));
        }

        /* Since bufp can be a displacement and can be negative, we cannot use
         * DLOOP_Ensure_Offset_fits_in_pointer to ensure the sum fits in a pointer.
         * Just let it truncate.
         */
        if ((last_idx == paramp->length-1) &&
            (last_end != ((char *) bufp + rel_off)))
        {
            /* we have used up all our entries, and this one doesn't fit on
             * the end of the last one.  Report back how many elements we
             * actually consumed. */
            *blocks_p -= (blocks_left + (size / el_size));
#ifdef MPID_SP_VERBOSE
            MPL_DBG_MSG_FMT(MPIR_DBG_DATATYPE,VERBOSE,(MPL_DBG_FDEST,"\t[vector to vec exiting (1): next ind = %d, " DLOOP_OFFSET_FMT_DEC_SPEC " blocks processed.\n",
                            paramp->u.pack_vector.index,
                            *blocks_p));
#endif
            return 1;
        }
        else if (last_idx >= 0 &&
                 (last_end == ((char *) bufp + rel_off)))
        {
            /* add this size to the last vector rather than using up new one */
            paramp->blklens[last_idx] += size;
        }
        else {
            /* Since bufp can be a displacement and can be negative, we cannot use
             * DLOOP_VOID_PTR_CAST_TO_OFFSET to cast the sum to a pointer.  Just let it
             * sign extend.
             */
            paramp->disps[last_idx+1]   = DLOOP_PTR_DISP_CAST_TO_OFFSET bufp + rel_off;
            paramp->blklens[last_idx+1] = size;
            paramp->index++;
        }

        /* advance to the start of the next vector block */
        rel_off += stride;
    }

#ifdef MPID_SP_VERBOSE
    MPL_DBG_MSG_FMT(MPIR_DBG_DATATYPE,VERBOSE,(MPL_DBG_FDEST,"\t[vector to vec exiting (2): next ind = %d, " DLOOP_OFFSET_FMT_DEC_SPEC " blocks processed.\n",
                    paramp->u.pack_vector.index,
                    *blocks_p));
#endif

    /* if we get here then we processed ALL the blocks; don't need to update
     * blocks_p
     */
    DLOOP_Assert(blocks_left == 0);
    return 0;
}
/* DLOOP_Dataloop_create_flattened_struct
 *
 * Handle a struct of arbitrary types by flattening every member into a
 * single byte-indexed representation and building an indexed dataloop of
 * MPI_BYTE from it.  Two passes over the members: the first counts the
 * total number of contiguous regions (so the temp arrays can be sized),
 * the second actually flattens each member into those arrays.
 *
 * Returns 0 on success, a dataloop error code on failure.
 *
 * Fixes relative to the previous revision:
 *  - error returns from MPIR_Segment_init no longer leak segp (count pass)
 *    or segp + tmp_blklens + tmp_disps (flatten pass);
 *  - removed a stale `if (err) return err;` after MPIR_Segment_mpi_flatten,
 *    which re-tested an err value that call never set.
 */
static int DLOOP_Dataloop_create_flattened_struct(DLOOP_Count count,
                                                  const int *blklens,
                                                  const MPI_Aint *disps,
                                                  const DLOOP_Type *oldtypes,
                                                  DLOOP_Dataloop **dlp_p,
                                                  MPI_Aint *dlsz_p,
                                                  int *dldepth_p,
                                                  int flag)
{
    /* arbitrary types, convert to bytes and use indexed */
    int i, err, nr_blks = 0;
    DLOOP_Size *tmp_blklens;
    MPI_Aint *tmp_disps; /* since we're calling another fn that takes
                          * this type as an input parameter */
    DLOOP_Offset bytes;
    DLOOP_Segment *segp;

    int first_ind;
    DLOOP_Size last_ind;

    segp = MPIR_Segment_alloc();
    /* --BEGIN ERROR HANDLING-- */
    if (!segp) {
        return DLOOP_Dataloop_create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */

    /* use segment code once to count contiguous regions */
    for (i=0; i < count; i++) {
        int is_basic;

        /* ignore type elements with a zero blklen */
        if (blklens[i] == 0) continue;

        is_basic = (DLOOP_Handle_hasloop_macro(oldtypes[i])) ? 0 : 1;

        if (is_basic && (oldtypes[i] != MPI_LB && oldtypes[i] != MPI_UB)) {
            /* basic type with data contributes exactly one region */
            nr_blks++;
        }
        else /* derived type; get a count of contig blocks */ {
            DLOOP_Count tmp_nr_blks, sz;

            DLOOP_Handle_get_size_macro(oldtypes[i], sz);

            /* if the derived type has some data to contribute,
             * add to flattened representation */
            if (sz > 0) {
                err = MPIR_Segment_init(NULL,
                                        (DLOOP_Count) blklens[i],
                                        oldtypes[i],
                                        segp,
                                        flag);
                if (err) {
                    /* don't leak the segment on the error path */
                    MPIR_Segment_free(segp);
                    return err;
                }

                bytes = SEGMENT_IGNORE_LAST;

                MPIR_Segment_count_contig_blocks(segp, 0, &bytes, &tmp_nr_blks);

                nr_blks += tmp_nr_blks;
            }
        }
    }

    /* it's possible for us to get to this point only to realize that
     * there isn't any data in this type.  in that case do what we always
     * do: store a simple contig of zero ints and call it done.
     */
    if (nr_blks == 0) {
        MPIR_Segment_free(segp);
        err = MPIR_Dataloop_create_contiguous(0,
                                              MPI_INT,
                                              dlp_p,
                                              dlsz_p,
                                              dldepth_p,
                                              flag);
        return err;
    }

    nr_blks += 2; /* safety measure */

    tmp_blklens = (DLOOP_Size *) DLOOP_Malloc(nr_blks * sizeof(DLOOP_Size), MPL_MEM_DATATYPE);
    /* --BEGIN ERROR HANDLING-- */
    if (!tmp_blklens) {
        MPIR_Segment_free(segp);
        return DLOOP_Dataloop_create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */

    tmp_disps = (MPI_Aint *) DLOOP_Malloc(nr_blks * sizeof(MPI_Aint), MPL_MEM_DATATYPE);
    /* --BEGIN ERROR HANDLING-- */
    if (!tmp_disps) {
        DLOOP_Free(tmp_blklens);
        MPIR_Segment_free(segp);
        return DLOOP_Dataloop_create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */

    /* use segment code again to flatten the type */
    first_ind = 0;
    for (i=0; i < count; i++) {
        int is_basic;
        DLOOP_Count sz = -1;

        is_basic = (DLOOP_Handle_hasloop_macro(oldtypes[i])) ? 0 : 1;
        if (!is_basic) DLOOP_Handle_get_size_macro(oldtypes[i], sz);

        /* we're going to use the segment code to flatten the type.
         * we put in our displacement as the buffer location, and use
         * the blocklength as the count value to get N contiguous copies
         * of the type.
         *
         * Note that we're going to get back values in bytes, so that will
         * be our new element type.
         */
        if (oldtypes[i] != MPI_UB && oldtypes[i] != MPI_LB &&
            blklens[i] != 0 && (is_basic || sz > 0))
        {
            err = MPIR_Segment_init((char *) DLOOP_OFFSET_CAST_TO_VOID_PTR disps[i],
                                    (DLOOP_Count) blklens[i],
                                    oldtypes[i],
                                    segp,
                                    0 /* homogeneous */);
            if (err) {
                /* release everything acquired so far before bailing out */
                DLOOP_Free(tmp_disps);
                DLOOP_Free(tmp_blklens);
                MPIR_Segment_free(segp);
                return err;
            }

            last_ind = nr_blks - first_ind;
            bytes = SEGMENT_IGNORE_LAST;
            /* note: returns no error code; fills at most last_ind entries
             * and sets last_ind to the number actually used */
            MPIR_Segment_mpi_flatten(segp,
                                     0,
                                     &bytes,
                                     &tmp_blklens[first_ind],
                                     &tmp_disps[first_ind],
                                     &last_ind);
            first_ind += last_ind;
        }
    }
    nr_blks = first_ind;

#if 0
    if (MPL_DBG_SELECTED(MPIR_DBG_DATATYPE,VERBOSE)) {
        MPL_DBG_OUT(MPIR_DBG_DATATYPE,"--- start of flattened type ---");
        for (i=0; i < nr_blks; i++) {
            MPL_DBG_OUT_FMT(MPIR_DBG_DATATYPE,(MPL_DBG_FDEST,
                                               "a[%d] = (%d, " DLOOP_OFFSET_FMT_DEC_SPEC ")",
                                               i, tmp_blklens[i], tmp_disps[i]));
        }
        MPL_DBG_OUT(MPIR_DBG_DATATYPE,"--- end of flattened type ---");
    }
#endif

    MPIR_Segment_free(segp);

    err = MPIR_Dataloop_create_indexed(nr_blks,
                                       tmp_blklens,
                                       tmp_disps,
                                       1, /* disp in bytes */
                                       MPI_BYTE,
                                       dlp_p,
                                       dlsz_p,
                                       dldepth_p,
                                       flag);

    DLOOP_Free(tmp_blklens);
    DLOOP_Free(tmp_disps);

    return err;
}
/* DLOOP_Dataloop_create_basic_all_bytes_struct
 *
 * Handle a struct made entirely of basic types (plus MPI_LB/MPI_UB
 * markers) when building an all-bytes dataloop: each member collapses to
 * a (byte count, displacement) pair and the result is stored as an
 * indexed dataloop of MPI_BYTE.
 *
 * Returns 0 on success, a dataloop error code on failure.
 *
 * Fix relative to the previous revision: the block length was computed as
 * `(int) sz * blklens[i]`, truncating the DLOOP_Offset size to int BEFORE
 * multiplying and doing the multiply in (overflow-prone) int arithmetic.
 * The product is now computed in DLOOP_Offset width.
 */
static int DLOOP_Dataloop_create_basic_all_bytes_struct(
                DLOOP_Count count,
                const int *blklens,
                const MPI_Aint *disps,
                const DLOOP_Type *oldtypes,
                DLOOP_Dataloop **dlp_p,
                MPI_Aint *dlsz_p,
                int *dldepth_p,
                int flag)
{
    int i, err, cur_pos = 0;
    DLOOP_Size *tmp_blklens;
    MPI_Aint *tmp_disps;

    /* count is an upper bound on number of type instances */
    tmp_blklens = (DLOOP_Size *) DLOOP_Malloc(count * sizeof(DLOOP_Size), MPL_MEM_DATATYPE);
    /* --BEGIN ERROR HANDLING-- */
    if (!tmp_blklens) {
        return DLOOP_Dataloop_create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */

    tmp_disps = (MPI_Aint *) DLOOP_Malloc(count * sizeof(MPI_Aint), MPL_MEM_DATATYPE);
    /* --BEGIN ERROR HANDLING-- */
    if (!tmp_disps) {
        DLOOP_Free(tmp_blklens);
        return DLOOP_Dataloop_create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */

    for (i=0; i < count; i++) {
        /* skip markers and empty members; they contribute no bytes */
        if (oldtypes[i] != MPI_LB && oldtypes[i] != MPI_UB && blklens[i] != 0)
        {
            DLOOP_Offset sz;

            DLOOP_Handle_get_size_macro(oldtypes[i], sz);
            /* multiply in DLOOP_Offset width to avoid int overflow, then
             * narrow once to the array's element type */
            tmp_blklens[cur_pos] = (DLOOP_Size) (sz * (DLOOP_Offset) blklens[i]);
            tmp_disps[cur_pos]   = disps[i];
            cur_pos++;
        }
    }

    err = MPIR_Dataloop_create_indexed(cur_pos,
                                       tmp_blklens,
                                       tmp_disps,
                                       1, /* disp in bytes */
                                       MPI_BYTE,
                                       dlp_p,
                                       dlsz_p,
                                       dldepth_p,
                                       flag);

    DLOOP_Free(tmp_blklens);
    DLOOP_Free(tmp_disps);

    return err;
}
/*@
  Dataloop_create_vector - create the dataloop representation for a
  vector datatype

  Arguments:
+ icount - number of blocks
. iblocklength - number of elements in each block
. astride - distance between block starts (bytes if strideinbytes, else
  in units of oldtype's extent)
. strideinbytes - boolean
. oldtype - base MPI datatype
. dlp_p - [out] new dataloop
. dlsz_p - [out] size of new dataloop
. dldepth_p - [out] depth of new dataloop
- flag - dataloop creation flag

  Returns 0 on success, -1 on failure.
@*/
int PREPEND_PREFIX(Dataloop_create_vector)(int icount,
                                           int iblocklength,
                                           MPI_Aint astride,
                                           int strideinbytes,
                                           DLOOP_Type oldtype,
                                           DLOOP_Dataloop **dlp_p,
                                           int *dlsz_p,
                                           int *dldepth_p,
                                           int flag)
{
    int err, is_builtin;
    int new_loop_sz, new_loop_depth;
    DLOOP_Count count, blocklength;
    DLOOP_Offset stride;
    DLOOP_Dataloop *new_dlp;

    count       = (DLOOP_Count) icount; /* avoid subsequent casting */
    blocklength = (DLOOP_Count) iblocklength;
    stride      = (DLOOP_Offset) astride;

    /* if count or blocklength are zero, handle with contig code,
     * call it a int
     */
    if (count == 0 || blocklength == 0) {
        err = PREPEND_PREFIX(Dataloop_create_contiguous)(0,
                                                         MPI_INT,
                                                         dlp_p,
                                                         dlsz_p,
                                                         dldepth_p,
                                                         flag);
        return err;
    }

    /* optimization:
     *
     * if count == 1, store as a contiguous rather than a vector dataloop.
     */
    if (count == 1) {
        err = PREPEND_PREFIX(Dataloop_create_contiguous)(iblocklength,
                                                         oldtype,
                                                         dlp_p,
                                                         dlsz_p,
                                                         dldepth_p,
                                                         flag);
        return err;
    }

    is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1;

    if (is_builtin) {
        /* NOTE(review): new_loop_sz computed here appears to be overwritten
         * by Dataloop_alloc/Dataloop_alloc_and_copy below — confirm these
         * pre-computations (other than new_loop_depth) are still needed. */
        new_loop_sz    = sizeof(DLOOP_Dataloop);
        new_loop_depth = 1;
    }
    else {
        int old_loop_sz = 0, old_loop_depth = 0;

        DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz, flag);
        DLOOP_Handle_get_loopdepth_macro(oldtype, old_loop_depth, flag);

        /* TODO: ACCOUNT FOR PADDING IN LOOP_SZ HERE */
        new_loop_sz    = sizeof(DLOOP_Dataloop) + old_loop_sz;
        new_loop_depth = old_loop_depth + 1;
    }

    if (is_builtin) {
        DLOOP_Offset basic_sz = 0;

        PREPEND_PREFIX(Dataloop_alloc)(DLOOP_KIND_VECTOR,
                                       count,
                                       &new_dlp,
                                       &new_loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!new_dlp) return -1;
        /* --END ERROR HANDLING-- */

        DLOOP_Handle_get_size_macro(oldtype, basic_sz);

        new_dlp->kind = DLOOP_KIND_VECTOR | DLOOP_FINAL_MASK;

        if (flag == DLOOP_DATALOOP_ALL_BYTES) {
            blocklength       *= basic_sz;
            new_dlp->el_size   = 1;
            new_dlp->el_extent = 1;
            new_dlp->el_type   = MPI_BYTE;

            if(!strideinbytes)
                /* the stride was specified in units of oldtype, now
                 * that we're using bytes, rather than oldtype, we
                 * need to update stride. */
                stride *= basic_sz;
        }
        else {
            new_dlp->el_size   = basic_sz;
            new_dlp->el_extent = new_dlp->el_size;
            new_dlp->el_type   = oldtype;
        }
    }
    else /* user-defined base type (oldtype) */ {
        DLOOP_Dataloop *old_loop_ptr;
        int old_loop_sz = 0;

        DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr, flag);
        DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz, flag);

        PREPEND_PREFIX(Dataloop_alloc_and_copy)(DLOOP_KIND_VECTOR,
                                                count,
                                                old_loop_ptr,
                                                old_loop_sz,
                                                &new_dlp,
                                                &new_loop_sz);
        /* --BEGIN ERROR HANDLING-- */
        if (!new_dlp) return -1;
        /* --END ERROR HANDLING-- */

        new_dlp->kind = DLOOP_KIND_VECTOR;
        DLOOP_Handle_get_size_macro(oldtype, new_dlp->el_size);
        DLOOP_Handle_get_extent_macro(oldtype, new_dlp->el_extent);
        DLOOP_Handle_get_basic_type_macro(oldtype, new_dlp->el_type);
    }

    /* vector-specific members
     *
     * stride stored in dataloop is always in bytes for local rep of type
     */
    new_dlp->loop_params.v_t.count     = count;
    new_dlp->loop_params.v_t.blocksize = blocklength;
    new_dlp->loop_params.v_t.stride    = (strideinbytes) ? stride :
        stride * new_dlp->el_extent;

    *dlp_p     = new_dlp;
    *dlsz_p    = new_loop_sz;
    *dldepth_p = new_loop_depth;

    return 0;
}