/* MPID_Segment_mpi_flatten - flatten a type into a representation * appropriate for passing to hindexed create. * * NOTE: blocks will be in units of bytes when returned. * * WARNING: there's potential for overflow here as we convert from * various types into an index of bytes. * * Parameters: * segp - pointer to segment structure * first - first byte in segment to pack * lastp - in/out parameter describing last byte to pack (and afterwards * the last byte _actually_ packed) * NOTE: actually returns index of byte _after_ last one packed * blklens, disps - the usual blocklength and displacement arrays for MPI * lengthp - in/out parameter describing length of array (and afterwards * the amount of the array that has actual data) */ void PREPEND_PREFIX(Segment_mpi_flatten)(DLOOP_Segment *segp, DLOOP_Offset first, DLOOP_Offset *lastp, DLOOP_Size *blklens, MPI_Aint *disps, DLOOP_Size *lengthp) { struct PREPEND_PREFIX(mpi_flatten_params) params; DLOOP_Assert(*lengthp > 0); params.index = 0; params.length = *lengthp; params.blklens = blklens; params.disps = disps; PREPEND_PREFIX(Segment_manipulate)(segp, first, lastp, DLOOP_Leaf_contig_mpi_flatten, DLOOP_Leaf_vector_mpi_flatten, DLOOP_Leaf_blkidx_mpi_flatten, DLOOP_Leaf_index_mpi_flatten, NULL, ¶ms); /* last value already handled by MPID_Segment_manipulate */ *lengthp = params.index; return; }
/* MPID_Leaf_contig_count_block
 *
 * Leaf callback that counts contiguous regions.  The running state
 * (region count and the offset just past the most recent region) is
 * carried in the contig_blocks_params structure passed via v_paramp.
 *
 * Note: because bufp is just an offset, we can ignore it in our
 *       calculations of # of contig regions.
 *
 * Always returns 0.
 */
static int DLOOP_Leaf_contig_count_block(DLOOP_Offset *blocks_p,
                                         DLOOP_Type el_type,
                                         DLOOP_Offset rel_off,
                                         DLOOP_Buffer bufp ATTRIBUTE((unused)),
                                         void *v_paramp)
{
    struct PREPEND_PREFIX(contig_blocks_params) *state = v_paramp;
    DLOOP_Offset elem_bytes, region_bytes;
    int extends_previous;

    DLOOP_Assert(*blocks_p > 0);

    DLOOP_Handle_get_size_macro(el_type, elem_bytes);
    region_bytes = *blocks_p * elem_bytes;

#ifdef MPID_SP_VERBOSE
    MPIU_dbg_printf("contig count block: count = %d, buf+off = %d, lastloc = "
                    DLOOP_OFFSET_FMT_DEC_SPEC "\n",
                    (int) state->count,
                    (int) ((char *) bufp + rel_off),
                    state->last_loc);
#endif

    /* a region that starts exactly where the previous one ended is merged */
    extends_previous = (state->count > 0 && rel_off == state->last_loc);
    if (!extends_previous) {
        state->count++;
    }

    /* when merging, rel_off equals the old last_loc, so one update form
     * covers both the "extend" and the "new region" cases */
    state->last_loc = rel_off + region_bytes;

    return 0;
}
/* from MPICH PAC_C_MAX_DOUBLE_FP_ALIGN test:
 *
 * Determines maximum struct alignment with floats and doubles by
 * inspecting the sizes of two probe structs (char followed by double,
 * and double followed by char).
 *
 * Return value is 1, 2, 4, or 8.
 */
static int DLOOP_Structalign_double_max()
{
    struct probe_char_double { char a; double b; };
    struct probe_double_char { double b; char a; };

    const int unpadded  = sizeof(char) + sizeof(double);
    const int ext_first = sizeof(struct probe_char_double);
    const int ext_last  = sizeof(struct probe_double_char);

    /* a candidate alignment survives if at least one probe's size is a
     * multiple of it; "packed" means no padding was inserted at all */
    int is_packed = (unpadded == ext_first);
    int is_two    = ((ext_first % 2) == 0 || (ext_last % 2) == 0);
    int is_four   = ((ext_first % 4) == 0 || (ext_last % 4) == 0);
    int is_eight  = !(sizeof(double) == 8 &&
                      (ext_first % 8) != 0 && (ext_last % 8) != 0);

    /* keep only the largest surviving alignment */
    if (is_eight) {
        is_four = 0;
        is_two = 0;
    }
    if (is_four) {
        is_two = 0;
    }

    DLOOP_Assert(is_packed + is_two + is_four + is_eight == 1);

    if (is_packed) {
        return 1;
    }
    if (is_two) {
        return 2;
    }
    return is_four ? 4 : 8;
}
/* from MPICH PAC_C_MAX_FP_ALIGN test:
 *
 * Checks for max C struct floating point alignment.  Note that
 * in this test we are *only* testing float types, whereas in
 * the original test we were testing double and long double also.
 *
 * A candidate alignment is ruled out when neither probe struct's size
 * is a multiple of it, or when it is larger than sizeof(float) (an
 * object's alignment can never exceed its size).  Without the size cap
 * the 8- and 16-byte candidates were never eliminated, so the function
 * reported 16 regardless of the actual layout.
 *
 * Return value is 1, 2, 4, 8, or 16.
 */
static int DLOOP_Structalign_float_max()
{
    int is_packed  = 1;
    int is_two     = 1;
    int is_four    = 1;
    int is_eight   = 1;
    int is_sixteen = 1;
    struct { char a; float b; } char_float;
    struct { float b; char a; } float_char;
    int size, extent1, extent2;

    size = sizeof(char) + sizeof(float);
    extent1 = sizeof(char_float);
    extent2 = sizeof(float_char);

    if (size != extent1) is_packed = 0;
    if ((extent1 % 2) != 0 && (extent2 % 2) != 0) is_two = 0;
    if ((extent1 % 4) != 0 && (extent2 % 4) != 0) is_four = 0;

    /* alignments wider than the type itself are impossible */
    if (sizeof(float) < 8 ||
        ((extent1 % 8) != 0 && (extent2 % 8) != 0))
        is_eight = 0;
    if (sizeof(float) < 16 ||
        ((extent1 % 16) != 0 && (extent2 % 16) != 0))
        is_sixteen = 0;

    /* keep only the largest surviving alignment */
    if (is_sixteen) { is_eight = 0; is_four = 0; is_two = 0; }
    if (is_eight) { is_four = 0; is_two = 0; }
    if (is_four) is_two = 0;

    DLOOP_Assert(is_packed + is_two + is_four + is_eight + is_sixteen == 1);

    if (is_packed) return 1;
    if (is_two)    return 2;
    if (is_four)   return 4;
    if (is_eight)  return 8;
    return 16;
}
/*@
   Dataloop_dup - make a copy of a dataloop

   Input Parameters:
+  old_loop - dataloop to copy (must not be NULL)
-  old_loop_sz - size handed to the allocator and to Dataloop_copy
                 (must be > 0)

   Output Parameters:
.  new_loop_p - receives the newly allocated copy, or NULL if the
                allocation failed

   The function itself returns nothing; failure is reported only
   through a NULL value stored in new_loop_p.
@*/
void PREPEND_PREFIX(Dataloop_dup)(DLOOP_Dataloop *old_loop,
                                  DLOOP_Count old_loop_sz,
                                  DLOOP_Dataloop **new_loop_p)
{
    DLOOP_Dataloop *duplicate;

    DLOOP_Assert(old_loop != NULL);
    DLOOP_Assert(old_loop_sz > 0);

    duplicate = (DLOOP_Dataloop *) DLOOP_Malloc(old_loop_sz);
    if (duplicate != NULL) {
        /* only populate the copy when the allocation succeeded */
        PREPEND_PREFIX(Dataloop_copy)(duplicate, old_loop, old_loop_sz);
    }

    /* NULL on allocation failure, the populated copy otherwise */
    *new_loop_p = duplicate;
}
/*@
   Dataloop_create_pairtype - create dataloop for a pairtype

   Arguments:
+  MPI_Datatype type - the pairtype
.  DLOOP_Dataloop **output_dataloop_ptr
.  int output_dataloop_size
.  int output_dataloop_depth
-  int flag

.N Errors
.N Returns 0 on success, -1 on failure.

   Note:
   This function simply creates the appropriate input parameters for
   use with Dataloop_create_struct and then calls that function.

   This same function could be used to create dataloops for any type
   that actually consists of two distinct elements.

   A type that is not one of the supported pairtypes trips the
   assertion; when assertions are compiled out the function returns -1
   rather than handing uninitialized arrays to Dataloop_create_struct.
@*/
int PREPEND_PREFIX(Dataloop_create_pairtype)(MPI_Datatype type,
                                             DLOOP_Dataloop **dlp_p,
                                             int *dlsz_p,
                                             int *dldepth_p,
                                             int flag)
{
    int blocks[2] = { 1, 1 };
    MPI_Aint disps[2];
    MPI_Datatype types[2];

    DLOOP_Assert(type == MPI_FLOAT_INT || type == MPI_DOUBLE_INT ||
                 type == MPI_LONG_INT || type == MPI_SHORT_INT ||
                 type == MPI_LONG_DOUBLE_INT || type == MPI_2INT);

    /* PAIRTYPE_CONTENTS is expected to fill in types[] and disps[] for
     * the two members of the pair */
    switch(type) {
        case MPI_FLOAT_INT:
            PAIRTYPE_CONTENTS(MPI_FLOAT, float, MPI_INT, int);
            break;
        case MPI_DOUBLE_INT:
            PAIRTYPE_CONTENTS(MPI_DOUBLE, double, MPI_INT, int);
            break;
        case MPI_LONG_INT:
            PAIRTYPE_CONTENTS(MPI_LONG, long, MPI_INT, int);
            break;
        case MPI_SHORT_INT:
            PAIRTYPE_CONTENTS(MPI_SHORT, short, MPI_INT, int);
            break;
        case MPI_LONG_DOUBLE_INT:
            PAIRTYPE_CONTENTS(MPI_LONG_DOUBLE, long double, MPI_INT, int);
            break;
        case MPI_2INT:
            PAIRTYPE_CONTENTS(MPI_INT, int, MPI_INT, int);
            break;
        /* --BEGIN ERROR HANDLING-- */
        default:
            /* not a pairtype: disps[] and types[] were never filled in */
            return -1;
        /* --END ERROR HANDLING-- */
    }

    return PREPEND_PREFIX(Dataloop_create_struct)(2,
                                                  blocks,
                                                  disps,
                                                  types,
                                                  dlp_p,
                                                  dlsz_p,
                                                  dldepth_p,
                                                  flag);
}
/* from MPICH PAC_C_MAX_LONGDOUBLE_FP_ALIGN test:
 *
 * Determines maximum alignment of structs with long doubles by
 * inspecting the sizes of three probe structs.
 *
 * A candidate alignment is ruled out when it is larger than
 * sizeof(long double) (an object's alignment can never exceed its
 * size) or when the probe struct sizes are not multiples of it.
 * Without the size cap, a platform whose long double is 8 bytes could
 * never eliminate the 16-byte candidate and would report 16.
 *
 * Return value is 1, 2, 4, 8, or 16.
 */
static int DLOOP_Structalign_long_double_max()
{
    int is_packed  = 1;
    int is_two     = 1;
    int is_four    = 1;
    int is_eight   = 1;
    int is_sixteen = 1;
    struct { char a; long double b; } char_long_double;
    struct { long double b; char a; } long_double_char;
    struct { long double a; int b; char c; } long_double_int_char;
    int size, extent1, extent2;

    size = sizeof(char) + sizeof(long double);
    extent1 = sizeof(char_long_double);
    extent2 = sizeof(long_double_char);

    if (size != extent1) is_packed = 0;
    if ((extent1 % 2) != 0 && (extent2 % 2) != 0) is_two = 0;
    if ((extent1 % 4) != 0 && (extent2 % 4) != 0) is_four = 0;

    /* alignments wider than the type itself are impossible */
    if (sizeof(long double) < 8 ||
        ((extent1 % 8) != 0 && (extent2 % 8) != 0))
        is_eight = 0;
    if (sizeof(long double) < 16 ||
        ((extent1 % 16) != 0 && (extent2 % 16) != 0))
        is_sixteen = 0;

    /* the third probe catches layouts where the two-member structs
     * happen to land on a multiple of a too-large alignment; any
     * candidate already cleared above simply stays cleared */
    extent1 = sizeof(long_double_int_char);
    if ((extent1 % 2) != 0)  is_two = 0;
    if ((extent1 % 4) != 0)  is_four = 0;
    if ((extent1 % 8) != 0)  is_eight = 0;
    if ((extent1 % 16) != 0) is_sixteen = 0;

    /* keep only the largest surviving alignment */
    if (is_sixteen) { is_eight = 0; is_four = 0; is_two = 0; }
    if (is_eight) { is_four = 0; is_two = 0; }
    if (is_four) is_two = 0;

    DLOOP_Assert(is_packed + is_two + is_four + is_eight + is_sixteen == 1);

    if (is_packed) return 1;
    if (is_two)    return 2;
    if (is_four)   return 4;
    if (is_eight)  return 8;
    return 16;
}
/* MPIDU_Type_indexed - create an indexed (or hindexed) datatype
 *
 * Input Parameters:
 *   count              - number of blocks; 0 yields a zero-length type
 *   blocklength_array  - number of oldtype elements in each block
 *                        (each asserted to be non-negative)
 *   displacement_array - per-block displacements; read as MPI_Aint byte
 *                        offsets when dispinbytes is nonzero, otherwise
 *                        as ints in multiples of the oldtype extent
 *   dispinbytes        - selects how displacement_array is interpreted
 *   oldtype            - type of the elements in every block
 *
 * Output Parameters:
 *   newtype - handle of the new datatype
 *
 * Returns MPI_SUCCESS on success, or an MPI error code built with
 * "**nomem" when the datatype object or the temporary blocklength
 * array cannot be allocated.
 */
int MPIDU_Type_indexed(int count,
                       const int *blocklength_array,
                       const void *displacement_array,
                       int dispinbytes,
                       MPI_Datatype oldtype,
                       MPI_Datatype *newtype)
{
    int mpi_errno = MPI_SUCCESS;
    int is_builtin, old_is_contig;
    int i;
    MPI_Aint contig_count;
    MPI_Aint el_sz, el_ct, old_ct, old_sz;
    MPI_Aint old_lb, old_ub, old_extent, old_true_lb, old_true_ub;
    MPI_Aint min_lb = 0, max_ub = 0, eff_disp;
    MPI_Datatype el_type;

    MPIDU_Datatype *new_dtp;

    if (count == 0) return MPIDU_Type_zerolen(newtype);

    /* sanity check that blocklens are all non-negative */
    for (i = 0; i < count; ++i) {
        DLOOP_Assert(blocklength_array[i] >= 0);
    }

    /* allocate new datatype object and handle */
    new_dtp = (MPIDU_Datatype *) MPIR_Handle_obj_alloc(&MPIDU_Datatype_mem);
    /* --BEGIN ERROR HANDLING-- */
    if (!new_dtp) {
        mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
                                         MPIR_ERR_RECOVERABLE,
                                         "MPIDU_Type_indexed",
                                         __LINE__,
                                         MPI_ERR_OTHER,
                                         "**nomem",
                                         0);
        return mpi_errno;
    }
    /* --END ERROR HANDLING-- */

    /* handle is filled in by MPIR_Handle_obj_alloc() */
    MPIR_Object_set_ref(new_dtp, 1);
    new_dtp->is_permanent = 0;
    new_dtp->is_committed = 0;
    new_dtp->attributes   = NULL;
    new_dtp->cache_id     = 0;
    new_dtp->name[0]      = 0;
    new_dtp->contents     = NULL;

    new_dtp->dataloop       = NULL;
    new_dtp->dataloop_size  = -1;
    new_dtp->dataloop_depth = -1;
    new_dtp->hetero_dloop       = NULL;
    new_dtp->hetero_dloop_size  = -1;
    new_dtp->hetero_dloop_depth = -1;

    is_builtin = (HANDLE_GET_KIND(oldtype) == HANDLE_KIND_BUILTIN);

    if (is_builtin) {
        /* builtins are handled differently than user-defined types because
         * they have no associated dataloop or datatype structure.
         */
        el_sz  = MPIDU_Datatype_get_basic_size(oldtype);
        old_sz = el_sz;
        el_ct  = 1;
        el_type = oldtype;

        old_lb        = 0;
        old_true_lb   = 0;
        old_ub        = (MPI_Aint) el_sz;
        old_true_ub   = (MPI_Aint) el_sz;
        old_extent    = (MPI_Aint) el_sz;
        old_is_contig = 1;

        new_dtp->has_sticky_ub = 0;
        new_dtp->has_sticky_lb = 0;

        MPIR_Assign_trunc(new_dtp->alignsize, el_sz, MPI_Aint);
        new_dtp->builtin_element_size = el_sz;
        new_dtp->basic_type           = el_type;

        new_dtp->max_contig_blocks = count;
    }
    else {
        /* user-defined base type (oldtype) */
        MPIDU_Datatype *old_dtp;

        MPIDU_Datatype_get_ptr(oldtype, old_dtp);

        /* Ensure that "builtin_element_size" fits into an int datatype. */
        MPIR_Ensure_Aint_fits_in_int(old_dtp->builtin_element_size);

        el_sz   = old_dtp->builtin_element_size;
        old_sz  = old_dtp->size;
        el_ct   = old_dtp->n_builtin_elements;
        el_type = old_dtp->basic_type;

        old_lb        = old_dtp->lb;
        old_true_lb   = old_dtp->true_lb;
        old_ub        = old_dtp->ub;
        old_true_ub   = old_dtp->true_ub;
        old_extent    = old_dtp->extent;
        old_is_contig = old_dtp->is_contig;

        /* NOTE(review): alignsize is assigned only on the builtin path
         * above; confirm whether it should be inherited from old_dtp
         * here. */
        new_dtp->has_sticky_lb = old_dtp->has_sticky_lb;
        new_dtp->has_sticky_ub = old_dtp->has_sticky_ub;
        new_dtp->builtin_element_size = (MPI_Aint) el_sz;
        new_dtp->basic_type           = el_type;

        new_dtp->max_contig_blocks = 0;
        for (i = 0; i < count; i++)
            new_dtp->max_contig_blocks
                += old_dtp->max_contig_blocks
                    * ((MPI_Aint) blocklength_array[i]);
    }

    /* find the first nonzero blocklength element */
    i = 0;
    while (i < count && blocklength_array[i] == 0) i++;

    if (i == count) {
        /* every block is empty: discard the object and hand back a
         * zero-length type instead */
        MPIR_Handle_obj_free(&MPIDU_Datatype_mem, new_dtp);
        return MPIDU_Type_zerolen(newtype);
    }

    /* priming for loop */
    old_ct = blocklength_array[i];
    eff_disp = (dispinbytes) ? ((MPI_Aint *) displacement_array)[i] :
        (((MPI_Aint) ((int *) displacement_array)[i]) * old_extent);

    MPIDU_DATATYPE_BLOCK_LB_UB((MPI_Aint) blocklength_array[i],
                               eff_disp,
                               old_lb,
                               old_ub,
                               old_extent,
                               min_lb,
                               max_ub);

    /* determine min lb, max ub, and count of old types in remaining
     * nonzero size blocks
     */
    for (i++; i < count; i++) {
        MPI_Aint tmp_lb, tmp_ub;

        if (blocklength_array[i] > 0) {
            old_ct += blocklength_array[i]; /* add more oldtypes */

            eff_disp = (dispinbytes) ? ((MPI_Aint *) displacement_array)[i] :
                (((MPI_Aint) ((int *) displacement_array)[i]) * old_extent);

            /* calculate ub and lb for this block */
            MPIDU_DATATYPE_BLOCK_LB_UB((MPI_Aint)(blocklength_array[i]),
                                       eff_disp,
                                       old_lb,
                                       old_ub,
                                       old_extent,
                                       tmp_lb,
                                       tmp_ub);

            if (tmp_lb < min_lb) min_lb = tmp_lb;
            if (tmp_ub > max_ub) max_ub = tmp_ub;
        }
    }

    new_dtp->size = old_ct * old_sz;

    new_dtp->lb      = min_lb;
    new_dtp->ub      = max_ub;
    new_dtp->true_lb = min_lb + (old_true_lb - old_lb);
    new_dtp->true_ub = max_ub + (old_true_ub - old_ub);
    new_dtp->extent  = max_ub - min_lb;

    new_dtp->n_builtin_elements = old_ct * el_ct;

    /* new type is only contig for N types if it's all one big
     * block, its size and extent are the same, and the old type
     * was also contiguous.
     */
    new_dtp->is_contig = 0;
    if (old_is_contig) {
        /* the counting routine takes MPI_Aint block lengths, so widen
         * the int array into a temporary copy */
        MPI_Aint *blklens = MPL_malloc(count * sizeof(MPI_Aint));

        /* --BEGIN ERROR HANDLING-- */
        if (blklens == NULL) {
            /* release the half-built datatype before reporting failure */
            MPIR_Handle_obj_free(&MPIDU_Datatype_mem, new_dtp);
            mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
                                             MPIR_ERR_RECOVERABLE,
                                             "MPIDU_Type_indexed",
                                             __LINE__,
                                             MPI_ERR_OTHER,
                                             "**nomem",
                                             0);
            return mpi_errno;
        }
        /* --END ERROR HANDLING-- */

        for (i = 0; i < count; i++)
            blklens[i] = blocklength_array[i];

        contig_count = MPIDU_Type_indexed_count_contig(count,
                                                       blklens,
                                                       displacement_array,
                                                       dispinbytes,
                                                       old_extent);
        new_dtp->max_contig_blocks = contig_count;
        if ((contig_count == 1) &&
            ((MPI_Aint) new_dtp->size == new_dtp->extent))
        {
            new_dtp->is_contig = 1;
        }
        MPL_free(blklens);
    }

    *newtype = new_dtp->handle;
    return mpi_errno;
}
/* MPIDU_Type_calc_footprint - compute size/bounds information for a type
 *
 * Input Parameters:
 *   type - datatype to examine
 *
 * Output Parameters:
 *   tfp - filled in with size, lb, ub, true_lb, true_ub, extent,
 *         alignsz and the sticky lb/ub flags of the type
 *
 * The type is decoded through its envelope and contents.  Named types
 * are answered directly from their size and extent; derived types
 * recurse into their (first) constituent type and then combine that
 * footprint according to the combiner.  Struct types are delegated to
 * DLOOP_Type_calc_footprint_struct; subarray and darray types are
 * converted to an equivalent temporary type first.
 */
void MPIDU_Type_calc_footprint(MPI_Datatype type, DLOOP_Type_footprint *tfp)
{
    int mpi_errno;
    int nr_ints, nr_aints, nr_types, combiner;
    int *ints;
    MPI_Aint *aints;
    MPI_Datatype *types;

    /* used to store parameters for constituent types */
    DLOOP_Offset size = 0, lb = 0, ub = 0, true_lb = 0, true_ub = 0;
    DLOOP_Offset extent = 0, alignsz;
    int has_sticky_lb, has_sticky_ub;

    /* used for vector/hvector/hvector_integer calculations */
    DLOOP_Offset stride;

    /* used for indexed/hindexed calculations */
    DLOOP_Offset disp;

    /* used for calculations on types with more than one block of data */
    DLOOP_Offset i, min_lb, max_ub, ntypes, tmp_lb, tmp_ub;

    /* used for processing subarray and darray types */
    int ndims;
    MPI_Datatype tmptype;

    MPIR_Type_get_envelope(type, &nr_ints, &nr_aints, &nr_types, &combiner);

    if (combiner == MPI_COMBINER_NAMED) {
        int mpisize;
        MPI_Aint mpiextent;

        MPIR_Datatype_get_size_macro(type, mpisize);
        MPIR_Datatype_get_extent_macro(type, mpiextent);
        tfp->size    = (DLOOP_Offset) mpisize;
        tfp->lb      = 0;
        tfp->ub      = (DLOOP_Offset) mpiextent;
        tfp->true_lb = 0;
        tfp->true_ub = (DLOOP_Offset) mpiextent;
        tfp->extent  = (DLOOP_Offset) mpiextent;
        tfp->alignsz = DLOOP_Named_type_alignsize(type, (MPI_Aint) 0);
        tfp->has_sticky_lb = (type == MPI_LB) ? 1 : 0;
        tfp->has_sticky_ub = (type == MPI_UB) ? 1 : 0;

        /* contents were never accessed on this path, so there is
         * nothing to release: ints/aints/types are still unset */
        return;
    }

    /* get access to contents; need it immediately to check for zero count */
    MPIDU_Type_access_contents(type, &ints, &aints, &types);

    /* knock out all the zero count cases */
    if ((combiner == MPI_COMBINER_CONTIGUOUS ||
         combiner == MPI_COMBINER_VECTOR ||
         combiner == MPI_COMBINER_HVECTOR_INTEGER ||
         combiner == MPI_COMBINER_HVECTOR ||
         combiner == MPI_COMBINER_INDEXED_BLOCK ||
         combiner == MPI_COMBINER_HINDEXED_BLOCK ||
         combiner == MPI_COMBINER_INDEXED ||
         combiner == MPI_COMBINER_HINDEXED_INTEGER ||
         combiner == MPI_COMBINER_STRUCT_INTEGER ||
         combiner == MPI_COMBINER_STRUCT) && ints[0] == 0)
    {
        tfp->size = tfp->lb = tfp->ub = tfp->extent = tfp->alignsz = 0;
        tfp->true_lb = tfp->true_ub = 0;
        tfp->has_sticky_lb = tfp->has_sticky_ub = 0;
        goto clean_exit;
    }

    if (combiner != MPI_COMBINER_STRUCT &&
        combiner != MPI_COMBINER_STRUCT_INTEGER)
    {
        /* every non-struct combiner has a single constituent type */
        DLOOP_Type_footprint cfp;

        MPIDU_Type_calc_footprint(types[0], &cfp);
        size    = cfp.size;
        lb      = cfp.lb;
        ub      = cfp.ub;
        true_lb = cfp.true_lb;
        true_ub = cfp.true_ub;
        extent  = cfp.extent;
        alignsz = cfp.alignsz;
        has_sticky_lb = cfp.has_sticky_lb;
        has_sticky_ub = cfp.has_sticky_ub;

        /* initialize some common values so we don't have to assign
         * them in every case below.
         */
        tfp->alignsz = alignsz;
        tfp->has_sticky_lb = has_sticky_lb;
        tfp->has_sticky_ub = has_sticky_ub;
    }

    switch(combiner)
    {
        case MPI_COMBINER_DUP:
            tfp->size    = size;
            tfp->lb      = lb;
            tfp->ub      = ub;
            tfp->true_lb = true_lb;
            tfp->true_ub = true_ub;
            tfp->extent  = extent;
            break;
        case MPI_COMBINER_RESIZED:
            tfp->size    = size;
            tfp->lb      = aints[0]; /* lb */
            tfp->ub      = aints[0] + aints[1];
            tfp->true_lb = true_lb;
            tfp->true_ub = true_ub;
            tfp->extent  = aints[1]; /* extent */
            tfp->has_sticky_lb = 1;
            tfp->has_sticky_ub = 1;
            break;
        case MPI_COMBINER_CONTIGUOUS:
            DLOOP_DATATYPE_CONTIG_LB_UB(ints[0] /* count */,
                                        lb, ub, extent,
                                        tfp->lb, tfp->ub);
            tfp->true_lb = tfp->lb + (true_lb - lb);
            tfp->true_ub = tfp->ub + (true_ub - ub);
            tfp->size    = (DLOOP_Offset) ints[0] * size;
            tfp->extent  = tfp->ub - tfp->lb;
            break;
        case MPI_COMBINER_VECTOR:
        case MPI_COMBINER_HVECTOR:
        case MPI_COMBINER_HVECTOR_INTEGER:
            /* normalize the stride to bytes */
            if (combiner == MPI_COMBINER_VECTOR)
                stride = (DLOOP_Offset) ints[2] * extent;
            else if (combiner == MPI_COMBINER_HVECTOR)
                stride = aints[0];
            else /* HVECTOR_INTEGER */
                stride = (DLOOP_Offset) ints[2];

            DLOOP_DATATYPE_VECTOR_LB_UB(ints[0] /* count */,
                                        stride /* stride in bytes */,
                                        ints[1] /* blklen */,
                                        lb, ub, extent,
                                        tfp->lb, tfp->ub);
            tfp->true_lb = tfp->lb + (true_lb - lb);
            tfp->true_ub = tfp->ub + (true_ub - ub);
            tfp->size    = (DLOOP_Offset) ints[0] *
                (DLOOP_Offset) ints[1] * size;
            tfp->extent  = tfp->ub - tfp->lb;
            break;
        case MPI_COMBINER_INDEXED_BLOCK:
            /* prime min_lb and max_ub */
            DLOOP_DATATYPE_BLOCK_LB_UB(ints[1] /* blklen */,
                                       (DLOOP_Offset) ints[2] * extent /* disp */,
                                       lb, ub, extent,
                                       min_lb, max_ub);

            for (i=1; i < ints[0]; i++) {
                DLOOP_DATATYPE_BLOCK_LB_UB(ints[1] /* blklen */,
                                           (DLOOP_Offset) ints[i+2] * extent /* disp */,
                                           lb, ub, extent,
                                           tmp_lb, tmp_ub);
                if (tmp_lb < min_lb) min_lb = tmp_lb;
                if (tmp_ub > max_ub) max_ub = tmp_ub;
            }
            tfp->size    = (DLOOP_Offset) ints[0] *
                (DLOOP_Offset) ints[1] * size;
            tfp->lb      = min_lb;
            tfp->ub      = max_ub;
            tfp->true_lb = min_lb + (true_lb - lb);
            tfp->true_ub = max_ub + (true_ub - ub);
            tfp->extent  = tfp->ub - tfp->lb;
            break;
        case MPI_COMBINER_HINDEXED_BLOCK:
            /* The integer contents hold only count and blocklength for
             * this combiner; the byte displacements live in the address
             * array, exactly as for MPI_COMBINER_HINDEXED below. */

            /* prime min_lb and max_ub */
            DLOOP_DATATYPE_BLOCK_LB_UB(ints[1] /* blklen */,
                                       (DLOOP_Offset) aints[0] /* disp */,
                                       lb, ub, extent,
                                       min_lb, max_ub);

            for (i=1; i < ints[0]; i++) {
                DLOOP_DATATYPE_BLOCK_LB_UB(ints[1] /* blklen */,
                                           (DLOOP_Offset) aints[i] /* disp */,
                                           lb, ub, extent,
                                           tmp_lb, tmp_ub);
                if (tmp_lb < min_lb) min_lb = tmp_lb;
                if (tmp_ub > max_ub) max_ub = tmp_ub;
            }
            tfp->size    = (DLOOP_Offset) ints[0] *
                (DLOOP_Offset) ints[1] * size;
            tfp->lb      = min_lb;
            tfp->ub      = max_ub;
            tfp->true_lb = min_lb + (true_lb - lb);
            tfp->true_ub = max_ub + (true_ub - ub);
            tfp->extent  = tfp->ub - tfp->lb;
            break;
        case MPI_COMBINER_INDEXED:
        case MPI_COMBINER_HINDEXED_INTEGER:
        case MPI_COMBINER_HINDEXED:
            /* find first non-zero blocklength element */
            for (i=0; i < ints[0] && ints[i+1] == 0; i++);

            if (i == ints[0]) {
                /* all zero blocklengths */
                tfp->size = tfp->lb = tfp->ub = tfp->extent = tfp->alignsz = 0;
                /* match the zero-count case above: leave no field unset */
                tfp->true_lb = tfp->true_ub = 0;
                tfp->has_sticky_lb = tfp->has_sticky_ub = 0;
            }
            else {
                /* prime min_lb, max_ub, count */
                ntypes = ints[i+1];
                if (combiner == MPI_COMBINER_INDEXED)
                    disp = (DLOOP_Offset) ints[ints[0]+i+1] * extent;
                else if (combiner == MPI_COMBINER_HINDEXED_INTEGER)
                    disp = (DLOOP_Offset) ints[ints[0]+i+1];
                else /* MPI_COMBINER_HINDEXED */
                    disp = aints[i];

                DLOOP_DATATYPE_BLOCK_LB_UB(ints[i+1] /* blklen */,
                                           disp,
                                           lb, ub, extent,
                                           min_lb, max_ub);

                for (i++; i < ints[0]; i++) {
                    /* skip zero blocklength elements */
                    if (ints[i+1] == 0) continue;

                    ntypes += ints[i+1];
                    if (combiner == MPI_COMBINER_INDEXED)
                        disp = (DLOOP_Offset) ints[ints[0]+i+1] * extent;
                    else if (combiner == MPI_COMBINER_HINDEXED_INTEGER)
                        disp = (DLOOP_Offset) ints[ints[0]+i+1];
                    else /* MPI_COMBINER_HINDEXED */
                        disp = aints[i];

                    DLOOP_DATATYPE_BLOCK_LB_UB(ints[i+1],
                                               disp,
                                               lb, ub, extent,
                                               tmp_lb, tmp_ub);
                    if (tmp_lb < min_lb) min_lb = tmp_lb;
                    if (tmp_ub > max_ub) max_ub = tmp_ub;
                }
                tfp->size    = ntypes * size;
                tfp->lb      = min_lb;
                tfp->ub      = max_ub;
                tfp->true_lb = min_lb + (true_lb - lb);
                tfp->true_ub = max_ub + (true_ub - ub);
                tfp->extent  = tfp->ub - tfp->lb;
            }
            break;
        case MPI_COMBINER_STRUCT_INTEGER:
            /* deliberately always fails: this combiner is not handled */
            DLOOP_Assert(combiner != MPI_COMBINER_STRUCT_INTEGER);
            break;
        case MPI_COMBINER_STRUCT:
            /* sufficiently complicated to pull out into separate fn */
            DLOOP_Type_calc_footprint_struct(type,
                                             combiner, ints, aints, types,
                                             tfp);
            break;
        case MPI_COMBINER_SUBARRAY:
            ndims = ints[0];
            MPIDU_Type_convert_subarray(ndims,
                                        &ints[1] /* sizes */,
                                        &ints[1+ndims] /* subsz */,
                                        &ints[1+2*ndims] /* strts */,
                                        ints[1+3*ndims] /* order */,
                                        types[0],
                                        &tmptype);
            MPIDU_Type_calc_footprint(tmptype, tfp);
            MPIR_Type_free_impl(&tmptype);
            break;
        case MPI_COMBINER_DARRAY:
            ndims = ints[2];

            MPIDU_Type_convert_darray(ints[0] /* size */,
                                      ints[1] /* rank */,
                                      ndims,
                                      &ints[3] /* gsizes */,
                                      &ints[3+ndims] /*distribs */,
                                      &ints[3+2*ndims] /* dargs */,
                                      &ints[3+3*ndims] /* psizes */,
                                      ints[3+4*ndims] /* order */,
                                      types[0],
                                      &tmptype);

            MPIDU_Type_calc_footprint(tmptype, tfp);
            MPIR_Type_free_impl(&tmptype);
            break;
        case MPI_COMBINER_F90_REAL:
        case MPI_COMBINER_F90_COMPLEX:
        case MPI_COMBINER_F90_INTEGER:
        default:
            DLOOP_Assert(0);
            break;
    }

 clean_exit:
    /* only reached after MPIDU_Type_access_contents has been called */
    MPIDU_Type_release_contents(type, &ints, &aints, &types);
    return;
}
/* from MPICH PAC_C_MAX_INTEGER_ALIGN test:
 *
 * Tests for max C struct integer alignment.  Note that this is for *all*
 * integer types.
 *
 * Each candidate alignment (packed, 2, 4, 8) starts out plausible and is
 * struck out as soon as one of the probe structs below contradicts it.
 *
 * Return value is 1, 2, 4, or 8.
 */
static int DLOOP_Structalign_integer_max()
{
    struct probe_char_int        { char a; int b; };
    struct probe_char_short      { char a; short b; };
    struct probe_char_long       { char a; long b; };
    struct probe_char_int_char   { char a; int b; char c; };
    struct probe_char_short_char { char a; short b; char c; };
#ifdef HAVE_LONG_LONG_INT
    struct probe_lli_char { long long int a; char b; };
    struct probe_char_lli { char a; long long int b; };
    int extent_alt;
#endif

    int is_packed = 1, is_two = 1, is_four = 1, is_eight = 1;
    int unpadded, extent;

    /* assume max integer alignment isn't 8 if we don't have
     * an eight-byte value.
     */
#ifdef HAVE_LONG_LONG_INT
    if (sizeof(int) < 8 && sizeof(long) < 8 && sizeof(long long int) < 8) {
        is_eight = 0;
    }
#else
    if (sizeof(int) < 8 && sizeof(long) < 8) {
        is_eight = 0;
    }
#endif

    /* char + int */
    unpadded = sizeof(char) + sizeof(int);
    extent   = sizeof(struct probe_char_int);
    if (unpadded != extent) { is_packed = 0; }
    if ((extent % 2) != 0)  { is_two = 0; }
    if ((extent % 4) != 0)  { is_four = 0; }
    if (sizeof(int) == 8 && (extent % 8) != 0) { is_eight = 0; }

    /* char + short */
    unpadded = sizeof(char) + sizeof(short);
    extent   = sizeof(struct probe_char_short);
    if (unpadded != extent) { is_packed = 0; }
    if ((extent % 2) != 0)  { is_two = 0; }
    if (sizeof(short) == 4 && (extent % 4) != 0) { is_four = 0; }
    if (sizeof(short) == 8 && (extent % 8) != 0) { is_eight = 0; }

    /* char + long */
    unpadded = sizeof(char) + sizeof(long);
    extent   = sizeof(struct probe_char_long);
    if (unpadded != extent) { is_packed = 0; }
    if ((extent % 2) != 0)  { is_two = 0; }
    if ((extent % 4) != 0)  { is_four = 0; }
    if (sizeof(long) == 8 && (extent % 8) != 0) { is_eight = 0; }

#ifdef HAVE_LONG_LONG_INT
    /* long long is probed in both member orders; an alignment is only
     * struck out when neither ordering is a multiple of it */
    unpadded   = sizeof(char) + sizeof(long long int);
    extent     = sizeof(struct probe_lli_char);
    extent_alt = sizeof(struct probe_char_lli);
    if (unpadded != extent) { is_packed = 0; }
    if ((extent % 2) != 0 && (extent_alt % 2) != 0) { is_two = 0; }
    if ((extent % 4) != 0 && (extent_alt % 4) != 0) { is_four = 0; }
    if (sizeof(long long int) >= 8 &&
        (extent % 8) != 0 && (extent_alt % 8) != 0)
    {
        is_eight = 0;
    }
#endif

    /* char + int + char */
    unpadded = sizeof(char) + sizeof(int) + sizeof(char);
    extent   = sizeof(struct probe_char_int_char);
    if (unpadded != extent) { is_packed = 0; }
    if ((extent % 2) != 0)  { is_two = 0; }
    if ((extent % 4) != 0)  { is_four = 0; }
    if (sizeof(int) == 8 && (extent % 8) != 0) { is_eight = 0; }

    /* char + short + char */
    unpadded = sizeof(char) + sizeof(short) + sizeof(char);
    extent   = sizeof(struct probe_char_short_char);
    if (unpadded != extent) { is_packed = 0; }
    if ((extent % 2) != 0)  { is_two = 0; }
    if (sizeof(short) == 4 && (extent % 4) != 0) { is_four = 0; }
    if (sizeof(short) == 8 && (extent % 8) != 0) { is_eight = 0; }

    /* keep only the largest surviving alignment */
    if (is_eight) {
        is_four = 0;
        is_two = 0;
    }
    if (is_four) {
        is_two = 0;
    }

    DLOOP_Assert(is_packed + is_two + is_four + is_eight == 1);

    if (is_packed) {
        return 1;
    }
    if (is_two) {
        return 2;
    }
    return is_four ? 4 : 8;
}
/* DLOOP_Type_indexed_array_copy()
 *
 * Copies the block length / displacement arrays into the output
 * arrays, merging a block into its predecessor when it starts exactly
 * where the predecessor ends and skipping zero-length blocks after the
 * first entry.  The first input entry is always copied as-is.
 *
 * Extent passed in is for the original type.
 *
 * Output displacements are always output in bytes, while block
 * lengths are always output in terms of the base type.
 *
 * contig_count is the number of output entries the caller expects;
 * it is checked by assertion only.
 */
static void DLOOP_Type_indexed_array_copy(DLOOP_Count count,
                                          DLOOP_Count contig_count,
                                          int *in_blklen_array,
                                          void *in_disp_array,
                                          DLOOP_Count *out_blklen_array,
                                          DLOOP_Offset *out_disp_array,
                                          int dispinbytes,
                                          DLOOP_Offset old_extent)
{
    DLOOP_Count i, cur_idx = 0;
    /* two typed views of the same input; only the one selected by
     * dispinbytes is ever dereferenced */
    int *elem_disps = (int *) in_disp_array;
    MPI_Aint *byte_disps = (MPI_Aint *) in_disp_array;

    out_blklen_array[0] = (DLOOP_Count) in_blklen_array[0];
    out_disp_array[0] = dispinbytes
        ? (DLOOP_Offset) byte_disps[0]
        : (DLOOP_Offset) elem_disps[0] * old_extent;

    for (i = 1; i < count; i++) {
        DLOOP_Offset block_start, prev_end;

        if (in_blklen_array[i] == 0) {
            continue;
        }

        /* displacement of this block, normalized to bytes */
        block_start = dispinbytes
            ? (DLOOP_Offset) byte_disps[i]
            : ((DLOOP_Offset) elem_disps[i]) * old_extent;

        prev_end = out_disp_array[cur_idx] +
            ((DLOOP_Offset) out_blklen_array[cur_idx]) * old_extent;

        if (prev_end == block_start) {
            /* adjacent to current block; add to block */
            out_blklen_array[cur_idx] += (DLOOP_Count) in_blklen_array[i];
        }
        else {
            cur_idx++;
            DLOOP_Assert(cur_idx < contig_count);
            out_disp_array[cur_idx]   = block_start;
            out_blklen_array[cur_idx] = (DLOOP_Count) in_blklen_array[i];
        }
    }

    DLOOP_Assert(cur_idx == contig_count - 1);
    return;
}
/* DLOOP_Type_indexed_array_copy() * * Copies arrays into place, combining adjacent contiguous regions and * dropping zero-length regions. * * Extent passed in is for the original type. * * Output displacements are always output in bytes, while block * lengths are always output in terms of the base type. */ static void DLOOP_Type_indexed_array_copy(DLOOP_Count count, DLOOP_Count contig_count, const DLOOP_Size *in_blklen_array, const void *in_disp_array, DLOOP_Count *out_blklen_array, DLOOP_Offset *out_disp_array, int dispinbytes, DLOOP_Offset old_extent) { DLOOP_Count i, first, cur_idx = 0; /* Skip any initial zero-length blocks */ for (first = 0; first < count; ++first) if ((DLOOP_Count) in_blklen_array[first]) break; out_blklen_array[0] = (DLOOP_Count) in_blklen_array[first]; if (!dispinbytes) { out_disp_array[0] = (DLOOP_Offset) ((int *) in_disp_array)[first] * old_extent; for (i = first+1; i < count; ++i) { if (in_blklen_array[i] == 0) { continue; } else if (out_disp_array[cur_idx] + ((DLOOP_Offset) out_blklen_array[cur_idx]) * old_extent == ((DLOOP_Offset) ((int *) in_disp_array)[i]) * old_extent) { /* adjacent to current block; add to block */ out_blklen_array[cur_idx] += (DLOOP_Count) in_blklen_array[i]; } else { cur_idx++; DLOOP_Assert(cur_idx < contig_count); out_disp_array[cur_idx] = ((DLOOP_Offset) ((int *) in_disp_array)[i]) * old_extent; out_blklen_array[cur_idx] = in_blklen_array[i]; } } } else /* input displacements already in bytes */ { out_disp_array[0] = (DLOOP_Offset) ((MPI_Aint *) in_disp_array)[first]; for (i = first+1; i < count; ++i) { if (in_blklen_array[i] == 0) { continue; } else if (out_disp_array[cur_idx] + ((DLOOP_Offset) out_blklen_array[cur_idx]) * old_extent == ((DLOOP_Offset) ((MPI_Aint *) in_disp_array)[i])) { /* adjacent to current block; add to block */ out_blklen_array[cur_idx] += in_blklen_array[i]; } else { cur_idx++; DLOOP_Assert(cur_idx < contig_count); out_disp_array[cur_idx] = (DLOOP_Offset) ((MPI_Aint *) 
in_disp_array)[i]; out_blklen_array[cur_idx] = (DLOOP_Count) in_blklen_array[i]; } } } DLOOP_Assert(cur_idx == contig_count - 1); return; }
/*@
  Dataloop_update - update pointers after a copy operation

  Input Parameters:
+ dataloop - pointer to loop to update
- ptrdiff - value indicating offset between old and new pointer values

  This function is used to recursively update all the pointers in a
  dataloop tree.  Every pointer stored in the loop's parameters is
  shifted by ptrdiff; for non-final loops the child loop(s) are then
  updated the same way.
@*/
void PREPEND_PREFIX(Dataloop_update)(DLOOP_Dataloop *dataloop,
                                     DLOOP_Offset ptrdiff)
{
    /* OPT: only declare these variables down in the Struct case */
    int i;
    DLOOP_Dataloop **looparray;

/*
 * Shift one stored pointer by ptrdiff.  This is really of the form
 *
 *     ((char *) ptr) += ptrdiff;
 *
 * but some compilers spit out warnings about casting on the LHS, so
 * the value is round-tripped through an offset instead.  The pointer
 * is asserted non-NULL and the shifted value is checked to fit in a
 * pointer before it is stored.
 */
#define DLOOP_RELOCATE_PTR_(ptr_, type_)                                      \
    do {                                                                      \
        DLOOP_Assert(ptr_);                                                   \
        DLOOP_Ensure_Offset_fits_in_pointer(DLOOP_VOID_PTR_CAST_TO_OFFSET     \
                                            (char *) (ptr_) + ptrdiff);       \
        (ptr_) = (type_) DLOOP_OFFSET_CAST_TO_VOID_PTR                        \
            (DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) (ptr_) + ptrdiff);        \
    } while (0)

    switch(dataloop->kind & DLOOP_KIND_MASK) {
        case DLOOP_KIND_CONTIG:
        case DLOOP_KIND_VECTOR:
            /* contig and vector use the common cm_t struct */
            if (!(dataloop->kind & DLOOP_FINAL_MASK)) {
                DLOOP_RELOCATE_PTR_(dataloop->loop_params.cm_t.dataloop,
                                    DLOOP_Dataloop *);

                PREPEND_PREFIX(Dataloop_update)
                    (dataloop->loop_params.cm_t.dataloop, ptrdiff);
            }
            break;

        case DLOOP_KIND_BLOCKINDEXED:
            DLOOP_RELOCATE_PTR_(dataloop->loop_params.bi_t.offset_array,
                                DLOOP_Offset *);

            if (!(dataloop->kind & DLOOP_FINAL_MASK)) {
                DLOOP_RELOCATE_PTR_(dataloop->loop_params.bi_t.dataloop,
                                    DLOOP_Dataloop *);

                PREPEND_PREFIX(Dataloop_update)
                    (dataloop->loop_params.bi_t.dataloop, ptrdiff);
            }
            break;

        case DLOOP_KIND_INDEXED:
            DLOOP_RELOCATE_PTR_(dataloop->loop_params.i_t.blocksize_array,
                                DLOOP_Count *);
            DLOOP_RELOCATE_PTR_(dataloop->loop_params.i_t.offset_array,
                                DLOOP_Offset *);

            if (!(dataloop->kind & DLOOP_FINAL_MASK)) {
                DLOOP_RELOCATE_PTR_(dataloop->loop_params.i_t.dataloop,
                                    DLOOP_Dataloop *);

                PREPEND_PREFIX(Dataloop_update)
                    (dataloop->loop_params.i_t.dataloop, ptrdiff);
            }
            break;

        case DLOOP_KIND_STRUCT:
            DLOOP_RELOCATE_PTR_(dataloop->loop_params.s_t.blocksize_array,
                                DLOOP_Count *);
            DLOOP_RELOCATE_PTR_(dataloop->loop_params.s_t.offset_array,
                                DLOOP_Offset *);

            if (dataloop->kind & DLOOP_FINAL_MASK) break;

            DLOOP_RELOCATE_PTR_(dataloop->loop_params.s_t.dataloop_array,
                                DLOOP_Dataloop **);

            /* fix the N dataloop pointers too */
            looparray = dataloop->loop_params.s_t.dataloop_array;
            for (i=0; i < dataloop->loop_params.s_t.count; i++) {
                DLOOP_RELOCATE_PTR_(looparray[i], DLOOP_Dataloop *);
            }

            /* recurse only after every entry has been relocated */
            for (i=0; i < dataloop->loop_params.s_t.count; i++) {
                PREPEND_PREFIX(Dataloop_update)(looparray[i], ptrdiff);
            }
            break;

        default:
            /* --BEGIN ERROR HANDLING-- */
            DLOOP_Assert(0);
            break;
            /* --END ERROR HANDLING-- */
    }

#undef DLOOP_RELOCATE_PTR_

    return;
}
static int DLOOP_Leaf_index_mpi_flatten(DLOOP_Offset *blocks_p, DLOOP_Count count, DLOOP_Count *blockarray, DLOOP_Offset *offsetarray, DLOOP_Type el_type, DLOOP_Offset rel_off, void *bufp, void *v_paramp) { int i; DLOOP_Size size, blocks_left; DLOOP_Offset el_size; struct PREPEND_PREFIX(mpi_flatten_params) *paramp = v_paramp; DLOOP_Handle_get_size_macro(el_type, el_size); blocks_left = *blocks_p; for (i=0; i < count && blocks_left > 0; i++) { int last_idx; char *last_end = NULL; if (blocks_left > blockarray[i]) { size = blockarray[i] * el_size; blocks_left -= blockarray[i]; } else { /* last pass */ size = blocks_left * el_size; blocks_left = 0; } last_idx = paramp->index - 1; if (last_idx >= 0) { /* Since disps can be negative, we cannot use * DLOOP_Ensure_Offset_fits_in_pointer to verify that disps + * blklens fits in a pointer. Nor can we use * DLOOP_OFFSET_CAST_TO_VOID_PTR to cast the sum to a pointer. * Just let it truncate, if the sizeof a pointer is less * than the sizeof an MPI_Aint. */ last_end = (char *) DLOOP_OFFSET_CAST_TO_VOID_PTR (paramp->disps[last_idx] + (MPI_Aint)(paramp->blklens[last_idx])); } /* Since bufp can be a displacement and can be negative, we * cannot use DLOOP_Ensure_Offset_fits_in_pointer to ensure the * sum fits in a pointer. Just let it truncate. */ if ((last_idx == paramp->length-1) && (last_end != ((char *) bufp + rel_off + offsetarray[i]))) { /* we have used up all our entries, and this one doesn't fit on * the end of the last one. */ *blocks_p -= (blocks_left + (size / el_size)); return 1; } else if (last_idx >= 0 && (last_end == ((char *) bufp + rel_off + offsetarray[i]))) { /* add this size to the last vector rather than using up new one */ paramp->blklens[last_idx] += size; } else { /* Since bufp can be a displacement and can be negative, we cannot * use DLOOP_VOID_PTR_CAST_TO_OFFSET to cast the sum to a pointer. * Just let it sign extend. 
*/ paramp->disps[last_idx+1] = DLOOP_PTR_DISP_CAST_TO_OFFSET bufp + rel_off + offsetarray[i]; paramp->blklens[last_idx+1] = size; /* these blocks are in bytes */ paramp->index++; } } /* if we get here then we processed ALL the blocks; don't need to update * blocks_p */ DLOOP_Assert(blocks_left == 0); return 0; }
/*@
  Dataloop_alloc_and_copy - allocate the resources used to store a dataloop
                            and copy in old dataloop as appropriate

Input Parameters:
+ kind          - kind of dataloop to allocate
. count         - number of elements in dataloop (kind dependent)
. old_loop      - pointer to old dataloop (or NULL for none)
. old_loop_sz   - size of old dataloop (should be zero if old_loop is NULL)
. new_loop_p    - address at which to store new dataloop pointer
- new_loop_sz_p - pointer to integer in which to store new loop size

  Notes:
  The count parameter passed into this function will often be different
  from the count passed in at the MPI layer.

  Everything lives in a single allocation: the dataloop structure itself,
  then the kind-specific arrays, then the copy of old_loop at the very end.
  Each piece is padded to the alignment size.

  If the allocation fails, NULL is stored in new_loop_p and new_loop_sz_p
  is left untouched.
@*/
void PREPEND_PREFIX(Dataloop_alloc_and_copy)(int kind,
                                             DLOOP_Count count,
                                             DLOOP_Dataloop *old_loop,
                                             DLOOP_Size old_loop_sz,
                                             DLOOP_Dataloop **new_loop_p,
                                             DLOOP_Size *new_loop_sz_p)
{
    int align_sz = 8; /* default aligns everything to 8-byte boundaries */
    int rem;
    DLOOP_Size hdr_sz = sizeof(DLOOP_Dataloop);
    DLOOP_Size offsets_sz = 0, blocks_sz = 0, ptrs_sz = 0, extents_sz = 0;
    DLOOP_Size total_sz;
    DLOOP_Dataloop *loop;
    DLOOP_Dataloop *child;
    char *base, *cursor, *tail;

#ifdef HAVE_MAX_STRUCT_ALIGNMENT
    if (align_sz > HAVE_MAX_STRUCT_ALIGNMENT) {
        align_sz = HAVE_MAX_STRUCT_ALIGNMENT;
    }
#endif

    if (old_loop != NULL) {
        DLOOP_Assert((old_loop_sz % align_sz) == 0);
    }

    /* work out how much room each kind-specific array needs; the cases
     * deliberately cascade so that richer kinds pick up the arrays of the
     * simpler ones */
    switch (kind) {
        case DLOOP_KIND_STRUCT:
            /* dataloop pointers and extents */
            ptrs_sz    = count * sizeof(DLOOP_Dataloop *);
            extents_sz = count * sizeof(DLOOP_Offset);
            /* fall through */
        case DLOOP_KIND_INDEXED:
            /* block sizes */
            blocks_sz = count * sizeof(DLOOP_Count);
            /* fall through */
        case DLOOP_KIND_BLOCKINDEXED:
            /* block offsets */
            offsets_sz = count * sizeof(DLOOP_Offset);
            /* fall through */
        case DLOOP_KIND_CONTIG:
        case DLOOP_KIND_VECTOR:
            break;
        default:
            DLOOP_Assert(0);
    }

    /* round every piece up to a multiple of the alignment */
    rem = hdr_sz % align_sz;
    if (rem) {
        hdr_sz += align_sz - rem;
    }
    rem = offsets_sz % align_sz;
    if (rem) {
        offsets_sz += align_sz - rem;
    }
    rem = blocks_sz % align_sz;
    if (rem) {
        blocks_sz += align_sz - rem;
    }
    rem = ptrs_sz % align_sz;
    if (rem) {
        ptrs_sz += align_sz - rem;
    }
    rem = extents_sz % align_sz;
    if (rem) {
        extents_sz += align_sz - rem;
    }

    total_sz = hdr_sz + offsets_sz + blocks_sz + ptrs_sz + extents_sz +
        old_loop_sz;

    /* allocate space */
    loop = (DLOOP_Dataloop *) DLOOP_Malloc(total_sz);
    if (loop == NULL) {
        *new_loop_p = NULL;
        return;
    }

#ifdef DLOOP_DEBUG_MEMORY
    DLOOP_dbg_printf("DLOOP_Dataloop_alloc_and_copy: new loop @ %x (tot sz = %z, loop = %z, off = %z, blk = %z, ptr = %z, extent = %z, old = %z)\n",
                     (int) loop,
                     total_sz,
                     hdr_sz,
                     offsets_sz,
                     blocks_sz,
                     ptrs_sz,
                     extents_sz,
                     old_loop_sz);
#endif

    base = (char *) loop;

    /* the copy of the old loop (if any) occupies the tail of the buffer */
    tail = base + (total_sz - old_loop_sz);
    if (old_loop == NULL) {
        child = NULL;
    }
    else {
        child = (DLOOP_Dataloop *) tail;
    }

    /* set all the pointers in the new dataloop structure */
    switch (kind) {
        case DLOOP_KIND_STRUCT:
            /* order is: pointers, blocks, offsets, extents */
            cursor = base + hdr_sz;
            loop->loop_params.s_t.dataloop_array = (DLOOP_Dataloop **) cursor;
            cursor += ptrs_sz;
            loop->loop_params.s_t.blocksize_array = (DLOOP_Count *) cursor;
            cursor += blocks_sz;
            loop->loop_params.s_t.offset_array = (DLOOP_Offset *) cursor;
            cursor += offsets_sz;
            loop->loop_params.s_t.el_extent_array = (DLOOP_Offset *) cursor;
            break;
        case DLOOP_KIND_INDEXED:
            /* order is: blocks, offsets */
            cursor = base + hdr_sz;
            loop->loop_params.i_t.blocksize_array = (DLOOP_Count *) cursor;
            cursor += blocks_sz;
            loop->loop_params.i_t.offset_array = (DLOOP_Offset *) cursor;
            loop->loop_params.i_t.dataloop = child;
            break;
        case DLOOP_KIND_BLOCKINDEXED:
            loop->loop_params.bi_t.offset_array =
                (DLOOP_Offset *) (base + hdr_sz);
            loop->loop_params.bi_t.dataloop = child;
            break;
        case DLOOP_KIND_CONTIG:
            loop->loop_params.c_t.dataloop = child;
            break;
        case DLOOP_KIND_VECTOR:
            loop->loop_params.v_t.dataloop = child;
            break;
        default:
            DLOOP_Assert(0);
    }

    if (old_loop != NULL) {
        PREPEND_PREFIX(Dataloop_copy)(tail, old_loop, old_loop_sz);
    }

    *new_loop_p    = loop;
    *new_loop_sz_p = total_sz;
    return;
}
/*@
  Dataloop_stream_size - return the size of the data described by the dataloop

Input Parameters:
+ dl_p   - pointer to dataloop for which we will return the size
- sizefn - function for determining size of types in the corresponding stream
           (passing NULL will instead result in el_size values being used)

  Return Value:
  The element size of the bottom-most dataloop (from sizefn, or el_size when
  sizefn is NULL) multiplied by the product of the counts accumulated while
  walking down the chain of dataloops.  A struct dataloop is handled by
  recursing into each of its children and summing blocksize * child size.
@*/
DLOOP_Offset
PREPEND_PREFIX(Dataloop_stream_size)(struct DLOOP_Dataloop *dl_p,
                                     DLOOP_Offset (*sizefn)(DLOOP_Type el_type))
{
    DLOOP_Offset tmp_sz, tmp_ct = 1;

    for (;;)
    {
        if ((dl_p->kind & DLOOP_KIND_MASK) == DLOOP_KIND_STRUCT)
        {
            int i;

            /* a struct has several children, so the walk cannot continue
             * iteratively: size each child recursively and finish here */
            tmp_sz = 0;
            for (i = 0; i < dl_p->loop_params.s_t.count; i++)
            {
                tmp_sz += (DLOOP_Offset)(dl_p->loop_params.s_t.blocksize_array[i]) *
                    PREPEND_PREFIX(Dataloop_stream_size)(dl_p->loop_params.s_t.dataloop_array[i], sizefn);
            }
            return tmp_sz * tmp_ct;
        }

        /* single-child kinds: fold this level's element count into the
         * running multiplier */
        switch (dl_p->kind & DLOOP_KIND_MASK) {
        case DLOOP_KIND_CONTIG:
            tmp_ct *= (DLOOP_Offset)(dl_p->loop_params.c_t.count);
#ifdef DLOOP_DEBUG_SIZE
            DLOOP_dbg_printf("stream_size: contig: ct = %d; new tot_ct = "
                             DLOOP_OFFSET_FMT_DEC_SPEC "\n",
                             (int) dl_p->loop_params.c_t.count,
                             (DLOOP_Offset) tmp_ct);
#endif
            break;
        case DLOOP_KIND_VECTOR:
            tmp_ct *= (DLOOP_Offset)(dl_p->loop_params.v_t.count) *
                (DLOOP_Offset)(dl_p->loop_params.v_t.blocksize);
#ifdef DLOOP_DEBUG_SIZE
            DLOOP_dbg_printf("stream_size: vector: ct = %d; blk = %d; new tot_ct = "
                             DLOOP_OFFSET_FMT_DEC_SPEC "\n",
                             (int) dl_p->loop_params.v_t.count,
                             (int) dl_p->loop_params.v_t.blocksize,
                             (DLOOP_Offset) tmp_ct);
#endif
            break;
        case DLOOP_KIND_BLOCKINDEXED:
            tmp_ct *= (DLOOP_Offset)(dl_p->loop_params.bi_t.count) *
                (DLOOP_Offset)(dl_p->loop_params.bi_t.blocksize);
#ifdef DLOOP_DEBUG_SIZE
            DLOOP_dbg_printf("stream_size: blkindexed: blks = %d; new tot_ct = "
                             DLOOP_OFFSET_FMT_DEC_SPEC "\n",
                             (int) dl_p->loop_params.bi_t.count *
                             (int) dl_p->loop_params.bi_t.blocksize,
                             (DLOOP_Offset) tmp_ct);
#endif
            break;
        case DLOOP_KIND_INDEXED:
            tmp_ct *= (DLOOP_Offset)(dl_p->loop_params.i_t.total_blocks);
#ifdef DLOOP_DEBUG_SIZE
            /* label names the indexed kind so the trace is attributed to
             * the right case */
            DLOOP_dbg_printf("stream_size: indexed: blks = %d; new tot_ct = "
                             DLOOP_OFFSET_FMT_DEC_SPEC "\n",
                             (int) dl_p->loop_params.i_t.total_blocks,
                             (DLOOP_Offset) tmp_ct);
#endif
            break;
        default:
            /* --BEGIN ERROR HANDLING-- */
            DLOOP_Assert(0);
            break;
            /* --END ERROR HANDLING-- */
        }

        if (dl_p->kind & DLOOP_FINAL_MASK) break;
        else {
            /* descend to the next dataloop in the chain */
            DLOOP_Assert(dl_p->loop_params.cm_t.dataloop != NULL);
            dl_p = dl_p->loop_params.cm_t.dataloop;
        }
    }

    /* call fn for size using bottom type, or use size if fnptr is NULL */
    tmp_sz = ((sizefn) ? sizefn(dl_p->el_type) : dl_p->el_size);

    return tmp_sz * tmp_ct;
}
/*@ Dataloop_print - dump a dataloop tree to stdout for debugging purposes Input Parameters: + dataloop - root of tree to dump - depth - starting depth; used to help keep up with where we are in the tree @*/ void PREPEND_PREFIX(Dataloop_print)(struct DLOOP_Dataloop *dataloop, int depth) { int i; if (dataloop == NULL) { DLOOP_dbg_printf("dataloop is NULL (probably basic type)\n"); return; } DLOOP_dbg_printf("loc=%p, treedepth=%d, kind=%d, el_extent=" DLOOP_OFFSET_FMT_DEC_SPEC "\n", dataloop, (int) depth, (int) dataloop->kind, (DLOOP_Offset) dataloop->el_extent); switch(dataloop->kind & DLOOP_KIND_MASK) { case DLOOP_KIND_CONTIG: DLOOP_dbg_printf("\tCONTIG: count=%d, datatype=%p\n", (int) dataloop->loop_params.c_t.count, dataloop->loop_params.c_t.dataloop); if (!(dataloop->kind & DLOOP_FINAL_MASK)) PREPEND_PREFIX(Dataloop_print)(dataloop->loop_params.c_t.dataloop, depth+1); break; case DLOOP_KIND_VECTOR: DLOOP_dbg_printf("\tVECTOR: count=%d, blksz=%d, stride=" DLOOP_OFFSET_FMT_DEC_SPEC ", datatype=%p\n", (int) dataloop->loop_params.v_t.count, (int) dataloop->loop_params.v_t.blocksize, (DLOOP_Offset) dataloop->loop_params.v_t.stride, dataloop->loop_params.v_t.dataloop); if (!(dataloop->kind & DLOOP_FINAL_MASK)) PREPEND_PREFIX(Dataloop_print)(dataloop->loop_params.v_t.dataloop, depth+1); break; case DLOOP_KIND_BLOCKINDEXED: DLOOP_dbg_printf("\tBLOCKINDEXED: count=%d, blksz=%d, datatype=%p\n", (int) dataloop->loop_params.bi_t.count, (int) dataloop->loop_params.bi_t.blocksize, dataloop->loop_params.bi_t.dataloop); /* print out offsets later */ if (!(dataloop->kind & DLOOP_FINAL_MASK)) PREPEND_PREFIX(Dataloop_print)(dataloop->loop_params.bi_t.dataloop, depth+1); break; case DLOOP_KIND_INDEXED: DLOOP_dbg_printf("\tINDEXED: count=%d, datatype=%p\n", (int) dataloop->loop_params.i_t.count, dataloop->loop_params.i_t.dataloop); /* print out blocksizes and offsets later */ if (!(dataloop->kind & DLOOP_FINAL_MASK)) PREPEND_PREFIX(Dataloop_print)(dataloop->loop_params.i_t.dataloop, 
depth+1); break; case DLOOP_KIND_STRUCT: DLOOP_dbg_printf("\tSTRUCT: count=%d\n", (int) dataloop->loop_params.s_t.count); DLOOP_dbg_printf("\tblocksizes:\n"); for (i=0; i < dataloop->loop_params.s_t.count; i++) DLOOP_dbg_printf("\t\t%d\n", (int) dataloop->loop_params.s_t.blocksize_array[i]); DLOOP_dbg_printf("\toffsets:\n"); for (i=0; i < dataloop->loop_params.s_t.count; i++) DLOOP_dbg_printf("\t\t" DLOOP_OFFSET_FMT_DEC_SPEC "\n", (DLOOP_Offset) dataloop->loop_params.s_t.offset_array[i]); DLOOP_dbg_printf("\tdatatypes:\n"); for (i=0; i < dataloop->loop_params.s_t.count; i++) DLOOP_dbg_printf("\t\t%p\n", dataloop->loop_params.s_t.dataloop_array[i]); if (dataloop->kind & DLOOP_FINAL_MASK) break; for (i=0; i < dataloop->loop_params.s_t.count; i++) { PREPEND_PREFIX(Dataloop_print)(dataloop->loop_params.s_t.dataloop_array[i],depth+1); } break; default: DLOOP_Assert(0); break; } return; }
/* DLOOP_Leaf_vector_mpi_flatten * * Input Parameters: * blocks_p - [inout] pointer to a count of blocks (total, for all noncontiguous pieces) * count - # of noncontiguous regions * blksz - size of each noncontiguous region * stride - distance in bytes from start of one region to start of next * el_type - elemental type (e.g. MPI_INT) * ... * * Note: this is only called when the starting position is at the beginning * of a whole block in a vector type. * * TODO: MAKE THIS CODE SMARTER, USING THE SAME GENERAL APPROACH AS IN THE * COUNT BLOCK CODE ABOVE. */ static int DLOOP_Leaf_vector_mpi_flatten(DLOOP_Offset *blocks_p, DLOOP_Count count, DLOOP_Count blksz, DLOOP_Offset stride, DLOOP_Type el_type, DLOOP_Offset rel_off, /* offset into buffer */ void *bufp, /* start of buffer */ void *v_paramp) { int i; DLOOP_Size size, blocks_left; DLOOP_Offset el_size; struct PREPEND_PREFIX(mpi_flatten_params) *paramp = v_paramp; DLOOP_Handle_get_size_macro(el_type, el_size); blocks_left = *blocks_p; for (i=0; i < count && blocks_left > 0; i++) { int last_idx; char *last_end = NULL; if (blocks_left > blksz) { size = blksz * el_size; blocks_left -= blksz; } else { /* last pass */ size = blocks_left * el_size; blocks_left = 0; } last_idx = paramp->index - 1; if (last_idx >= 0) { /* Since disps can be negative, we cannot use * DLOOP_Ensure_Offset_fits_in_pointer to verify that disps + * blklens fits in a pointer. Nor can we use * DLOOP_OFFSET_CAST_TO_VOID_PTR to cast the sum to a pointer. * Just let it truncate, if the sizeof a pointer is less * than the sizeof an MPI_Aint. */ last_end = (char *) DLOOP_OFFSET_CAST_TO_VOID_PTR (paramp->disps[last_idx] + (MPI_Aint)(paramp->blklens[last_idx])); } /* Since bufp can be a displacement and can be negative, we cannot use * DLOOP_Ensure_Offset_fits_in_pointer to ensure the sum fits in a pointer. * Just let it truncate. 
*/ if ((last_idx == paramp->length-1) && (last_end != ((char *) bufp + rel_off))) { /* we have used up all our entries, and this one doesn't fit on * the end of the last one. */ *blocks_p -= (blocks_left + (size / el_size)); #ifdef MPID_SP_VERBOSE MPL_DBG_MSG_FMT(MPIR_DBG_DATATYPE,VERBOSE,(MPL_DBG_FDEST,"\t[vector to vec exiting (1): next ind = %d, " DLOOP_OFFSET_FMT_DEC_SPEC " blocks processed.\n", paramp->u.pack_vector.index, *blocks_p)); #endif return 1; } else if (last_idx >= 0 && (last_end == ((char *) bufp + rel_off))) { /* add this size to the last vector rather than using up new one */ paramp->blklens[last_idx] += size; } else { /* Since bufp can be a displacement and can be negative, we cannot use * DLOOP_VOID_PTR_CAST_TO_OFFSET to cast the sum to a pointer. Just let it * sign extend. */ paramp->disps[last_idx+1] = DLOOP_PTR_DISP_CAST_TO_OFFSET bufp + rel_off; paramp->blklens[last_idx+1] = size; paramp->index++; } rel_off += stride; } #ifdef MPID_SP_VERBOSE MPL_DBG_MSG_FMT(MPIR_DBG_DATATYPE,VERBOSE,(MPL_DBG_FDEST,"\t[vector to vec exiting (2): next ind = %d, " DLOOP_OFFSET_FMT_DEC_SPEC " blocks processed.\n", paramp->u.pack_vector.index, *blocks_p)); #endif /* if we get here then we processed ALL the blocks; don't need to update * blocks_p */ DLOOP_Assert(blocks_left == 0); return 0; }
/* Dataloop_create - build (or fetch) the dataloop describing an MPI datatype
 *
 * Parameters:
 * type      - datatype to build a dataloop for
 * dlp_p     - [out] receives the dataloop pointer
 * dlsz_p    - [out] receives the dataloop size
 * dldepth_p - [out] receives the dataloop depth
 * flag      - passed through unchanged to the handle accessor macros and to
 *             the Dataloop_create_* routines
 *
 * Behavior:
 * - Named and F90 parameterized types are handed to dedicated creators.
 * - If the type's handle already holds a dataloop, that one is returned.
 * - Otherwise the type's contents are accessed, dataloops are created and
 *   stored for constituent types that lack one, and the creator matching
 *   the type's combiner is invoked.  Subarray and darray types are first
 *   converted to a temporary type, which is processed recursively and then
 *   freed.
 * - A zero count (ints[0] == 0) yields a zero-count contiguous dataloop.
 *
 * If a temporary displacement array cannot be allocated, *dlp_p is set to
 * NULL and *dlsz_p / *dldepth_p to 0; the type contents are still released.
 * This function has no return value, so that is the only failure signal.
 */
void PREPEND_PREFIX(Dataloop_create)(MPI_Datatype type,
                                     DLOOP_Dataloop **dlp_p,
                                     int *dlsz_p,
                                     int *dldepth_p,
                                     int flag)
{
    int i;
    int err;

    int nr_ints, nr_aints, nr_types, combiner;
    MPI_Datatype *types;
    int *ints;
    MPI_Aint *aints;

    DLOOP_Dataloop *old_dlp;
    int old_dlsz, old_dldepth;

    int dummy1, dummy2, dummy3, type0_combiner, ndims;
    MPI_Datatype tmptype;

    MPI_Aint stride;
    MPI_Aint *disps;

    MPIR_Type_get_envelope_impl(type, &nr_ints, &nr_aints, &nr_types, &combiner);

    /* some named types do need dataloops; handle separately. */
    if (combiner == MPI_COMBINER_NAMED) {
        DLOOP_Dataloop_create_named(type, dlp_p, dlsz_p, dldepth_p, flag);
        return;
    }
    else if (combiner == MPI_COMBINER_F90_REAL ||
             combiner == MPI_COMBINER_F90_COMPLEX ||
             combiner == MPI_COMBINER_F90_INTEGER)
    {
        MPI_Datatype f90basetype;
        DLOOP_Handle_get_basic_type_macro(type, f90basetype);
        PREPEND_PREFIX(Dataloop_create_contiguous)(1 /* count */,
                                                   f90basetype,
                                                   dlp_p, dlsz_p, dldepth_p,
                                                   flag);
        return;
    }

    /* Q: should we also check for "hasloop", or is the COMBINER
     *    check above enough to weed out everything that wouldn't
     *    have a loop?
     */
    DLOOP_Handle_get_loopptr_macro(type, old_dlp, flag);
    if (old_dlp != NULL) {
        /* dataloop already created; just return it. */
        *dlp_p = old_dlp;
        DLOOP_Handle_get_loopsize_macro(type, *dlsz_p, flag);
        DLOOP_Handle_get_loopdepth_macro(type, *dldepth_p, flag);
        return;
    }

    PREPEND_PREFIX(Type_access_contents)(type, &ints, &aints, &types);

    /* first check for zero count on types where that makes sense */
    switch(combiner) {
        case MPI_COMBINER_CONTIGUOUS:
        case MPI_COMBINER_VECTOR:
        case MPI_COMBINER_HVECTOR_INTEGER:
        case MPI_COMBINER_HVECTOR:
        case MPI_COMBINER_INDEXED_BLOCK:
        case MPI_COMBINER_HINDEXED_BLOCK:
        case MPI_COMBINER_INDEXED:
        case MPI_COMBINER_HINDEXED_INTEGER:
        case MPI_COMBINER_HINDEXED:
        case MPI_COMBINER_STRUCT_INTEGER:
        case MPI_COMBINER_STRUCT:
            if (ints[0] == 0) {
                PREPEND_PREFIX(Dataloop_create_contiguous)(0,
                                                           MPI_INT,
                                                           dlp_p,
                                                           dlsz_p,
                                                           dldepth_p,
                                                           flag);
                goto clean_exit;
            }
            break;
        default:
            break;
    }

    /* recurse, processing types "below" this one before processing
     * this one, if those type don't already have dataloops.
     *
     * note: in the struct case below we'll handle any additional
     *       types "below" the current one.
     */
    MPIR_Type_get_envelope_impl(types[0], &dummy1, &dummy2, &dummy3,
                                &type0_combiner);
    if (type0_combiner != MPI_COMBINER_NAMED)
    {
        DLOOP_Handle_get_loopptr_macro(types[0], old_dlp, flag);
        if (old_dlp == NULL)
        {
            /* no dataloop already present; create and store one */
            PREPEND_PREFIX(Dataloop_create)(types[0],
                                            &old_dlp,
                                            &old_dlsz,
                                            &old_dldepth,
                                            flag);

            DLOOP_Handle_set_loopptr_macro(types[0], old_dlp, flag);
            DLOOP_Handle_set_loopsize_macro(types[0], old_dlsz, flag);
            DLOOP_Handle_set_loopdepth_macro(types[0], old_dldepth, flag);
        }
        else {
            DLOOP_Handle_get_loopsize_macro(types[0], old_dlsz, flag);
            DLOOP_Handle_get_loopdepth_macro(types[0], old_dldepth, flag);
        }
    }

    switch(combiner)
    {
        case MPI_COMBINER_DUP:
            if (type0_combiner != MPI_COMBINER_NAMED) {
                PREPEND_PREFIX(Dataloop_dup)(old_dlp, old_dlsz, dlp_p);
                *dlsz_p    = old_dlsz;
                *dldepth_p = old_dldepth;
            }
            else {
                PREPEND_PREFIX(Dataloop_create_contiguous)(1,
                                                           types[0],
                                                           dlp_p, dlsz_p,
                                                           dldepth_p,
                                                           flag);
            }
            break;
        case MPI_COMBINER_RESIZED:
            if (type0_combiner != MPI_COMBINER_NAMED) {
                PREPEND_PREFIX(Dataloop_dup)(old_dlp, old_dlsz, dlp_p);
                *dlsz_p    = old_dlsz;
                *dldepth_p = old_dldepth;
            }
            else {
                PREPEND_PREFIX(Dataloop_create_contiguous)(1,
                                                           types[0],
                                                           dlp_p, dlsz_p,
                                                           dldepth_p,
                                                           flag);

                (*dlp_p)->el_extent = aints[1]; /* extent */
            }
            break;
        case MPI_COMBINER_CONTIGUOUS:
            PREPEND_PREFIX(Dataloop_create_contiguous)(ints[0] /* count */,
                                                       types[0] /* oldtype */,
                                                       dlp_p, dlsz_p,
                                                       dldepth_p,
                                                       flag);
            break;
        case MPI_COMBINER_VECTOR:
            PREPEND_PREFIX(Dataloop_create_vector)(ints[0] /* count */,
                                                   ints[1] /* blklen */,
                                                   ints[2] /* stride */,
                                                   0 /* stride not bytes */,
                                                   types[0] /* oldtype */,
                                                   dlp_p, dlsz_p, dldepth_p,
                                                   flag);
            break;
        case MPI_COMBINER_HVECTOR_INTEGER:
        case MPI_COMBINER_HVECTOR:
            /* fortran hvector has integer stride in bytes */
            if (combiner == MPI_COMBINER_HVECTOR_INTEGER) {
                stride = (MPI_Aint) ints[2];
            }
            else {
                stride = aints[0];
            }

            PREPEND_PREFIX(Dataloop_create_vector)(ints[0] /* count */,
                                                   ints[1] /* blklen */,
                                                   stride,
                                                   1 /* stride in bytes */,
                                                   types[0] /* oldtype */,
                                                   dlp_p, dlsz_p, dldepth_p,
                                                   flag);
            break;
        case MPI_COMBINER_INDEXED_BLOCK:
            PREPEND_PREFIX(Dataloop_create_blockindexed)(ints[0] /* count */,
                                                         ints[1] /* blklen */,
                                                         &ints[2] /* disps */,
                                                         0 /* disp not bytes */,
                                                         types[0] /* oldtype */,
                                                         dlp_p, dlsz_p,
                                                         dldepth_p,
                                                         flag);
            break;
        case MPI_COMBINER_HINDEXED_BLOCK:
            disps = (MPI_Aint *) DLOOP_Malloc(ints[0] * sizeof(MPI_Aint));
            if (disps == NULL) {
                /* cannot copy the displacements; bail out before writing */
                goto alloc_failed;
            }
            for (i = 0; i < ints[0]; i++)
                disps[i] = aints[i];
            PREPEND_PREFIX(Dataloop_create_blockindexed)(ints[0] /* count */,
                                                         ints[1] /* blklen */,
                                                         disps /* disps */,
                                                         1 /* disp is bytes */,
                                                         types[0] /* oldtype */,
                                                         dlp_p, dlsz_p,
                                                         dldepth_p,
                                                         flag);
            DLOOP_Free(disps);
            break;
        case MPI_COMBINER_INDEXED:
            PREPEND_PREFIX(Dataloop_create_indexed)(ints[0] /* count */,
                                                    &ints[1] /* blklens */,
                                                    &ints[ints[0]+1] /* disp */,
                                                    0 /* disp not in bytes */,
                                                    types[0] /* oldtype */,
                                                    dlp_p, dlsz_p, dldepth_p,
                                                    flag);
            break;
        case MPI_COMBINER_HINDEXED_INTEGER:
        case MPI_COMBINER_HINDEXED:
            if (combiner == MPI_COMBINER_HINDEXED_INTEGER) {
                /* integer displacements follow the blocklengths in ints[];
                 * widen them into a temporary MPI_Aint array */
                disps = (MPI_Aint *) DLOOP_Malloc(ints[0] * sizeof(MPI_Aint));
                if (disps == NULL) {
                    goto alloc_failed;
                }

                for (i=0; i < ints[0]; i++) {
                    disps[i] = (MPI_Aint) ints[ints[0] + 1 + i];
                }
            }
            else {
                disps = aints;
            }

            PREPEND_PREFIX(Dataloop_create_indexed)(ints[0] /* count */,
                                                    &ints[1] /* blklens */,
                                                    disps,
                                                    1 /* disp in bytes */,
                                                    types[0] /* oldtype */,
                                                    dlp_p, dlsz_p, dldepth_p,
                                                    flag);

            if (combiner == MPI_COMBINER_HINDEXED_INTEGER) {
                DLOOP_Free(disps);
            }

            break;
        case MPI_COMBINER_STRUCT_INTEGER:
        case MPI_COMBINER_STRUCT:
            /* types[0] was handled above; make sure the remaining
             * constituent types have dataloops as well */
            for (i = 1; i < ints[0]; i++) {
                int type_combiner;
                MPIR_Type_get_envelope_impl(types[i], &dummy1, &dummy2, &dummy3,
                                            &type_combiner);

                if (type_combiner != MPI_COMBINER_NAMED) {
                    DLOOP_Handle_get_loopptr_macro(types[i], old_dlp, flag);
                    if (old_dlp == NULL)
                    {
                        PREPEND_PREFIX(Dataloop_create)(types[i],
                                                        &old_dlp,
                                                        &old_dlsz,
                                                        &old_dldepth,
                                                        flag);

                        DLOOP_Handle_set_loopptr_macro(types[i], old_dlp,
                                                       flag);
                        DLOOP_Handle_set_loopsize_macro(types[i], old_dlsz,
                                                        flag);
                        DLOOP_Handle_set_loopdepth_macro(types[i], old_dldepth,
                                                         flag);
                    }
                }
            }
            if (combiner == MPI_COMBINER_STRUCT_INTEGER) {
                /* integer displacements follow the blocklengths in ints[];
                 * widen them into a temporary MPI_Aint array */
                disps = (MPI_Aint *) DLOOP_Malloc(ints[0] * sizeof(MPI_Aint));
                if (disps == NULL) {
                    goto alloc_failed;
                }

                for (i=0; i < ints[0]; i++) {
                    disps[i] = (MPI_Aint) ints[ints[0] + 1 + i];
                }
            }
            else {
                disps = aints;
            }

            err = PREPEND_PREFIX(Dataloop_create_struct)(ints[0] /* count */,
                                                         &ints[1] /* blklens */,
                                                         disps,
                                                         types /* oldtype array */,
                                                         dlp_p, dlsz_p, dldepth_p,
                                                         flag);
            /* TODO if/when this function returns error codes, propagate this failure instead */
            DLOOP_Assert(0 == err);
            /* if (err) return err; */

            if (combiner == MPI_COMBINER_STRUCT_INTEGER) {
                DLOOP_Free(disps);
            }
            break;
        case MPI_COMBINER_SUBARRAY:
            ndims = ints[0];
            PREPEND_PREFIX(Type_convert_subarray)(ndims,
                                                  &ints[1] /* sizes */,
                                                  &ints[1+ndims] /* subsizes */,
                                                  &ints[1+2*ndims] /* starts */,
                                                  ints[1+3*ndims] /* order */,
                                                  types[0],
                                                  &tmptype);

            PREPEND_PREFIX(Dataloop_create)(tmptype,
                                            dlp_p,
                                            dlsz_p,
                                            dldepth_p,
                                            flag);

            MPIR_Type_free_impl(&tmptype);
            break;
        case MPI_COMBINER_DARRAY:
            ndims = ints[2];
            PREPEND_PREFIX(Type_convert_darray)(ints[0] /* size */,
                                                ints[1] /* rank */,
                                                ndims,
                                                &ints[3] /* gsizes */,
                                                &ints[3+ndims] /*distribs */,
                                                &ints[3+2*ndims] /* dargs */,
                                                &ints[3+3*ndims] /* psizes */,
                                                ints[3+4*ndims] /* order */,
                                                types[0],
                                                &tmptype);

            PREPEND_PREFIX(Dataloop_create)(tmptype,
                                            dlp_p,
                                            dlsz_p,
                                            dldepth_p,
                                            flag);

            MPIR_Type_free_impl(&tmptype);
            break;
        default:
            DLOOP_Assert(0);
            break;
    }

    goto clean_exit;

    /* --BEGIN ERROR HANDLING-- */
 alloc_failed:
    /* no error code can be returned from here, so report the failure
     * through the outputs: a NULL dataloop with zero size and depth */
    *dlp_p     = NULL;
    *dlsz_p    = 0;
    *dldepth_p = 0;
    /* --END ERROR HANDLING-- */

 clean_exit:

    PREPEND_PREFIX(Type_release_contents)(type, &ints, &aints, &types);

    /* for now we just leave the intermediate dataloops in place.
     * could remove them to save space if we wanted.
     */

    return;
}