/*@
  Dataloop_free - deallocate the resources used to store a dataloop

Input/output Parameters:
. dataloop - pointer to dataloop structure; set to NULL on return
@*/
void PREPEND_PREFIX(Dataloop_free)(DLOOP_Dataloop **dataloop)
{
    DLOOP_Dataloop *dl = *dataloop;

    if (dl == NULL)
        return;

#ifdef DLOOP_DEBUG_MEMORY
    DLOOP_dbg_printf("DLOOP_Dataloop_free: freeing loop @ %x.\n", (int) dl);
#endif

    /* scrub the common header before releasing the storage so any
     * dangling reference trips over zeroed fields rather than stale
     * pointers */
    memset(dl, 0, sizeof(DLOOP_Dataloop_common));
    DLOOP_Free(dl);
    *dataloop = NULL;
}
/* DLOOP_Dataloop_create_flattened_struct
 *
 * Handles the fully general struct case by converting everything to
 * bytes: two passes over the segment code, first to count contiguous
 * regions, then to flatten the type into (blocklength, byte
 * displacement) pairs, which are finally handed to the indexed
 * dataloop builder with MPI_BYTE elements.
 *
 * Returns 0 on success, an MPI error code on failure.  All
 * intermediate resources (segment, temp arrays) are released on every
 * return path.
 */
static int DLOOP_Dataloop_create_flattened_struct(DLOOP_Count count,
                                                  const int *blklens,
                                                  const MPI_Aint *disps,
                                                  const DLOOP_Type *oldtypes,
                                                  DLOOP_Dataloop **dlp_p,
                                                  MPI_Aint *dlsz_p,
                                                  int *dldepth_p,
                                                  int flag)
{
    /* arbitrary types, convert to bytes and use indexed */
    int i, err, nr_blks = 0;
    DLOOP_Size *tmp_blklens;
    MPI_Aint *tmp_disps; /* since we're calling another fn that takes
                            this type as an input parameter */
    DLOOP_Offset bytes;
    DLOOP_Segment *segp;
    int first_ind;
    DLOOP_Size last_ind;

    segp = MPIR_Segment_alloc();
    /* --BEGIN ERROR HANDLING-- */
    if (!segp) {
        return DLOOP_Dataloop_create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */

    /* pass 1: use segment code once to count contiguous regions */
    for (i = 0; i < count; i++) {
        int is_basic;

        /* ignore type elements with a zero blklen */
        if (blklens[i] == 0)
            continue;

        is_basic = (DLOOP_Handle_hasloop_macro(oldtypes[i])) ? 0 : 1;

        if (is_basic && (oldtypes[i] != MPI_LB && oldtypes[i] != MPI_UB)) {
            nr_blks++;
        }
        else {  /* derived type; get a count of contig blocks */
            DLOOP_Count tmp_nr_blks, sz;

            DLOOP_Handle_get_size_macro(oldtypes[i], sz);

            /* if the derived type has some data to contribute,
             * add to flattened representation */
            if (sz > 0) {
                err = MPIR_Segment_init(NULL, (DLOOP_Count) blklens[i],
                                        oldtypes[i], segp, flag);
                if (err) {
                    /* release the segment before bailing out (was
                     * leaked on this path previously) */
                    MPIR_Segment_free(segp);
                    return err;
                }

                bytes = SEGMENT_IGNORE_LAST;

                MPIR_Segment_count_contig_blocks(segp, 0, &bytes,
                                                 &tmp_nr_blks);
                nr_blks += tmp_nr_blks;
            }
        }
    }

    /* it's possible for us to get to this point only to realize that
     * there isn't any data in this type. in that case do what we always
     * do: store a simple contig of zero ints and call it done.
     */
    if (nr_blks == 0) {
        MPIR_Segment_free(segp);
        err = MPIR_Dataloop_create_contiguous(0, MPI_INT, dlp_p, dlsz_p,
                                              dldepth_p, flag);
        return err;
    }

    nr_blks += 2; /* safety measure */

    tmp_blklens = (DLOOP_Size *) DLOOP_Malloc(nr_blks * sizeof(DLOOP_Size),
                                              MPL_MEM_DATATYPE);
    /* --BEGIN ERROR HANDLING-- */
    if (!tmp_blklens) {
        MPIR_Segment_free(segp);
        return DLOOP_Dataloop_create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */

    tmp_disps = (MPI_Aint *) DLOOP_Malloc(nr_blks * sizeof(MPI_Aint),
                                          MPL_MEM_DATATYPE);
    /* --BEGIN ERROR HANDLING-- */
    if (!tmp_disps) {
        DLOOP_Free(tmp_blklens);
        MPIR_Segment_free(segp);
        return DLOOP_Dataloop_create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */

    /* pass 2: use segment code again to flatten the type */
    first_ind = 0;
    for (i = 0; i < count; i++) {
        int is_basic;
        DLOOP_Count sz = -1;

        is_basic = (DLOOP_Handle_hasloop_macro(oldtypes[i])) ? 0 : 1;
        if (!is_basic)
            DLOOP_Handle_get_size_macro(oldtypes[i], sz);

        /* we're going to use the segment code to flatten the type.
         * we put in our displacement as the buffer location, and use
         * the blocklength as the count value to get N contiguous copies
         * of the type.
         *
         * Note that we're going to get back values in bytes, so that will
         * be our new element type.
         */
        if (oldtypes[i] != MPI_UB && oldtypes[i] != MPI_LB &&
            blklens[i] != 0 && (is_basic || sz > 0))
        {
            err = MPIR_Segment_init((char *) DLOOP_OFFSET_CAST_TO_VOID_PTR disps[i],
                                    (DLOOP_Count) blklens[i],
                                    oldtypes[i],
                                    segp,
                                    0 /* homogeneous */);
            if (err) {
                /* free everything on error (these were leaked on
                 * this path previously) */
                DLOOP_Free(tmp_blklens);
                DLOOP_Free(tmp_disps);
                MPIR_Segment_free(segp);
                return err;
            }

            /* remaining capacity in the temp arrays */
            last_ind = nr_blks - first_ind;
            bytes = SEGMENT_IGNORE_LAST;
            MPIR_Segment_mpi_flatten(segp, 0, &bytes,
                                     &tmp_blklens[first_ind],
                                     &tmp_disps[first_ind],
                                     &last_ind);
            /* note: the flatten call does not set err; the stale
             * "if (err) return err;" that used to follow it was dead
             * code (err was just verified zero above) and has been
             * removed */
            first_ind += last_ind;
        }
    }
    nr_blks = first_ind;

#if 0
    if (MPL_DBG_SELECTED(MPIR_DBG_DATATYPE,VERBOSE)) {
        MPL_DBG_OUT(MPIR_DBG_DATATYPE,"--- start of flattened type ---");
        for (i=0; i < nr_blks; i++) {
            MPL_DBG_OUT_FMT(MPIR_DBG_DATATYPE,(MPL_DBG_FDEST,
                            "a[%d] = (%d, " DLOOP_OFFSET_FMT_DEC_SPEC ")", i,
                            tmp_blklens[i], tmp_disps[i]));
        }
        MPL_DBG_OUT(MPIR_DBG_DATATYPE,"--- end of flattened type ---");
    }
#endif

    MPIR_Segment_free(segp);

    err = MPIR_Dataloop_create_indexed(nr_blks,
                                       tmp_blklens,
                                       tmp_disps,
                                       1, /* disp in bytes */
                                       MPI_BYTE,
                                       dlp_p,
                                       dlsz_p,
                                       dldepth_p,
                                       flag);

    DLOOP_Free(tmp_blklens);
    DLOOP_Free(tmp_disps);

    return err;
}
/* DLOOP_Dataloop_create_basic_all_bytes_struct
 *
 * Handles the case where all the nonzero entries of the struct are
 * basic (non-derived) types: each (blklen, oldtype) pair is reduced
 * to a single byte count, and the whole struct becomes one indexed
 * dataloop over MPI_BYTE.  MPI_LB/MPI_UB markers and zero-length
 * blocks are skipped.
 *
 * Returns 0 on success, an MPI error code on failure.
 */
static int DLOOP_Dataloop_create_basic_all_bytes_struct(
                DLOOP_Count count,
                const int *blklens,
                const MPI_Aint *disps,
                const DLOOP_Type *oldtypes,
                DLOOP_Dataloop **dlp_p,
                MPI_Aint *dlsz_p,
                int *dldepth_p,
                int flag)
{
    int i, err, cur_pos = 0;
    DLOOP_Size *tmp_blklens;
    MPI_Aint *tmp_disps;

    /* count is an upper bound on number of type instances */
    tmp_blklens = (DLOOP_Size *) DLOOP_Malloc(count * sizeof(DLOOP_Size),
                                              MPL_MEM_DATATYPE);
    /* --BEGIN ERROR HANDLING-- */
    if (!tmp_blklens) {
        return DLOOP_Dataloop_create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */

    tmp_disps = (MPI_Aint *) DLOOP_Malloc(count * sizeof(MPI_Aint),
                                          MPL_MEM_DATATYPE);
    /* --BEGIN ERROR HANDLING-- */
    if (!tmp_disps) {
        DLOOP_Free(tmp_blklens);
        return DLOOP_Dataloop_create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */

    for (i = 0; i < count; i++) {
        if (oldtypes[i] != MPI_LB && oldtypes[i] != MPI_UB && blklens[i] != 0) {
            DLOOP_Offset sz;

            DLOOP_Handle_get_size_macro(oldtypes[i], sz);
            /* widen before multiplying: the old "(int) sz * blklens[i]"
             * truncated the DLOOP_Offset size to int before the
             * multiply, which could overflow for large basic counts */
            tmp_blklens[cur_pos] = (DLOOP_Size) (sz * (DLOOP_Offset) blklens[i]);
            tmp_disps[cur_pos] = disps[i];
            cur_pos++;
        }
    }

    err = MPIR_Dataloop_create_indexed(cur_pos,
                                       tmp_blklens,
                                       tmp_disps,
                                       1, /* disp in bytes */
                                       MPI_BYTE,
                                       dlp_p,
                                       dlsz_p,
                                       dldepth_p,
                                       flag);

    DLOOP_Free(tmp_blklens);
    DLOOP_Free(tmp_disps);

    return err;
}
/* DLOOP_Dataloop_create_unique_type_struct
 *
 * Handles the struct case in which a single (derived) type accounts
 * for all the nonzero entries; type_pos indexes the first occurrence.
 * All instances of that type are gathered into one indexed dataloop
 * over the type itself.
 *
 * Returns 0 on success, an MPI error code on failure.
 */
static int DLOOP_Dataloop_create_unique_type_struct(DLOOP_Count count,
                                                    const int *blklens,
                                                    const MPI_Aint *disps,
                                                    const DLOOP_Type *oldtypes,
                                                    int type_pos,
                                                    DLOOP_Dataloop **dlp_p,
                                                    MPI_Aint *dlsz_p,
                                                    int *dldepth_p,
                                                    int flag)
{
    /* the same type used more than once in the array; type_pos
     * indexes to the first of these.
     */
    int i, err, cur_pos = 0;
    DLOOP_Size *tmp_blklens;
    DLOOP_Offset *tmp_disps;

    /* count is an upper bound on number of type instances */
    tmp_blklens = (DLOOP_Size *) DLOOP_Malloc(count * sizeof(DLOOP_Size),
                                              MPL_MEM_DATATYPE);
    /* --BEGIN ERROR HANDLING-- */
    if (!tmp_blklens) {
        /* TODO: ??? */
        return DLOOP_Dataloop_create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */

    tmp_disps = (DLOOP_Offset *) DLOOP_Malloc(count * sizeof(DLOOP_Offset),
                                              MPL_MEM_DATATYPE);
    /* --BEGIN ERROR HANDLING-- */
    if (!tmp_disps) {
        DLOOP_Free(tmp_blklens);
        /* TODO: ??? */
        return DLOOP_Dataloop_create_struct_memory_error();
    }
    /* --END ERROR HANDLING-- */

    for (i = type_pos; i < count; i++) {
        /* bugfix: the old code tested "blklens != 0" -- the pointer,
         * which is always true -- instead of the element, so
         * zero-length entries were not skipped as in the sibling
         * struct helpers */
        if (oldtypes[i] == oldtypes[type_pos] && blklens[i] != 0) {
            tmp_blklens[cur_pos] = blklens[i];
            tmp_disps[cur_pos] = disps[i];
            cur_pos++;
        }
    }

    err = MPIR_Dataloop_create_indexed(cur_pos,
                                       tmp_blklens,
                                       tmp_disps,
                                       1, /* disp in bytes */
                                       oldtypes[type_pos],
                                       dlp_p,
                                       dlsz_p,
                                       dldepth_p,
                                       flag);

    DLOOP_Free(tmp_blklens);
    DLOOP_Free(tmp_disps);

    return err;
}
/* Dataloop_create - build (and cache) the dataloop representation of a
 * datatype.
 *
 * Dispatches on the type's combiner: named and F90 predefined types
 * are handled directly; otherwise the contents are retrieved, inner
 * types are processed recursively (and their dataloops cached on the
 * handles), and the appropriate constructor-specific builder is
 * called.  Results are returned through dlp_p/dlsz_p/dldepth_p.
 */
void PREPEND_PREFIX(Dataloop_create)(MPI_Datatype type,
                                     DLOOP_Dataloop **dlp_p,
                                     int *dlsz_p,
                                     int *dldepth_p,
                                     int flag)
{
    int i;
    int err;

    int nr_ints, nr_aints, nr_types, combiner;
    MPI_Datatype *types;
    int *ints;
    MPI_Aint *aints;

    DLOOP_Dataloop *old_dlp;
    int old_dlsz, old_dldepth;

    int dummy1, dummy2, dummy3, type0_combiner, ndims;
    MPI_Datatype tmptype;

    MPI_Aint stride;
    MPI_Aint *disps;

    MPIR_Type_get_envelope_impl(type, &nr_ints, &nr_aints, &nr_types, &combiner);

    /* some named types do need dataloops; handle separately. */
    if (combiner == MPI_COMBINER_NAMED) {
        DLOOP_Dataloop_create_named(type, dlp_p, dlsz_p, dldepth_p, flag);
        return;
    }
    else if (combiner == MPI_COMBINER_F90_REAL ||
             combiner == MPI_COMBINER_F90_COMPLEX ||
             combiner == MPI_COMBINER_F90_INTEGER)
    {
        MPI_Datatype f90basetype;
        DLOOP_Handle_get_basic_type_macro(type, f90basetype);
        PREPEND_PREFIX(Dataloop_create_contiguous)(1 /* count */,
                                                   f90basetype,
                                                   dlp_p, dlsz_p,
                                                   dldepth_p,
                                                   flag);
        return;
    }

    /* Q: should we also check for "hasloop", or is the COMBINER
     *    check above enough to weed out everything that wouldn't
     *    have a loop?
     */
    DLOOP_Handle_get_loopptr_macro(type, old_dlp, flag);
    if (old_dlp != NULL) {
        /* dataloop already created; just return it. */
        *dlp_p = old_dlp;
        DLOOP_Handle_get_loopsize_macro(type, *dlsz_p, flag);
        DLOOP_Handle_get_loopdepth_macro(type, *dldepth_p, flag);
        return;
    }

    PREPEND_PREFIX(Type_access_contents)(type, &ints, &aints, &types);

    /* first check for zero count on types where that makes sense */
    switch(combiner) {
        case MPI_COMBINER_CONTIGUOUS:
        case MPI_COMBINER_VECTOR:
        case MPI_COMBINER_HVECTOR_INTEGER:
        case MPI_COMBINER_HVECTOR:
        case MPI_COMBINER_INDEXED_BLOCK:
        case MPI_COMBINER_HINDEXED_BLOCK:
        case MPI_COMBINER_INDEXED:
        case MPI_COMBINER_HINDEXED_INTEGER:
        case MPI_COMBINER_HINDEXED:
        case MPI_COMBINER_STRUCT_INTEGER:
        case MPI_COMBINER_STRUCT:
            if (ints[0] == 0) {
                PREPEND_PREFIX(Dataloop_create_contiguous)(0, MPI_INT, dlp_p,
                                                           dlsz_p, dldepth_p,
                                                           flag);
                goto clean_exit;
            }
            break;
        default:
            break;
    }

    /* recurse, processing types "below" this one before processing
     * this one, if those type don't already have dataloops.
     *
     * note: in the struct case below we'll handle any additional
     *       types "below" the current one.
     */
    MPIR_Type_get_envelope_impl(types[0], &dummy1, &dummy2, &dummy3,
                                &type0_combiner);
    if (type0_combiner != MPI_COMBINER_NAMED)
    {
        DLOOP_Handle_get_loopptr_macro(types[0], old_dlp, flag);
        if (old_dlp == NULL)
        {
            /* no dataloop already present; create and store one */
            PREPEND_PREFIX(Dataloop_create)(types[0], &old_dlp, &old_dlsz,
                                            &old_dldepth, flag);

            DLOOP_Handle_set_loopptr_macro(types[0], old_dlp, flag);
            DLOOP_Handle_set_loopsize_macro(types[0], old_dlsz, flag);
            DLOOP_Handle_set_loopdepth_macro(types[0], old_dldepth, flag);
        }
        else {
            DLOOP_Handle_get_loopsize_macro(types[0], old_dlsz, flag);
            DLOOP_Handle_get_loopdepth_macro(types[0], old_dldepth, flag);
        }
    }

    switch(combiner)
    {
        case MPI_COMBINER_DUP:
            if (type0_combiner != MPI_COMBINER_NAMED) {
                PREPEND_PREFIX(Dataloop_dup)(old_dlp, old_dlsz, dlp_p);
                *dlsz_p    = old_dlsz;
                *dldepth_p = old_dldepth;
            }
            else {
                PREPEND_PREFIX(Dataloop_create_contiguous)(1, types[0],
                                                           dlp_p, dlsz_p,
                                                           dldepth_p, flag);
            }
            break;
        case MPI_COMBINER_RESIZED:
            if (type0_combiner != MPI_COMBINER_NAMED) {
                PREPEND_PREFIX(Dataloop_dup)(old_dlp, old_dlsz, dlp_p);
                *dlsz_p    = old_dlsz;
                *dldepth_p = old_dldepth;
            }
            else {
                PREPEND_PREFIX(Dataloop_create_contiguous)(1, types[0],
                                                           dlp_p, dlsz_p,
                                                           dldepth_p, flag);
                (*dlp_p)->el_extent = aints[1]; /* extent */
            }
            break;
        case MPI_COMBINER_CONTIGUOUS:
            PREPEND_PREFIX(Dataloop_create_contiguous)(ints[0] /* count */,
                                                       types[0] /* oldtype */,
                                                       dlp_p, dlsz_p,
                                                       dldepth_p, flag);
            break;
        case MPI_COMBINER_VECTOR:
            PREPEND_PREFIX(Dataloop_create_vector)(ints[0] /* count */,
                                                   ints[1] /* blklen */,
                                                   ints[2] /* stride */,
                                                   0 /* stride not bytes */,
                                                   types[0] /* oldtype */,
                                                   dlp_p, dlsz_p,
                                                   dldepth_p, flag);
            break;
        case MPI_COMBINER_HVECTOR_INTEGER:
        case MPI_COMBINER_HVECTOR:
            /* fortran hvector has integer stride in bytes */
            if (combiner == MPI_COMBINER_HVECTOR_INTEGER) {
                stride = (MPI_Aint) ints[2];
            }
            else {
                stride = aints[0];
            }

            PREPEND_PREFIX(Dataloop_create_vector)(ints[0] /* count */,
                                                   ints[1] /* blklen */,
                                                   stride,
                                                   1 /* stride in bytes */,
                                                   types[0] /* oldtype */,
                                                   dlp_p, dlsz_p,
                                                   dldepth_p, flag);
            break;
        case MPI_COMBINER_INDEXED_BLOCK:
            PREPEND_PREFIX(Dataloop_create_blockindexed)(ints[0] /* count */,
                                                         ints[1] /* blklen */,
                                                         &ints[2] /* disps */,
                                                         0 /* disp not bytes */,
                                                         types[0] /* oldtype */,
                                                         dlp_p, dlsz_p,
                                                         dldepth_p, flag);
            break;
        case MPI_COMBINER_HINDEXED_BLOCK:
            /* fix: pass the MPL_MEM_DATATYPE class, as every other
             * DLOOP_Malloc call in this file does */
            disps = (MPI_Aint *) DLOOP_Malloc(ints[0] * sizeof(MPI_Aint),
                                              MPL_MEM_DATATYPE);
            for (i = 0; i < ints[0]; i++)
                disps[i] = aints[i];
            PREPEND_PREFIX(Dataloop_create_blockindexed)(ints[0] /* count */,
                                                         ints[1] /* blklen */,
                                                         disps /* disps */,
                                                         1 /* disp is bytes */,
                                                         types[0] /* oldtype */,
                                                         dlp_p, dlsz_p,
                                                         dldepth_p, flag);
            DLOOP_Free(disps);
            break;
        case MPI_COMBINER_INDEXED:
            PREPEND_PREFIX(Dataloop_create_indexed)(ints[0] /* count */,
                                                    &ints[1] /* blklens */,
                                                    &ints[ints[0]+1] /* disp */,
                                                    0 /* disp not in bytes */,
                                                    types[0] /* oldtype */,
                                                    dlp_p, dlsz_p,
                                                    dldepth_p, flag);
            break;
        case MPI_COMBINER_HINDEXED_INTEGER:
        case MPI_COMBINER_HINDEXED:
            if (combiner == MPI_COMBINER_HINDEXED_INTEGER) {
                /* fortran variant: widen the integer displacements */
                /* fix: pass the MPL_MEM_DATATYPE class, as every other
                 * DLOOP_Malloc call in this file does */
                disps = (MPI_Aint *) DLOOP_Malloc(ints[0] * sizeof(MPI_Aint),
                                                  MPL_MEM_DATATYPE);

                for (i=0; i < ints[0]; i++) {
                    disps[i] = (MPI_Aint) ints[ints[0] + 1 + i];
                }
            }
            else {
                disps = aints;
            }

            PREPEND_PREFIX(Dataloop_create_indexed)(ints[0] /* count */,
                                                    &ints[1] /* blklens */,
                                                    disps,
                                                    1 /* disp in bytes */,
                                                    types[0] /* oldtype */,
                                                    dlp_p, dlsz_p,
                                                    dldepth_p, flag);

            if (combiner == MPI_COMBINER_HINDEXED_INTEGER) {
                DLOOP_Free(disps);
            }

            break;
        case MPI_COMBINER_STRUCT_INTEGER:
        case MPI_COMBINER_STRUCT:
            /* recurse on the remaining inner types (types[0] was
             * handled above) and cache their dataloops */
            for (i = 1; i < ints[0]; i++) {
                int type_combiner;
                MPIR_Type_get_envelope_impl(types[i], &dummy1, &dummy2,
                                            &dummy3, &type_combiner);

                if (type_combiner != MPI_COMBINER_NAMED) {
                    DLOOP_Handle_get_loopptr_macro(types[i], old_dlp, flag);
                    if (old_dlp == NULL)
                    {
                        PREPEND_PREFIX(Dataloop_create)(types[i], &old_dlp,
                                                        &old_dlsz,
                                                        &old_dldepth,
                                                        flag);

                        DLOOP_Handle_set_loopptr_macro(types[i], old_dlp,
                                                       flag);
                        DLOOP_Handle_set_loopsize_macro(types[i], old_dlsz,
                                                        flag);
                        DLOOP_Handle_set_loopdepth_macro(types[i], old_dldepth,
                                                         flag);
                    }
                }
            }

            if (combiner == MPI_COMBINER_STRUCT_INTEGER) {
                /* fortran variant: widen the integer displacements */
                /* fix: pass the MPL_MEM_DATATYPE class, as every other
                 * DLOOP_Malloc call in this file does */
                disps = (MPI_Aint *) DLOOP_Malloc(ints[0] * sizeof(MPI_Aint),
                                                  MPL_MEM_DATATYPE);

                for (i=0; i < ints[0]; i++) {
                    disps[i] = (MPI_Aint) ints[ints[0] + 1 + i];
                }
            }
            else {
                disps = aints;
            }

            err = PREPEND_PREFIX(Dataloop_create_struct)(ints[0] /* count */,
                                                         &ints[1] /* blklens */,
                                                         disps,
                                                         types /* oldtype array */,
                                                         dlp_p, dlsz_p,
                                                         dldepth_p, flag);
            /* TODO if/when this function returns error codes, propagate this failure instead */
            DLOOP_Assert(0 == err);
            /* if (err) return err; */

            if (combiner == MPI_COMBINER_STRUCT_INTEGER) {
                DLOOP_Free(disps);
            }
            break;
        case MPI_COMBINER_SUBARRAY:
            ndims = ints[0];
            PREPEND_PREFIX(Type_convert_subarray)(ndims,
                                                  &ints[1] /* sizes */,
                                                  &ints[1+ndims] /* subsizes */,
                                                  &ints[1+2*ndims] /* starts */,
                                                  ints[1+3*ndims] /* order */,
                                                  types[0],
                                                  &tmptype);

            PREPEND_PREFIX(Dataloop_create)(tmptype, dlp_p, dlsz_p,
                                            dldepth_p, flag);

            MPIR_Type_free_impl(&tmptype);
            break;
        case MPI_COMBINER_DARRAY:
            ndims = ints[2];
            PREPEND_PREFIX(Type_convert_darray)(ints[0] /* size */,
                                                ints[1] /* rank */,
                                                ndims,
                                                &ints[3] /* gsizes */,
                                                &ints[3+ndims] /*distribs */,
                                                &ints[3+2*ndims] /* dargs */,
                                                &ints[3+3*ndims] /* psizes */,
                                                ints[3+4*ndims] /* order */,
                                                types[0],
                                                &tmptype);

            PREPEND_PREFIX(Dataloop_create)(tmptype, dlp_p, dlsz_p,
                                            dldepth_p, flag);

            MPIR_Type_free_impl(&tmptype);
            break;
        default:
            DLOOP_Assert(0);
            break;
    }

 clean_exit:

    PREPEND_PREFIX(Type_release_contents)(type, &ints, &aints, &types);

    /* for now we just leave the intermediate dataloops in place.
     * could remove them to save space if we wanted.
     */

    return;
}
/* MPIR_Type_convert_darray - build the equivalent struct type for a
 * distributed array (darray) type.
 *
 * For the calling process's position in the process grid (derived
 * row-major from rank and array_of_psizes), applies the per-dimension
 * block/cyclic/none distributions (fastest-varying dimension first,
 * per 'order'), then wraps the result with MPI_LB/MPI_UB and the
 * starting displacement to give the final type.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPIR_Type_convert_darray(int size,
                             int rank,
                             int ndims,
                             int *array_of_gsizes,
                             int *array_of_distribs,
                             int *array_of_dargs,
                             int *array_of_psizes,
                             int order,
                             MPI_Datatype oldtype,
                             MPI_Datatype * newtype)
{
    int mpi_errno = MPI_SUCCESS;
    MPI_Datatype type_old, type_new = MPI_DATATYPE_NULL, types[3];
    int procs, tmp_rank, i, tmp_size, blklens[3], *coords;
    MPI_Aint *st_offsets, orig_extent, disps[3];

    MPIR_Datatype_get_extent_macro(oldtype, orig_extent);

    /* calculate position in Cartesian grid as MPI would (row-major
     * ordering) */
    /* NOTE(review): allocation results are not checked here, and
     * coords/st_offsets leak on the MPIR_ERR_POP paths -- preexisting;
     * confirm against the file's error-handling policy */
    coords = (int *) DLOOP_Malloc(ndims * sizeof(int), MPL_MEM_DATATYPE);

    procs = size;
    tmp_rank = rank;
    for (i = 0; i < ndims; i++) {
        procs = procs / array_of_psizes[i];
        coords[i] = tmp_rank / procs;
        tmp_rank = tmp_rank % procs;
    }

    st_offsets = (MPI_Aint *) DLOOP_Malloc(ndims * sizeof(MPI_Aint), MPL_MEM_DATATYPE);

    type_old = oldtype;

    if (order == MPI_ORDER_FORTRAN) {
        /* dimension 0 changes fastest */
        for (i = 0; i < ndims; i++) {
            switch (array_of_distribs[i]) {
                case MPI_DISTRIBUTE_BLOCK:
                    mpi_errno = MPII_Type_block(array_of_gsizes, i, ndims,
                                                array_of_psizes[i], coords[i],
                                                array_of_dargs[i], order,
                                                orig_extent, type_old,
                                                &type_new, st_offsets + i);
                    if (mpi_errno)
                        MPIR_ERR_POP(mpi_errno);
                    break;
                case MPI_DISTRIBUTE_CYCLIC:
                    mpi_errno = MPII_Type_cyclic(array_of_gsizes, i, ndims,
                                                 array_of_psizes[i], coords[i],
                                                 array_of_dargs[i], order,
                                                 orig_extent, type_old,
                                                 &type_new, st_offsets + i);
                    if (mpi_errno)
                        MPIR_ERR_POP(mpi_errno);
                    break;
                case MPI_DISTRIBUTE_NONE:
                    /* treat it as a block distribution on 1 process */
                    mpi_errno = MPII_Type_block(array_of_gsizes, i, ndims,
                                                1, 0,
                                                MPI_DISTRIBUTE_DFLT_DARG, order,
                                                orig_extent, type_old,
                                                &type_new, st_offsets + i);
                    if (mpi_errno)
                        MPIR_ERR_POP(mpi_errno);
                    break;
            }
            if (i)
                MPIR_Type_free_impl(&type_old);
            type_old = type_new;
        }

        /* add displacement and UB */
        disps[1] = st_offsets[0];
        tmp_size = 1;
        for (i = 1; i < ndims; i++) {
            tmp_size *= array_of_gsizes[i - 1];
            disps[1] += ((MPI_Aint) tmp_size) * st_offsets[i];
        }
        /* rest done below for both Fortran and C order */
    } else {    /* order == MPI_ORDER_C */
        /* dimension ndims-1 changes fastest */
        for (i = ndims - 1; i >= 0; i--) {
            switch (array_of_distribs[i]) {
                case MPI_DISTRIBUTE_BLOCK:
                    mpi_errno = MPII_Type_block(array_of_gsizes, i, ndims,
                                                array_of_psizes[i], coords[i],
                                                array_of_dargs[i], order,
                                                orig_extent, type_old,
                                                &type_new, st_offsets + i);
                    if (mpi_errno)
                        MPIR_ERR_POP(mpi_errno);
                    break;
                case MPI_DISTRIBUTE_CYCLIC:
                    mpi_errno = MPII_Type_cyclic(array_of_gsizes, i, ndims,
                                                 array_of_psizes[i], coords[i],
                                                 array_of_dargs[i], order,
                                                 orig_extent, type_old,
                                                 &type_new, st_offsets + i);
                    if (mpi_errno)
                        MPIR_ERR_POP(mpi_errno);
                    break;
                case MPI_DISTRIBUTE_NONE:
                    /* treat it as a block distribution on 1 process */
                    /* bugfix: was passing array_of_psizes[i]/coords[i]
                     * here, contradicting the comment above and the
                     * Fortran-order branch; NONE must use a single
                     * process (1, 0) */
                    mpi_errno = MPII_Type_block(array_of_gsizes, i, ndims,
                                                1, 0,
                                                MPI_DISTRIBUTE_DFLT_DARG, order,
                                                orig_extent, type_old,
                                                &type_new, st_offsets + i);
                    if (mpi_errno)
                        MPIR_ERR_POP(mpi_errno);
                    break;
            }
            if (i != ndims - 1)
                MPIR_Type_free_impl(&type_old);
            type_old = type_new;
        }

        /* add displacement and UB */
        disps[1] = st_offsets[ndims - 1];
        tmp_size = 1;
        for (i = ndims - 2; i >= 0; i--) {
            tmp_size *= array_of_gsizes[i + 1];
            disps[1] += ((MPI_Aint) tmp_size) * st_offsets[i];
        }
    }

    disps[1] *= orig_extent;

    /* upper bound = product of global sizes, in bytes */
    disps[2] = orig_extent;
    for (i = 0; i < ndims; i++)
        disps[2] *= (MPI_Aint) (array_of_gsizes[i]);

    disps[0] = 0;
    blklens[0] = blklens[1] = blklens[2] = 1;
    types[0] = MPI_LB;
    types[1] = type_new;
    types[2] = MPI_UB;

    DLOOP_Free(st_offsets);
    DLOOP_Free(coords);

    mpi_errno = MPIR_Type_struct_impl(3, blklens, disps, types, newtype);
    if (mpi_errno)
        MPIR_ERR_POP(mpi_errno);

    MPIR_Type_free_impl(&type_new);

  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}