static int block(const int *gsize_array, int dim, int ndims, int nprocs, int rank, int darg, int order, ptrdiff_t orig_extent, ompi_datatype_t *type_old, ompi_datatype_t **type_new, ptrdiff_t *st_offset) { int blksize, global_size, mysize, i, j, rc, start_loop, step; ptrdiff_t stride; global_size = gsize_array[dim]; if (darg == MPI_DISTRIBUTE_DFLT_DARG) blksize = (global_size + nprocs - 1) / nprocs; else { blksize = darg; } j = global_size - blksize*rank; mysize = blksize < j ? blksize : j; if (mysize < 0) mysize = 0; if (MPI_ORDER_C == order) { start_loop = ndims - 1 ; step = -1; } else { start_loop = 0 ; step = 1; } stride = orig_extent; if (dim == start_loop) { rc = ompi_datatype_create_contiguous(mysize, type_old, type_new); if (OMPI_SUCCESS != rc) return rc; } else { for (i = start_loop ; i != dim ; i += step) { stride *= gsize_array[i]; } rc = ompi_datatype_create_hvector(mysize, 1, stride, type_old, type_new); if (OMPI_SUCCESS != rc) return rc; } *st_offset = blksize * rank; /* in terms of no. of elements of type oldtype in this dimension */ if (mysize == 0) *st_offset = 0; return OMPI_SUCCESS; }
static int cyclic(const int *gsize_array, int dim, int ndims, int nprocs, int rank, int darg, int order, ptrdiff_t orig_extent, ompi_datatype_t* type_old, ompi_datatype_t **type_new, ptrdiff_t *st_offset) { int blksize, i, blklens[2], st_index, end_index, local_size, rem, count, rc; ptrdiff_t stride, disps[2]; ompi_datatype_t *type_tmp, *types[2]; if (darg == MPI_DISTRIBUTE_DFLT_DARG) { blksize = 1; } else { blksize = darg; } st_index = rank * blksize; end_index = gsize_array[dim] - 1; if (end_index < st_index) { local_size = 0; } else { local_size = ((end_index - st_index + 1)/(nprocs*blksize))*blksize; rem = (end_index - st_index + 1) % (nprocs*blksize); local_size += rem < blksize ? rem : blksize; } count = local_size / blksize; rem = local_size % blksize; stride = nprocs*blksize*orig_extent; if (order == MPI_ORDER_FORTRAN) { for (i=0; i<dim; i++) { stride *= gsize_array[i]; } } else { for (i=ndims-1; i>dim; i--) { stride *= gsize_array[i]; } } rc = ompi_datatype_create_hvector(count, blksize, stride, type_old, type_new); if (OMPI_SUCCESS != rc) return rc; if (rem) { /* if the last block is of size less than blksize, include it separately using MPI_Type_struct */ types [0] = *type_new; types [1] = type_old; disps [0] = 0; disps [1] = count*stride; blklens[0] = 1; blklens[1] = rem; rc = ompi_datatype_create_struct(2, blklens, disps, types, &type_tmp); ompi_datatype_destroy(type_new); /* even in error condition, need to destroy type_new, so check for error after destroy. */ if (OMPI_SUCCESS != rc) return rc; *type_new = type_tmp; } /* need to set the UB for block-cyclic to work */ disps[0] = 0; disps[1] = orig_extent; if (order == MPI_ORDER_FORTRAN) { for(i=0; i<=dim; i++) { disps[1] *= gsize_array[i]; } } else { for(i=ndims-1; i>=dim; i--) { disps[1] *= gsize_array[i]; } } rc = opal_datatype_resize( &(*type_new)->super, disps[0], disps[1] ); if (OMPI_SUCCESS != rc) return rc; *st_offset = rank * blksize; /* in terms of no. of elements of type oldtype in this dimension */ if (local_size == 0) *st_offset = 0; return OMPI_SUCCESS; }
static ompi_datatype_t* __ompi_datatype_create_from_args( int32_t* i, MPI_Aint* a, ompi_datatype_t** d, int32_t type ) { ompi_datatype_t* datatype = NULL; switch(type){ /******************************************************************/ case MPI_COMBINER_DUP: /* should we duplicate d[0]? */ /* ompi_datatype_set_args( datatype, 0, NULL, 0, NULL, 1, d[0], MPI_COMBINER_DUP ); */ assert(0); /* shouldn't happen */ break; /******************************************************************/ case MPI_COMBINER_CONTIGUOUS: ompi_datatype_create_contiguous( i[0], d[0], &datatype ); ompi_datatype_set_args( datatype, 1, (const int **) &i, 0, NULL, 1, d, MPI_COMBINER_CONTIGUOUS ); break; /******************************************************************/ case MPI_COMBINER_VECTOR: ompi_datatype_create_vector( i[0], i[1], i[2], d[0], &datatype ); { const int* a_i[3] = {&i[0], &i[1], &i[2]}; ompi_datatype_set_args( datatype, 3, a_i, 0, NULL, 1, d, MPI_COMBINER_VECTOR ); } break; /******************************************************************/ case MPI_COMBINER_HVECTOR_INTEGER: case MPI_COMBINER_HVECTOR: ompi_datatype_create_hvector( i[0], i[1], a[0], d[0], &datatype ); { const int* a_i[2] = {&i[0], &i[1]}; ompi_datatype_set_args( datatype, 2, a_i, 1, a, 1, d, MPI_COMBINER_HVECTOR ); } break; /******************************************************************/ case MPI_COMBINER_INDEXED: /* TO CHECK */ ompi_datatype_create_indexed( i[0], &(i[1]), &(i[1+i[0]]), d[0], &datatype ); { const int* a_i[3] = {&i[0], &i[1], &(i[1+i[0]])}; ompi_datatype_set_args( datatype, 2 * i[0] + 1, a_i, 0, NULL, 1, d, MPI_COMBINER_INDEXED ); } break; /******************************************************************/ case MPI_COMBINER_HINDEXED_INTEGER: case MPI_COMBINER_HINDEXED: ompi_datatype_create_hindexed( i[0], &(i[1]), a, d[0], &datatype ); { const int* a_i[2] = {&i[0], &i[1]}; ompi_datatype_set_args( datatype, i[0] + 1, a_i, i[0], a, 1, d, MPI_COMBINER_HINDEXED ); } break; /******************************************************************/ case MPI_COMBINER_INDEXED_BLOCK: ompi_datatype_create_indexed_block( i[0], i[1], &(i[2]), d[0], &datatype ); { const int* a_i[3] = {&i[0], &i[1], &i[2]}; ompi_datatype_set_args( datatype, i[0] + 2, a_i, 0, NULL, 1, d, MPI_COMBINER_INDEXED_BLOCK ); } break; /******************************************************************/ case MPI_COMBINER_STRUCT_INTEGER: case MPI_COMBINER_STRUCT: ompi_datatype_create_struct( i[0], &(i[1]), a, d, &datatype ); { const int* a_i[2] = {&i[0], &i[1]}; ompi_datatype_set_args( datatype, i[0] + 1, a_i, i[0], a, i[0], d, MPI_COMBINER_STRUCT ); } break; /******************************************************************/ case MPI_COMBINER_SUBARRAY: ompi_datatype_create_subarray( i[0], &i[1 + 0 * i[0]], &i[1 + 1 * i[0]], &i[1 + 2 * i[0]], i[1 + 3 * i[0]], d[0], &datatype ); { const int* a_i[5] = {&i[0], &i[1 + 0 * i[0]], &i[1 + 1 * i[0]], &i[1 + 2 * i[0]], &i[1 + 3 * i[0]]}; ompi_datatype_set_args( datatype, 3 * i[0] + 2, a_i, 0, NULL, 1, d, MPI_COMBINER_SUBARRAY); } break; /******************************************************************/ case MPI_COMBINER_DARRAY: ompi_datatype_create_darray( i[0] /* size */, i[1] /* rank */, i[2] /* ndims */, &i[3 + 0 * i[0]], &i[3 + 1 * i[0]], &i[3 + 2 * i[0]], &i[3 + 3 * i[0]], i[3 + 4 * i[0]], d[0], &datatype ); { const int* a_i[8] = {&i[0], &i[1], &i[2], &i[3 + 0 * i[0]], &i[3 + 1 * i[0]], &i[3 + 2 * i[0]], &i[3 + 3 * i[0]], &i[3 + 4 * i[0]]}; ompi_datatype_set_args( datatype, 4 * i[0] + 4,a_i, 0, NULL, 1, d, MPI_COMBINER_DARRAY); } break; /******************************************************************/ case MPI_COMBINER_F90_REAL: case MPI_COMBINER_F90_COMPLEX: /*pArgs->i[0] = i[0][0]; pArgs->i[1] = i[1][0]; */ break; /******************************************************************/ case MPI_COMBINER_F90_INTEGER: /*pArgs->i[0] = i[0][0];*/ break; /******************************************************************/ case MPI_COMBINER_RESIZED: ompi_datatype_create_resized(d[0], a[0], a[1], &datatype); ompi_datatype_set_args( datatype, 0, NULL, 2, a, 1, d, MPI_COMBINER_RESIZED ); break; /******************************************************************/ case MPI_COMBINER_HINDEXED_BLOCK: ompi_datatype_create_hindexed_block( i[0], i[1], a, d[0], &datatype ); { const int* a_i[2] = {&i[0], &i[1]}; ompi_datatype_set_args( datatype, 2 + i[0], a_i, i[0], a, 1, d, MPI_COMBINER_HINDEXED_BLOCK ); } break; /******************************************************************/ default: break; } return datatype; }
int32_t ompi_datatype_create_subarray(int ndims, int const* size_array, int const* subsize_array, int const* start_array, int order, const ompi_datatype_t* oldtype, ompi_datatype_t** newtype) { MPI_Datatype last_type; int32_t i, step, end_loop; MPI_Aint size, displ, extent; /** * If the oldtype contains the original MPI_LB and MPI_UB markers then we * are forced to follow the MPI standard suggestion and reset these 2 * markers (MPI 3.0 page 96 line 37). Otherwise we can simply resize the * datatype. */ ompi_datatype_type_extent( oldtype, &extent ); /* If the ndims is zero then return the NULL datatype */ if( ndims < 2 ) { if( 0 == ndims ) { *newtype = &ompi_mpi_datatype_null.dt; return MPI_SUCCESS; } ompi_datatype_create_contiguous( subsize_array[0], oldtype, &last_type ); size = size_array[0]; displ = start_array[0]; goto replace_subarray_type; } if( MPI_ORDER_C == order ) { i = ndims - 1; step = -1; end_loop = -1; } else { i = 0; step = 1; end_loop = ndims; } /* As we know that the ndims is at least 1 we can start by creating the * first dimension data outside the loop, such that we dont have to create * a duplicate of the oldtype just to be able to free it. */ ompi_datatype_create_vector( subsize_array[i+step], subsize_array[i], size_array[i], oldtype, newtype ); last_type = *newtype; size = (MPI_Aint)size_array[i] * (MPI_Aint)size_array[i+step]; displ = (MPI_Aint)start_array[i] + (MPI_Aint)start_array[i+step] * (MPI_Aint)size_array[i]; for( i += 2 * step; i != end_loop; i += step ) { ompi_datatype_create_hvector( subsize_array[i], 1, size * extent, last_type, newtype ); ompi_datatype_destroy( &last_type ); displ += size * start_array[i]; size *= size_array[i]; last_type = *newtype; } replace_subarray_type: /** * We need to shift the content (useful data) of the datatype, so * we need to force the displacement to be moved. Therefore, we * cannot use resize as it will only set the soft lb and ub * markers without moving the data. Instead, we have to create a * new data, and insert the last_Type with the correct * displacement. */ *newtype = ompi_datatype_create( last_type->super.desc.used ); ompi_datatype_add( *newtype, last_type, 1, displ * extent, size * extent); ompi_datatype_destroy( &last_type ); return OMPI_SUCCESS; }
int32_t ompi_datatype_create_subarray(int ndims, int const* size_array, int const* subsize_array, int const* start_array, int order, const ompi_datatype_t* oldtype, ompi_datatype_t** newtype) { MPI_Datatype last_type; int32_t i, step, end_loop; MPI_Aint size, displ, extent; /** * If the oldtype contains the original MPI_LB and MPI_UB markers then we * are forced to follow the MPI standard suggestion and reset these 2 * markers (MPI 3.0 page 96 line 37). Otherwise we can simply resize the * datatype. */ ompi_datatype_type_extent( oldtype, &extent ); /* If the ndims is zero then return the NULL datatype */ if( ndims < 2 ) { if( 0 == ndims ) { *newtype = &ompi_mpi_datatype_null.dt; return MPI_SUCCESS; } ompi_datatype_create_contiguous( subsize_array[0], oldtype, &last_type ); size = size_array[0]; displ = start_array[0]; goto replace_subarray_type; } if( MPI_ORDER_C == order ) { i = ndims - 1; step = -1; end_loop = -1; } else { i = 0; step = 1; end_loop = ndims; } /* As we know that the ndims is at least 1 we can start by creating the * first dimension data outside the loop, such that we dont have to create * a duplicate of the oldtype just to be able to free it. */ ompi_datatype_create_vector( subsize_array[i+step], subsize_array[i], size_array[i], oldtype, newtype ); last_type = *newtype; size = (MPI_Aint)size_array[i] * (MPI_Aint)size_array[i+step]; displ = (MPI_Aint)start_array[i] + (MPI_Aint)start_array[i+step] * (MPI_Aint)size_array[i]; for( i += 2 * step; i != end_loop; i += step ) { ompi_datatype_create_hvector( subsize_array[i], 1, size * extent, last_type, newtype ); ompi_datatype_destroy( &last_type ); displ += size * start_array[i]; size *= size_array[i]; last_type = *newtype; } replace_subarray_type: /* * Resized will only set the soft lb and ub markers without moving the real * data inside. Thus, in case the original data contains the hard markers * (MPI_LB or MPI_UB) we must force the displacement of the data upward to * the right position AND set the hard markers LB and UB. * * NTH: ompi_datatype_create_resized() does not do enough for the general * pack/unpack functions to work correctly. Until this is fixed always use * ompi_datatype_create_struct(). Once this is fixed remove 1 || below. To * verify that the regression is fixed run the subarray test in the Open MPI * ibm testsuite. */ if(1 || oldtype->super.flags & (OPAL_DATATYPE_FLAG_USER_LB | OPAL_DATATYPE_FLAG_USER_UB) ) { MPI_Aint displs[3]; MPI_Datatype types[3]; int blength[3] = { 1, 1, 1 }; displs[0] = 0; displs[1] = displ * extent; displs[2] = size * extent; types[0] = MPI_LB; types[1] = last_type; types[2] = MPI_UB; ompi_datatype_create_struct( 3, blength, displs, types, newtype ); } else { ompi_datatype_create_resized(last_type, displ * extent, size * extent, newtype); } ompi_datatype_destroy( &last_type ); return OMPI_SUCCESS; }
int32_t ompi_datatype_create_subarray(int ndims, int const* size_array, int const* subsize_array, int const* start_array, int order, const ompi_datatype_t* oldtype, ompi_datatype_t** newtype) { MPI_Datatype last_type; int32_t i, step, end_loop; MPI_Aint size, displ, extent; ompi_datatype_type_extent( oldtype, &extent ); /* If the ndims is zero then return the NULL datatype */ if( ndims < 2 ) { if( 0 == ndims ) { *newtype = &ompi_mpi_datatype_null.dt; return MPI_SUCCESS; } ompi_datatype_create_contiguous( subsize_array[0], oldtype, &last_type ); size = size_array[0]; displ = start_array[0]; goto replace_subarray_type; } if( MPI_ORDER_C == order ) { i = ndims - 1; step = -1; end_loop = -1; } else { i = 0; step = 1; end_loop = ndims; } /* As we know that the ndims is at least 1 we can start by creating the * first dimension data outside the loop, such that we dont have to create * a duplicate of the oldtype just to be able to free it. */ ompi_datatype_create_vector( subsize_array[i+step], subsize_array[i], size_array[i], oldtype, newtype ); last_type = *newtype; size = size_array[i] * size_array[i+step]; displ = start_array[i] + start_array[i+step] * size_array[i]; for( i += 2 * step; i != end_loop; i += step ) { ompi_datatype_create_hvector( subsize_array[i], 1, size * extent, last_type, newtype ); ompi_datatype_destroy( &last_type ); displ += size * start_array[i]; size *= size_array[i]; last_type = *newtype; } replace_subarray_type: /** * We cannot use resized here. Resized will only set the soft lb and ub markers * without moving the real data inside. What we need is to force the displacement * of the data upward to the right position AND set the LB and UB. A type * struct is the function we need. */ { MPI_Aint displs[3]; MPI_Datatype types[3]; int blength[3] = { 1, 1, 1 }; displs[0] = 0; displs[1] = displ * extent; displs[2] = size * extent; types[0] = MPI_LB; types[1] = last_type; types[2] = MPI_UB; ompi_datatype_create_struct( 3, blength, displs, types, newtype ); } ompi_datatype_destroy( &last_type ); return OMPI_SUCCESS; }