void create_pattern(gchar* name, PatternType type, gint iter, gint elem,
                    gint level, GroupBlock* group)
{
    Verbose("Creating pattern%d \"%s\" elem %d level %d\n", type, name, elem, level);
    Pattern* pattern = pattern_new(type, iter, elem, level);

    gint groupSize = (group ? group->groupsize : size);
    gint groupRank;
    if (group)
        MPI_Comm_rank(group->mpicomm, &groupRank);
    else
        groupRank = rank;
    Verbose("GroupSize = %d, GroupRank = %d\n", groupSize, groupRank);

    MPI_Type_contiguous(elem, MPI_BYTE, &pattern->eType);
    MPI_Type_commit(&pattern->eType);
    pattern->type_size = 1;

    switch (type) {
    /* contiguous data */
    case PATTERN1: {
        int array_sizes[]    = { groupSize };
        int array_subsizes[] = { 1 };
        int array_starts[]   = { groupRank };
        MPI_Type_create_subarray(
            1,               /* number of array dimensions */
            array_sizes,     /* number of eTypes in each dimension of the full array */
            array_subsizes,  /* number of eTypes in each dimension of the subarray */
            array_starts,    /* starting coordinates of the subarray in each dimension */
            MPI_ORDER_C,     /* array storage order flag (state) */
            pattern->eType,  /* eType (old datatype) */
            &pattern->datatype);
        MPI_Type_commit(&pattern->datatype);
        break;
    }
    /* non-contiguous data */
    case PATTERN2: {
        int array_sizes[]    = { iter, groupSize };
        int array_subsizes[] = { iter, 1 };
        int array_starts[]   = { 0, groupRank };
        MPI_Type_create_subarray(
            2,               /* number of array dimensions */
            array_sizes,     /* number of eTypes in each dimension of the full array */
            array_subsizes,  /* number of eTypes in each dimension of the subarray */
            array_starts,    /* starting coordinates of the subarray in each dimension */
            MPI_ORDER_C,     /* array storage order flag (state) */
            pattern->eType,  /* eType (old datatype) */
            &pattern->datatype);
        MPI_Type_commit(&pattern->datatype);
        break;
    }
    default:
        Error("Pattern%d not yet supported!\n", type);
    }

    g_hash_table_insert(patternMap, name, pattern);
}
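/*
 * Minimal standalone sketch (illustrative, not part of the original source):
 * what the PATTERN1 subarray above describes.  With `size` ranks and an eType
 * of `elem` bytes, each rank's datatype selects its own slot of a size-wide
 * row, so the type's size is elem bytes while its extent spans the whole row
 * (size*elem bytes).  The value elem = 8 is an assumed example.
 */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int rank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int elem = 8;                        /* assumed bytes per element block */
    MPI_Datatype etype, slot;
    MPI_Type_contiguous(elem, MPI_BYTE, &etype);
    MPI_Type_commit(&etype);

    int sizes[1] = { size }, subsizes[1] = { 1 }, starts[1] = { rank };
    MPI_Type_create_subarray(1, sizes, subsizes, starts,
                             MPI_ORDER_C, etype, &slot);
    MPI_Type_commit(&slot);

    int tsize;
    MPI_Aint lb, extent;
    MPI_Type_size(slot, &tsize);              /* elem bytes of payload */
    MPI_Type_get_extent(slot, &lb, &extent);  /* size*elem bytes of span */
    printf("rank %d: size=%d extent=%ld\n", rank, tsize, (long)extent);

    MPI_Type_free(&slot);
    MPI_Type_free(&etype);
    MPI_Finalize();
    return 0;
}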
void Domain::create_cart_3d()
{
    for (int i = -1; i <= 1; ++i) {
        for (int j = -1; j <= 1; ++j) {
            for (int k = -1; k <= 1; ++k) {
                if (!i && !j && !k) continue; // if neighbor is me, don't do anything

                int nint[3] = { loc_shape[0], loc_shape[1], loc_shape[2] };
                int size[3] = { loc_shape[0] + 2*Ng,
                                loc_shape[1] + 2*Ng,
                                loc_shape[2] + 2*Ng };

                int Plx[3] = { Ng, Ng, nint[0] };
                int Ply[3] = { Ng, Ng, nint[1] };
                int Plz[3] = { Ng, Ng, nint[2] };
                int Qlx[3] = { 0, Ng, nint[0] + Ng };
                int Qly[3] = { 0, Ng, nint[1] + Ng };
                int Qlz[3] = { 0, Ng, nint[2] + Ng };

                int rel_index [3] = { i, j, k };
                int start_send[3] = { Plx[i+1], Ply[j+1], Plz[k+1] };
                int start_recv[3] = { Qlx[i+1], Qly[j+1], Qlz[k+1] };
                int subsize[3] = { (1 - abs(i))*nint[0] + abs(i)*Ng,
                                   (1 - abs(j))*nint[1] + abs(j)*Ng,
                                   (1 - abs(k))*nint[2] + abs(k)*Ng };

                int index[3];
                for (int d = 0; d < 3; ++d) {
                    index[d] = mpi_index[d] + rel_index[d];
                }

                int their_rank;
                MPI_Cart_rank(mpi_cart, index, &their_rank);
                neighbors.push_back(their_rank);
                send_tags.push_back(100*(+i+5) + 10*(+j+5) + 1*(+k+5));
                recv_tags.push_back(100*(-i+5) + 10*(-j+5) + 1*(-k+5));

                MPI_Datatype send, recv;
                MPI_Type_create_subarray(3, size, subsize, start_send, MPI_ORDER_C, mpi_type, &send);
                MPI_Type_create_subarray(3, size, subsize, start_recv, MPI_ORDER_C, mpi_type, &recv);
                MPI_Type_commit(&send);
                MPI_Type_commit(&recv);
                send_type.push_back(send);
                recv_type.push_back(recv);
            }
        }
    }
}
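/*
 * Hedged usage sketch in C (function and parameter names hypothetical): once
 * per-neighbor send/recv subarray types and tags like the ones built above
 * exist, a halo exchange is a matched Irecv/Isend pair per neighbor, all
 * operating on the same ghosted grid buffer.
 */
#include <mpi.h>

void exchange_halo(void *grid, int nneighbors, const int *neighbors,
                   const MPI_Datatype *send_type, const MPI_Datatype *recv_type,
                   const int *send_tags, const int *recv_tags, MPI_Comm cart)
{
    MPI_Request reqs[52];  /* up to 26 neighbors x 2 requests in 3D */
    int nreq = 0;
    for (int n = 0; n < nneighbors; ++n) {
        MPI_Irecv(grid, 1, recv_type[n], neighbors[n], recv_tags[n],
                  cart, &reqs[nreq++]);
        MPI_Isend(grid, 1, send_type[n], neighbors[n], send_tags[n],
                  cart, &reqs[nreq++]);
    }
    MPI_Waitall(nreq, reqs, MPI_STATUSES_IGNORE);
}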
static int sdf_plain_mesh_distribution(sdf_file_t *h)
{
#ifdef PARALLEL
    sdf_block_t *b = h->current_block;
    int n;
    int sizes[SDF_MAXDIMS], subsizes[SDF_MAXDIMS];

    for (n = 0; n < b->ndims; n++) {
        b->dims[n] -= 2 * b->ng;
        b->local_dims[n] -= 2 * b->ng;
        sizes[n] = (int)b->dims[n];
        subsizes[n] = (int)b->local_dims[n];
    }

    // Get starts for creating subarray
    sdf_factor(h);

    MPI_Type_create_subarray(b->ndims, sizes, subsizes, b->starts,
                             MPI_ORDER_FORTRAN, b->mpitype, &b->distribution);
    MPI_Type_commit(&b->distribution);

    b->nelements_local = 1;
    for (n = 0; n < b->ndims; n++) {
        b->dims[n] += 2 * b->ng;
        b->local_dims[n] += 2 * b->ng;
        b->nelements_local *= b->local_dims[n];
    }
#endif
    return 0;
}
/* subarray_2d_c_test2()
 *
 * Returns the number of errors encountered.
 */
int subarray_2d_c_test2(void)
{
    MPI_Datatype subarray;
    int array[12] = { -1, -2, -3, -4,  1,  2,
                      -5, -6, -7, -8, -9, -10 };
    int array_size[2] = { 2, 6 };
    int array_subsize[2] = { 1, 2 };
    int array_start[2] = { 0, 4 };
    int i, err, errs = 0, sizeoftype;

    /* set up type */
    err = MPI_Type_create_subarray(2, /* dims */
                                   array_size, array_subsize, array_start,
                                   MPI_ORDER_C, MPI_INT, &subarray);
    if (err != MPI_SUCCESS) {
        errs++;
        if (verbose) {
            fprintf(stderr,
                    "error in MPI_Type_create_subarray call; aborting after %d errors\n",
                    errs);
        }
        return errs;
    }

    MPI_Type_commit(&subarray);

    MPI_Type_size(subarray, &sizeoftype);
    if (sizeoftype != 2 * sizeof(int)) {
        errs++;
        if (verbose)
            fprintf(stderr, "size of type = %d; should be %d\n",
                    sizeoftype, (int) (2 * sizeof(int)));
        return errs;
    }

    err = pack_and_unpack((char *) array, 1, subarray, 12 * sizeof(int));

    for (i = 0; i < 12; i++) {
        int goodval;
        switch (i) {
            case 4:
                goodval = 1;
                break;
            case 5:
                goodval = 2;
                break;
            default:
                goodval = 0;
                break;
        }
        if (array[i] != goodval) {
            errs++;
            if (verbose)
                fprintf(stderr, "array[%d] = %d; should be %d\n",
                        i, array[i], goodval);
        }
    }

    MPI_Type_free(&subarray);
    return errs;
}
void ompi_type_create_subarray_f(MPI_Fint *ndims, MPI_Fint *size_array,
                                 MPI_Fint *subsize_array, MPI_Fint *start_array,
                                 MPI_Fint *order, MPI_Fint *oldtype,
                                 MPI_Fint *newtype, MPI_Fint *ierr)
{
    int c_ierr;
    MPI_Datatype c_old;
    MPI_Datatype c_new;
    OMPI_ARRAY_NAME_DECL(size_array);
    OMPI_ARRAY_NAME_DECL(subsize_array);
    OMPI_ARRAY_NAME_DECL(start_array);

    c_old = MPI_Type_f2c(*oldtype);

    OMPI_ARRAY_FINT_2_INT(size_array, *ndims);
    OMPI_ARRAY_FINT_2_INT(subsize_array, *ndims);
    OMPI_ARRAY_FINT_2_INT(start_array, *ndims);

    /* order must be converted from Fortran integer width as well */
    c_ierr = MPI_Type_create_subarray(OMPI_FINT_2_INT(*ndims),
                                      OMPI_ARRAY_NAME_CONVERT(size_array),
                                      OMPI_ARRAY_NAME_CONVERT(subsize_array),
                                      OMPI_ARRAY_NAME_CONVERT(start_array),
                                      OMPI_FINT_2_INT(*order),
                                      c_old, &c_new);
    if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr);

    if (MPI_SUCCESS == c_ierr) {
        *newtype = MPI_Type_c2f(c_new);
    }

    OMPI_ARRAY_FINT_2_INT_CLEANUP(size_array);
    OMPI_ARRAY_FINT_2_INT_CLEANUP(subsize_array);
    OMPI_ARRAY_FINT_2_INT_CLEANUP(start_array);
}
/* Definitions of Fortran Wrapper routines */
FORTRAN_API void FORT_CALL mpi_type_create_subarray_(MPI_Fint *ndims,
                MPI_Fint *array_of_sizes, MPI_Fint *array_of_subsizes,
                MPI_Fint *array_of_starts, MPI_Fint *order, MPI_Fint *oldtype,
                MPI_Fint *newtype, MPI_Fint *__ierr)
{
    int i;
    int *l_array_of_sizes = 0;
    int local_l_array_of_sizes[MPIR_USE_LOCAL_ARRAY];
    int *l_array_of_subsizes = 0;
    int local_l_array_of_subsizes[MPIR_USE_LOCAL_ARRAY];
    int *l_array_of_starts = 0;
    int local_l_array_of_starts[MPIR_USE_LOCAL_ARRAY];
    MPI_Datatype oldtype_c, newtype_c;

    oldtype_c = MPI_Type_f2c(*oldtype);

    if ((int)*ndims > 0) {
        if ((int)*ndims > MPIR_USE_LOCAL_ARRAY) {
            MPIR_FALLOC(l_array_of_sizes, (int *) MALLOC( *ndims * sizeof(int) ),
                        MPIR_COMM_WORLD, MPI_ERR_EXHAUSTED,
                        "MPI_TYPE_CREATE_SUBARRAY");
            MPIR_FALLOC(l_array_of_subsizes, (int *) MALLOC( *ndims * sizeof(int) ),
                        MPIR_COMM_WORLD, MPI_ERR_EXHAUSTED,
                        "MPI_TYPE_CREATE_SUBARRAY");
            MPIR_FALLOC(l_array_of_starts, (int *) MALLOC( *ndims * sizeof(int) ),
                        MPIR_COMM_WORLD, MPI_ERR_EXHAUSTED,
                        "MPI_TYPE_CREATE_SUBARRAY");
        }
        else {
            l_array_of_sizes = local_l_array_of_sizes;
            l_array_of_subsizes = local_l_array_of_subsizes;
            l_array_of_starts = local_l_array_of_starts;
        }
        for (i = 0; i < (int)*ndims; i++) {
            l_array_of_sizes[i] = (int)array_of_sizes[i];
            l_array_of_subsizes[i] = (int)array_of_subsizes[i];
            l_array_of_starts[i] = (int)array_of_starts[i];
        }
    }

    *__ierr = MPI_Type_create_subarray((int)*ndims, l_array_of_sizes,
                                       l_array_of_subsizes, l_array_of_starts,
                                       (int)*order, oldtype_c, &newtype_c);

    if ((int)*ndims > MPIR_USE_LOCAL_ARRAY) {
        FREE( l_array_of_sizes );
        FREE( l_array_of_subsizes );
        FREE( l_array_of_starts );
    }
    if (*__ierr == MPI_SUCCESS)
        *newtype = MPI_Type_c2f(newtype_c);
}
int main(int argc, char *argv[])
{
    int myrank;
    MPI_Datatype subarray;
    int array_size[] = {X, Y, Z};
    int array_subsize[] = {X/2, Y/2, Z};
    int array_start[] = {0, 0, 0};
    int i, j, k;
    int errs = 0;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

    for (i = 0; i < X; ++i) {
        for (j = 0; j < Y; ++j) {
            for (k = 0; k < Z; ++k) {
                if (myrank == 0)
                    array[i][j][k] = 2.0;
                else
                    array[i][j][k] = -2.0;
            }
        }
    }

    MPI_Type_create_subarray(3, array_size, array_subsize, array_start,
                             MPI_ORDER_C, MPI_DOUBLE, &subarray);
    MPI_Type_commit(&subarray);

    if (myrank == 0)
        MPI_Send(array, 1, subarray, 1, 0, MPI_COMM_WORLD);
    else {
        MPI_Recv(array, 1, subarray, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        /* check only the transferred region: start..start+subsize in each dim */
        for (i = array_start[0]; i < array_start[0] + array_subsize[0]; ++i) {
            for (j = array_start[1]; j < array_start[1] + array_subsize[1]; ++j) {
                for (k = array_start[2]; k < array_start[2] + array_subsize[2]; ++k) {
                    if (array[i][j][k] != 2.0)
                        ++errs;
                }
            }
        }
    }

    MPI_Type_free(&subarray);
    MPI_Allreduce(MPI_IN_PLACE, &errs, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
    if (myrank == 0) {
        if (errs)
            printf("Found %d errors\n", errs);
        else
            printf(" No Errors\n");
    }

    MPI_Finalize();
    return 0;
}
/** Convert an ARMCI strided access description into an MPI subarray datatype.
  *
  * @param[in]  stride_array  Array of strides
  * @param[in]  count         Array of transfer counts
  * @param[in]  stride_levels Number of levels of striding
  * @param[in]  old_type      Type of the data element described by count and stride_array
  * @param[out] new_type      New MPI type for the given strided access
  */
void ARMCII_Strided_to_dtype(int stride_array[/*stride_levels*/],
                             int count[/*stride_levels+1*/],
                             int stride_levels, MPI_Datatype old_type,
                             MPI_Datatype *new_type)
{
    int sizes   [stride_levels+1];
    int subsizes[stride_levels+1];
    int starts  [stride_levels+1];
    int i, old_type_size;

    MPI_Type_size(old_type, &old_type_size);

    /* Eliminate counts that don't count (all 1 counts at the end) */
    for (i = stride_levels+1; i > 0 && stride_levels > 0 && count[i-1] == 1; i--)
        stride_levels--;

    /* A correct strided spec should be monotonically increasing and
       stride_array[i+1] should be a multiple of stride_array[i]. */
    if (stride_levels > 0) {
        for (i = 1; i < stride_levels; i++)
            ARMCII_Assert(stride_array[i] >= stride_array[i-1] &&
                          stride_array[i] % stride_array[i-1] == 0);
    }

    /* Test for a contiguous transfer */
    if (stride_levels == 0) {
        int elem_count = count[0]/old_type_size;

        ARMCII_Assert(count[0] % old_type_size == 0);
        MPI_Type_contiguous(elem_count, old_type, new_type);
    }

    /* Transfer is non-contiguous */
    else {
        for (i = 0; i < stride_levels+1; i++)
            starts[i] = 0;

        sizes   [stride_levels] = stride_array[0]/old_type_size;
        subsizes[stride_levels] = count[0]/old_type_size;

        ARMCII_Assert(stride_array[0] % old_type_size == 0 &&
                      count[0] % old_type_size == 0);

        for (i = 1; i < stride_levels; i++) {
            /* Convert strides into dimensions by dividing out contributions
               from lower dims */
            sizes   [stride_levels-i] = stride_array[i]/stride_array[i-1];
            subsizes[stride_levels-i] = count[i];

            ARMCII_Assert_msg(stride_array[i] % stride_array[i-1] == 0,
                              "Invalid striding");
        }

        sizes   [0] = count[stride_levels];
        subsizes[0] = count[stride_levels];

        MPI_Type_create_subarray(stride_levels+1, sizes, subsizes, starts,
                                 MPI_ORDER_C, old_type, new_type);
    }
}
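/*
 * Illustrative check (input values assumed, not from the original source):
 * a strided spec with count = {8, 4, 2} and stride_array = {16, 128} over
 * MPI_BYTE should map, per the conversion above, to sizes = {2, 8, 16} and
 * subsizes = {2, 4, 8}, i.e. a type whose payload is 2*4*8 = 64 bytes.
 */
#include <mpi.h>
#include <stdio.h>
#include <assert.h>

/* routine defined above */
void ARMCII_Strided_to_dtype(int stride_array[], int count[], int stride_levels,
                             MPI_Datatype old_type, MPI_Datatype *new_type);

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    int count[3]  = { 8, 4, 2 };   /* count[0] in bytes, rest in elements */
    int stride[2] = { 16, 128 };   /* bytes between rows, then planes */
    MPI_Datatype t;
    ARMCII_Strided_to_dtype(stride, count, 2, MPI_BYTE, &t);
    MPI_Type_commit(&t);

    int tsize;
    MPI_Type_size(t, &tsize);
    assert(tsize == 8 * 4 * 2);    /* 64 bytes actually selected */
    printf("strided type size = %d bytes\n", tsize);

    MPI_Type_free(&t);
    MPI_Finalize();
    return 0;
}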
void mpi_type_create_subarray_(int *ndims, int *array_of_sizes,
                               int *array_of_subsizes, int *array_of_starts,
                               int *order, MPI_Fint *oldtype,
                               MPI_Fint *newtype, int *__ierr)
{
    MPI_Datatype oldtype_c, newtype_c;

    oldtype_c = MPI_Type_f2c(*oldtype);
    *__ierr = MPI_Type_create_subarray(*ndims, array_of_sizes,
                                       array_of_subsizes, array_of_starts,
                                       *order, oldtype_c, &newtype_c);
    *newtype = MPI_Type_c2f(newtype_c);
}
//--------------------------------------------------------------------------
//
// creates a subarray datatype
//
// num_dims: number of dimensions in the subarray
// full_size: full sizes of the array ([x][y][z] order)
// sub_size: desired sizes of subarray ([x][y][z] order)
// start_pos: starting indices of the subarray ([x][y][z] order)
// base_type: data type of array elements
// type: new (output) data type
//
// returns: error code
//
int DIY_Create_subarray_datatype(int num_dims, int *full_size, int *sub_size,
                                 int *start_pos, DIY_Datatype base_type,
                                 DIY_Datatype *type)
{
    // fortran order below is not a bug: I always want [x][y][z] order
    MPI_Type_create_subarray(num_dims, full_size, sub_size, start_pos,
                             MPI_ORDER_FORTRAN, base_type, type);
    MPI_Type_commit(type);
    dtype_absolute_address = false;
    return 0;
}
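/*
 * Hedged usage sketch (values illustrative; assumes DIY_Datatype wraps an
 * MPI_Datatype as the wrapper above suggests): carve an 8x8x8 block starting
 * at (4,4,4) out of a 16x16x16 volume of floats, indexed in [x][y][z] order.
 */
void example_block_type(DIY_Datatype *block_type)
{
    int full_size[3] = { 16, 16, 16 };
    int sub_size[3]  = { 8, 8, 8 };
    int start_pos[3] = { 4, 4, 4 };
    DIY_Create_subarray_datatype(3, full_size, sub_size, start_pos,
                                 MPI_FLOAT, block_type);
}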
/*
 * Setup order-Fortran subarray type info and handlers.
 *
 * A 2D-subarray datatype specified with order Fortran and located in the
 * right-bottom of the full array is created by using input parameters.
 * Number of elements in the dimensions of the full array: {stride, nblock + lb}
 * Number of elements in the dimensions of the subarray: {blocklen, nblock}
 * Starting of the subarray in each dimension: {stride - blocklen, lb}
 * order: MPI_ORDER_FORTRAN
 * oldtype: oldtype
 */
static int MTestTypeSubArrayOrderFortranCreate(MPI_Aint nblock, MPI_Aint blocklen,
                                               MPI_Aint stride, MPI_Aint lb,
                                               MPI_Datatype oldtype,
                                               const char *typename_prefix,
                                               MTestDatatype * mtype)
{
    int merr;
    char type_name[128];

    MTestTypeReset(mtype);

    merr = MPI_Type_size(oldtype, &mtype->basesize);
    if (merr)
        MTestPrintError(merr);

    /* use the same row and col as that of the order-c subarray for buffer
     * initialization and check, because we access the buffer in order-c */
    mtype->arr_sizes[0] = nblock + lb;      /* {row, col} */
    mtype->arr_sizes[1] = stride;
    mtype->arr_subsizes[0] = nblock;        /* {row, col} */
    mtype->arr_subsizes[1] = blocklen;
    mtype->arr_starts[0] = lb;              /* {row, col} */
    mtype->arr_starts[1] = stride - blocklen;
    mtype->order = MPI_ORDER_FORTRAN;

    /* reverse row and col when creating the datatype so that we get the same
     * packed data on the other side, in order to reuse the contig check function */
    int arr_sizes[2] = { mtype->arr_sizes[1], mtype->arr_sizes[0] };
    int arr_subsizes[2] = { mtype->arr_subsizes[1], mtype->arr_subsizes[0] };
    int arr_starts[2] = { mtype->arr_starts[1], mtype->arr_starts[0] };

    merr = MPI_Type_create_subarray(2, arr_sizes, arr_subsizes, arr_starts,
                                    mtype->order, oldtype, &mtype->datatype);
    if (merr)
        MTestPrintError(merr);
    merr = MPI_Type_commit(&mtype->datatype);
    if (merr)
        MTestPrintError(merr);

    memset(type_name, 0, sizeof(type_name));
    sprintf(type_name, "%s %s (full{%d,%d}, sub{%d,%d},start{%d,%d})",
            typename_prefix, "subarray-f", arr_sizes[0], arr_sizes[1],
            arr_subsizes[0], arr_subsizes[1], arr_starts[0], arr_starts[1]);
    merr = MPI_Type_set_name(mtype->datatype, (char *) type_name);
    if (merr)
        MTestPrintError(merr);

    mtype->InitBuf = MTestTypeSubarrayInit;
    mtype->FreeBuf = MTestTypeFree;
    mtype->CheckBuf = MTestTypeSubarrayCheckbuf;
    return merr;
}
int main(int argc, char *argv[])
{
    int my_rank, size, i;
    int ndims, array_of_sizes[1], array_of_subsizes[1];
    int array_of_starts[1], order;
    MPI_File fh;
    MPI_Datatype etype;
    MPI_Datatype filetype;
    ____ disp;
    MPI_Status status;
    char buf;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    etype = MPI_CHAR;
    ndims = ____;
    array_of_sizes[0] = ____;
    array_of_subsizes[0] = ____;
    array_of_starts[0] = ____;
    order = MPI_ORDER_C;
    MPI_Type_create_subarray(ndims, array_of_sizes, array_of_subsizes,
                             array_of_starts, order, etype, &filetype);
    MPI_Type_____;

    MPI_File_open(MPI_COMM_WORLD, "my_test_file",
                  MPI_MODE_____ | MPI_MODE_____,
                  MPI_INFO_NULL, &fh);
    disp = ____;
    MPI_File_set_view(fh, disp, etype, filetype, "native", MPI_INFO_NULL);

    for (i = 0; i < 3; i++) {
        buf = 'a' + (char)my_rank;
        MPI_File_write(fh, &buf, ____, ____, &status);
    }

    MPI_File_close(&fh);
    printf("PE%d\n", my_rank);
    MPI_Finalize();
    return 0;
}
void Domain::create_cart_1d()
{
    for (int i = -1; i <= 1; ++i) {
        if (!i) continue; // if neighbor is me, don't do anything

        int nint[1] = { loc_shape[0] };
        int size[1] = { loc_shape[0] + 2*Ng };

        int Plx[3] = { Ng, Ng, nint[0] };
        int Qlx[3] = { 0, Ng, nint[0] + Ng };

        int rel_index [1] = { i };
        int start_send[1] = { Plx[i+1] };
        int start_recv[1] = { Qlx[i+1] };
        int subsize[1] = { (1 - abs(i))*nint[0] + abs(i)*Ng };

        int index[1];
        for (int d = 0; d < 1; ++d) {
            index[d] = mpi_index[d] + rel_index[d];
        }

        int their_rank;
        MPI_Cart_rank(mpi_cart, index, &their_rank);
        neighbors.push_back(their_rank);
        send_tags.push_back(1*(+i+5));
        recv_tags.push_back(1*(-i+5));

        MPI_Datatype send, recv;
        MPI_Type_create_subarray(1, size, subsize, start_send, MPI_ORDER_C, mpi_type, &send);
        MPI_Type_create_subarray(1, size, subsize, start_recv, MPI_ORDER_C, mpi_type, &recv);
        MPI_Type_commit(&send);
        MPI_Type_commit(&recv);
        send_type.push_back(send);
        recv_type.push_back(recv);
    }
}
void distribute_matrix(double **matrix, double *global_mat_ptr, int *sendCounts,
                       int *displs, int *global_size, int *local_size)
{
    int start[2] = { 0, 0 };
    double *local_ptr = &(matrix[0][0]);
    MPI_Datatype subType;
    MPI_Datatype type;

    MPI_Type_create_subarray(2, global_size, local_size, start,
                             MPI_ORDER_C, MPI_DOUBLE, &subType);
    /* Shrink the extent to one block row of doubles so Scatterv displacements
       can be given in units of block columns */
    MPI_Type_create_resized(subType, 0, local_size[1]*sizeof(double), &type);
    MPI_Type_commit(&type);
    MPI_Type_free(&subType); /* the resized type keeps its own reference */

    MPI_Scatterv(global_mat_ptr, sendCounts, displs, type,
                 local_ptr, local_size[0]*local_size[1], MPI_DOUBLE,
                 0, MPI_COMM_WORLD);
    MPI_Type_free(&type);
}
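/*
 * Why the resize above matters (hedged, standalone sketch; 8x8 global and
 * 4x4 local sizes are assumed values): a bare 2D subarray keeps the whole
 * matrix as its extent, so consecutive Scatterv displacements would overlap.
 * Resizing the extent to one block row of doubles makes displacements count
 * in block-column units.
 */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    int global[2] = { 8, 8 }, local[2] = { 4, 4 }, start[2] = { 0, 0 };
    MPI_Datatype sub, resized;
    MPI_Aint lb, ext;

    MPI_Type_create_subarray(2, global, local, start, MPI_ORDER_C,
                             MPI_DOUBLE, &sub);
    MPI_Type_get_extent(sub, &lb, &ext);
    printf("bare subarray extent: %ld bytes\n", (long)ext);  /* 8*8*8 = 512 */

    MPI_Type_create_resized(sub, 0, local[1] * sizeof(double), &resized);
    MPI_Type_commit(&resized);
    MPI_Type_get_extent(resized, &lb, &ext);
    printf("resized extent: %ld bytes\n", (long)ext);        /* 4*8 = 32 */

    MPI_Type_free(&resized);
    MPI_Type_free(&sub);
    MPI_Finalize();
    return 0;
}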
void gather_submatrices(double **matrix, double *global_mat_ptr, int *sendCounts,
                        int *displs, int global_a_row, int *local_size, int my_rank)
{
    int start[2] = { 0, 0 }, global_size[2] = { global_a_row, global_a_row };
    double *local_ptr = &(matrix[0][0]);
    MPI_Datatype subType;
    MPI_Datatype type;

    /* The gather is collective, so every rank builds the type; it is only
       significant at the root (rank 0) as the receive type. */
    MPI_Type_create_subarray(2, global_size, local_size, start,
                             MPI_ORDER_C, MPI_DOUBLE, &subType);
    MPI_Type_create_resized(subType, 0, local_size[1]*sizeof(double), &type);
    MPI_Type_commit(&type);

    //printf("Global 0 : %d global 1 : %d local 0 : %d local 1 : %d\n",
    //       global_size[0], global_size[1], local_size[0], local_size[1]);
    MPI_Gatherv(local_ptr, local_size[0]*local_size[1], MPI_DOUBLE,
                global_mat_ptr, sendCounts, displs, type, 0, MPI_COMM_WORLD);
    MPI_Type_free(&type);
}
/*
 * Setup order-C subarray type info and handlers.
 *
 * A 2D-subarray datatype specified with order C and located in the right-bottom
 * of the full array is created by using input parameters.
 * Number of elements in the dimensions of the full array: {nblock + lb, stride}
 * Number of elements in the dimensions of the subarray: {nblock, blocklen}
 * Starting of the subarray in each dimension: {lb, stride - blocklen}
 * order: MPI_ORDER_C
 * oldtype: oldtype
 */
static int MTestTypeSubArrayOrderCCreate(MPI_Aint nblock, MPI_Aint blocklen,
                                         MPI_Aint stride, MPI_Aint lb,
                                         MPI_Datatype oldtype,
                                         const char *typename_prefix,
                                         MTestDatatype * mtype)
{
    int merr;
    char type_name[128];

    MTestTypeReset(mtype);

    merr = MPI_Type_size(oldtype, &mtype->basesize);
    if (merr)
        MTestPrintError(merr);

    mtype->arr_sizes[0] = nblock + lb;      /* {row, col} */
    mtype->arr_sizes[1] = stride;
    mtype->arr_subsizes[0] = nblock;        /* {row, col} */
    mtype->arr_subsizes[1] = blocklen;
    mtype->arr_starts[0] = lb;              /* {row, col} */
    mtype->arr_starts[1] = stride - blocklen;
    mtype->order = MPI_ORDER_C;

    merr = MPI_Type_create_subarray(2, mtype->arr_sizes, mtype->arr_subsizes,
                                    mtype->arr_starts, mtype->order, oldtype,
                                    &mtype->datatype);
    if (merr)
        MTestPrintError(merr);
    merr = MPI_Type_commit(&mtype->datatype);
    if (merr)
        MTestPrintError(merr);

    memset(type_name, 0, sizeof(type_name));
    sprintf(type_name, "%s %s (full{%d,%d}, sub{%d,%d},start{%d,%d})",
            typename_prefix, "subarray-c", mtype->arr_sizes[0], mtype->arr_sizes[1],
            mtype->arr_subsizes[0], mtype->arr_subsizes[1],
            mtype->arr_starts[0], mtype->arr_starts[1]);
    merr = MPI_Type_set_name(mtype->datatype, (char *) type_name);
    if (merr)
        MTestPrintError(merr);

    mtype->InitBuf = MTestTypeSubarrayInit;
    mtype->FreeBuf = MTestTypeFree;
    mtype->CheckBuf = MTestTypeSubarrayCheckbuf;
    return merr;
}
void BIL_Pio_read_raw_blocks(MPI_Comm all_readers_comm, MPI_Comm io_comm,
                             int num_blocks, BIL_Block* blocks)
{
    int i;
    for (i = 0; i < num_blocks; i++) {
        MPI_File fp;
        BIL_Timing_fopen_start(all_readers_comm);
        assert(MPI_File_open(io_comm, blocks[i].file_name, MPI_MODE_RDONLY,
                             BIL->io_hints, &fp) == MPI_SUCCESS);
        BIL_Timing_fopen_stop(all_readers_comm);

        // Get variable and subarray datatype for I/O.
        // NB: the MPI calls are wrapped in assert(), so this file must not be
        // compiled with NDEBUG or the calls disappear.
        MPI_Datatype var_type;
        assert(MPI_Type_contiguous(blocks[i].var_size, MPI_BYTE,
                                   &var_type) == MPI_SUCCESS);
        assert(MPI_Type_commit(&var_type) == MPI_SUCCESS); // added by TP

        MPI_Datatype file_type;
        assert(MPI_Type_create_subarray(blocks[i].num_dims,
                                        blocks[i].file_dim_sizes,
                                        blocks[i].sizes, blocks[i].starts,
                                        MPI_ORDER_C, var_type,
                                        &file_type) == MPI_SUCCESS);
        assert(MPI_Type_commit(&file_type) == MPI_SUCCESS);

        assert(MPI_File_set_view(fp, BIL->io_header_size, var_type, file_type,
                                 (char *)"native", MPI_INFO_NULL) == MPI_SUCCESS);

        // Allocate data and read it collectively.
        blocks[i].data = BIL_Misc_malloc(blocks[i].total_size * blocks[i].var_size);
        BIL_Timing_io_start(all_readers_comm);
        assert(MPI_File_read(fp, blocks[i].data, blocks[i].total_size, var_type,
                             MPI_STATUS_IGNORE) == MPI_SUCCESS);
        BIL_Timing_io_stop(all_readers_comm,
                           blocks[i].total_size * blocks[i].var_size);

        // Clean up.
        MPI_File_close(&fp);
        MPI_Type_free(&var_type);
        MPI_Type_free(&file_type);
    }
}
void MPIIO_WriteData(simulation_data *sim, char *Filename)
{
    int dimuids[3] = { sim->global_dims[2], sim->global_dims[1], sim->global_dims[0] };
    int f, rc, ustart[3], ucount[3];
    MPI_Offset disp = 0;
    MPI_File filehandle;
    MPI_Datatype filetype;

    rc = MPI_File_open(sim->comm_cart, Filename,
                       MPI_MODE_CREATE | MPI_MODE_WRONLY,
                       MPI_INFO_NULL, &filehandle);

    ustart[2] = sim->grid.Ncolumns * sim->coords[0];
    ustart[1] = sim->grid.Nrows * sim->coords[1];
    ustart[0] = 0;
    ucount[2] = sim->grid.Ncolumns;
    ucount[1] = sim->grid.Nrows;
    ucount[0] = sim->grid.Nlevels;

    // Create the subarray representing the local block
    MPI_Type_create_subarray(3, dimuids, ucount, ustart,
                             MPI_ORDER_C, MPI_FLOAT, &filetype);
    MPI_Type_commit(&filetype);

    for (f = 0; f < NFIELDS; f++) {
        MPI_File_set_view(filehandle, disp, MPI_FLOAT, filetype,
                          "native", MPI_INFO_NULL);
        MPI_File_write_all(filehandle, sim->grid.data[f],
                           ucount[0]*ucount[1]*ucount[2], MPI_FLOAT,
                           MPI_STATUS_IGNORE);
        disp += sim->global_dims[2] * sim->global_dims[1] * sim->global_dims[0]
                * sizeof(float);
    }
    MPI_File_close(&filehandle);
    MPI_Type_free(&filetype);
}
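/*
 * Hedged read-back sketch (function name and parameters hypothetical; mirrors
 * the write path above): the same subarray filetype, shifted by whole-field
 * strides, lets each rank read back its block of field f collectively.
 */
static void MPIIO_ReadField(simulation_data *sim, const char *Filename, int f,
                            float *buf, MPI_Datatype filetype,
                            int ucount0, int ucount1, int ucount2)
{
    MPI_File fh;
    MPI_Offset disp = (MPI_Offset)f * sim->global_dims[0] *
                      sim->global_dims[1] * sim->global_dims[2] * sizeof(float);

    MPI_File_open(sim->comm_cart, (char *)Filename, MPI_MODE_RDONLY,
                  MPI_INFO_NULL, &fh);
    MPI_File_set_view(fh, disp, MPI_FLOAT, filetype, "native", MPI_INFO_NULL);
    MPI_File_read_all(fh, buf, ucount0 * ucount1 * ucount2, MPI_FLOAT,
                      MPI_STATUS_IGNORE);
    MPI_File_close(&fh);
}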
/*
 * Construct a subarray type for scattering and gathering data.
 * Reference: http://stackoverflow.com/questions/9269399/sending-blocks-of-2d-array-in-c-using-mpi/9271753#9271753
 */
void init_subarrtype(int root, int me, int n, int dim_sz, int per_n,
                     MPI_Datatype* subarrtype_addr, int sendcounts[], int displs[])
{
    int sizes[2] = { n, n };            /* global size */
    int subsizes[2] = { per_n, per_n }; /* local size */
    int starts[2] = { 0, 0 };           /* where this one starts */
    MPI_Datatype type;

    mpi_check(MPI_Type_create_subarray(2, sizes, subsizes, starts,
                                       MPI_ORDER_C, MPI_DOUBLE, &type));
    mpi_check(MPI_Type_create_resized(type, 0, per_n*sizeof(double),
                                      subarrtype_addr));
    mpi_check(MPI_Type_commit(subarrtype_addr));

    int i, j;
    if (me == root) {
        for (i = 0; i < dim_sz*dim_sz; i++) {
            sendcounts[i] = 1;
        }
        int disp = 0;
        for (i = 0; i < dim_sz; i++) {
            for (j = 0; j < dim_sz; j++) {
                displs[i*dim_sz + j] = disp;
                disp += 1;
            }
            disp += (per_n - 1)*dim_sz;
        }
    }
}
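/*
 * Hedged follow-up (buffer names assumed; reuses the mpi_check macro from the
 * routine above): the resized subarray type is only significant on the root
 * side of the scatter, while every rank receives its per_n x per_n block as
 * plain doubles.
 */
void scatter_blocks(int root, int per_n, double *global_A, double *local_A,
                    int sendcounts[], int displs[], MPI_Datatype subarrtype)
{
    mpi_check(MPI_Scatterv(global_A, sendcounts, displs, subarrtype,
                           local_A, per_n * per_n, MPI_DOUBLE,
                           root, MPI_COMM_WORLD));
}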
int main(int argc, char* argv[])
{
    int n, my_rank;
    int array_of_subsizes[NDIMS], array_of_starts[NDIMS], array_of_sizes[NDIMS];
    int size = 4;
    int sqrtn;
    int ln;
    MPI_Datatype filetype, memtype;
    MPI_File fh;
    char hdr[128];
    int header_bytes;
    unsigned char *cur;
    char name[128];
    int resultlen;
    int ret;
    int i, j;

    /* Initialize MPI. */
    MPI_Init(&argc, &argv);
    MPI_Errhandler_set(MPI_COMM_WORLD, MPI_ERRORS_RETURN);

    /* Learn my rank and the total number of processors. */
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &n);

    /* Speak! */
    MPI_Get_processor_name(name, &resultlen);
    printf("process %d running on %s\n", my_rank, name);

    /* Set up our values. */
    sqrtn = (int)sqrt(n);
    ln = size/sqrtn;
    printf("n = %d, sqrtn = %d, ln = %d storage = %d\n", n, sqrtn, ln,
           (ln + 2) * (ln + 2));

    /* Allocate storage. */
    if (!(cur = calloc((ln + 2) * (ln + 2), 1)))
        return ERR;

    /* Initialize data. */
    for (i = 1; i < ln + 1; i++)
        for (j = 1; j < ln + 1; j++)
            cur[i * (ln + 2) + j] = my_rank;

    /* Create a subarray type for the file. */
    array_of_sizes[0] = array_of_sizes[1] = size;
    array_of_subsizes[0] = array_of_subsizes[1] = ln;
    array_of_starts[0] = my_rank/sqrtn * ln;
    array_of_starts[1] = (my_rank % sqrtn) * ln;
    if ((ret = MPI_Type_create_subarray(NDIMS, array_of_sizes, array_of_subsizes,
                                        array_of_starts, MPI_ORDER_C,
                                        MPI_BYTE, &filetype)))
        MPIERR(ret);
    if ((ret = MPI_Type_commit(&filetype)))
        MPIERR(ret);

    /* Create a subarray type for memory (the interior of the ghosted block). */
    array_of_sizes[0] = array_of_sizes[1] = ln + 2;
    array_of_subsizes[0] = array_of_subsizes[1] = ln;
    array_of_starts[0] = array_of_starts[1] = 1;
    if ((ret = MPI_Type_create_subarray(NDIMS, array_of_sizes, array_of_subsizes,
                                        array_of_starts, MPI_ORDER_C,
                                        MPI_BYTE, &memtype)))
        MPIERR(ret);
    if ((ret = MPI_Type_commit(&memtype)))
        MPIERR(ret);

    MPI_File_delete(FILE_NAME, MPI_INFO_NULL);
    if ((ret = MPI_File_open(MPI_COMM_WORLD, FILE_NAME,
                             MPI_MODE_CREATE|MPI_MODE_RDWR, MPI_INFO_NULL, &fh)))
        MPIERR(ret);

    /* Create header info; every rank collectively writes the same header
     * bytes at the start of the file. */
    sprintf(hdr, "P5\n%d %d\n255\n", size, size);
    header_bytes = strlen(hdr);
    if ((ret = MPI_File_write_all(fh, hdr, header_bytes, MPI_BYTE,
                                  MPI_STATUS_IGNORE)))
        MPIERR(ret);

    /* Set the file view to translate our memory data into the file's data layout. */
    MPI_File_set_view(fh, header_bytes, MPI_BYTE, filetype, "native", MPI_INFO_NULL);

    /* Write the output. */
    MPI_File_write(fh, cur, 1, memtype, MPI_STATUS_IGNORE);

    if ((ret = MPI_File_close(&fh)))
        MPIERR(ret);

    MPI_Finalize();
    return 0;
}
int ADCL_subarray_init ( int ntopodim, int nvecdims, int *vecdims, int hwidth,
                         int nc, int order, int nneigh, MPI_Datatype btype,
                         int ndats, MPI_Datatype **senddats, MPI_Datatype **recvdats )
{
    int i, j, k;
    int ret = ADCL_SUCCESS;
    int *subdims=NULL, *sstarts=NULL, *rstarts=NULL;
    MPI_Datatype *sdats=NULL, *rdats=NULL;

    subdims = ( int*) malloc ( nvecdims * sizeof(int) );
    if ( NULL == subdims ) {
        return ADCL_NO_MEMORY;
    }
    sstarts = ( int*) malloc ( nvecdims * sizeof(int) );
    rstarts = ( int*) malloc ( nvecdims * sizeof(int) );
    if ( NULL == sstarts || NULL == rstarts ) {
        ret = ADCL_NO_MEMORY;
        goto exit;
    }
    sdats = ( MPI_Datatype *) malloc ( ndats * sizeof(MPI_Datatype));
    rdats = ( MPI_Datatype *) malloc ( ndats * sizeof(MPI_Datatype));
    if ( NULL == sdats || NULL == rdats ) {
        ret = ADCL_NO_MEMORY;
        goto exit;
    }

    if ( nc > 0 ) {
        subdims[nvecdims-1] = nc;
        sstarts[nvecdims-1] = 0;
        rstarts[nvecdims-1] = 0;
    }

    /* Loop over all topology dimensions */
    for ( i = 0; i < ntopodim; i++ ) {
        /* handle left and right neighbor separately */
        for ( j = 2*i; j <= 2*i+1; j++ ) {
            /* Set subdims and starts arrays.  Basically, subdims is in each
               direction the total extent of the according dimension of the
               data array without the halo-cells, except for the dimension we
               are currently dealing with; for that dimension it is hwidth.
               The starts arrays are hwidth for all dimensions except for the
               dimension (let's say k) which we are dealing with.  There it is
               for sending:
                 - hwidth for the left neighbor,
                 - vecdims[k]-2*hwidth for the right neighbor
               for receiving:
                 - 0 for the left neighbor
                 - vecdims[k]-hwidth for the right neighbor */
            for ( k = 0; k < ntopodim; k++ ) {
                if ( k == i ) {
                    subdims[k] = hwidth;
                    sstarts[k] = (j == 2*i) ? hwidth : (vecdims[k]-2*hwidth);
                    rstarts[k] = (j == 2*i) ? 0 : (vecdims[k]-hwidth);
                }
                else {
                    subdims[k] = vecdims[k] - 2*hwidth;
                    sstarts[k] = hwidth;
                    rstarts[k] = hwidth;
                }
            }

            MPI_Type_create_subarray ( nvecdims, vecdims, subdims, sstarts, order, btype, &(sdats[j]));
            MPI_Type_create_subarray ( nvecdims, vecdims, subdims, rstarts, order, btype, &(rdats[j]));
            MPI_Type_commit ( &(sdats[j]));
            MPI_Type_commit ( &(rdats[j]));
        }
    }

    if ( nneigh > ntopodim ) {
        if ( ntopodim == 2 && nneigh == 4 ) {
            subdims[0] = hwidth;
            subdims[1] = hwidth;

            /* lower left and upper right corner */
            for ( j = 0; j < 2; j++ ) {
                sstarts[0] = ( j == 0 ) ? hwidth : vecdims[0]-2*hwidth;
                sstarts[1] = ( j == 0 ) ? hwidth : vecdims[1]-2*hwidth;
                rstarts[0] = ( j == 0 ) ? 0 : vecdims[0]-hwidth;
                rstarts[1] = ( j == 0 ) ? 0 : vecdims[1]-hwidth;
                MPI_Type_create_subarray ( nvecdims, vecdims, subdims, sstarts, order, btype, &(sdats[4+j]));
                MPI_Type_create_subarray ( nvecdims, vecdims, subdims, rstarts, order, btype, &(rdats[4+j]));
                MPI_Type_commit ( &(sdats[4+j]));
                MPI_Type_commit ( &(rdats[4+j]));
            }

            /* lower right and upper left corner */
            for ( j = 0; j < 2; j++ ) {
                sstarts[0] = ( j == 0 ) ? vecdims[0]-2*hwidth : hwidth;
                sstarts[1] = ( j == 0 ) ? hwidth : vecdims[1]-2*hwidth;
                rstarts[0] = ( j == 0 ) ? vecdims[0]-hwidth : 0;
                rstarts[1] = ( j == 0 ) ? 0 : vecdims[1]-hwidth;
                MPI_Type_create_subarray ( nvecdims, vecdims, subdims, sstarts, order, btype, &(sdats[6+j]));
                MPI_Type_create_subarray ( nvecdims, vecdims, subdims, rstarts, order, btype, &(rdats[6+j]));
                MPI_Type_commit ( &(sdats[6+j]));
                MPI_Type_commit ( &(rdats[6+j]));
            }

            /* create additional datatypes for blocking send */
            for ( i = 0; i < ntopodim; i++ ) {
                /* handle left and right neighbor separately */
                for ( j = 2*i; j <= 2*i+1; j++ ) {
                    /* Same rule as in the main loop above, except that the
                       non-exchange dimensions now span the full array
                       including the halo cells. */
                    for ( k = 0; k < ntopodim; k++ ) {
                        if ( k == i ) {
                            subdims[k] = hwidth;
                            sstarts[k] = (j == 2*i) ? hwidth : (vecdims[k]-2*hwidth);
                            rstarts[k] = (j == 2*i) ? 0 : (vecdims[k]-hwidth);
                        }
                        else {
                            subdims[k] = vecdims[k];
                            sstarts[k] = 0;
                            rstarts[k] = 0;
                        }
                    }
                    MPI_Type_create_subarray ( nvecdims, vecdims, subdims, sstarts, order, btype, &(sdats[8+j]));
                    MPI_Type_create_subarray ( nvecdims, vecdims, subdims, rstarts, order, btype, &(rdats[8+j]));
                    MPI_Type_commit ( &(sdats[8+j]));
                    MPI_Type_commit ( &(rdats[8+j]));
                }
            }
        }
        else if ( ntopodim == 3 && nneigh == 9 ) {
            /* *** VERTICAL EDGES *** */
            subdims[0] = hwidth;
            subdims[1] = hwidth;
            subdims[2] = vecdims[2]-2*hwidth;

            /* Set the send and recv derived datatypes for the edge (0,0,z) */
            sstarts[0] = hwidth;  sstarts[1] = hwidth;  sstarts[2] = hwidth;
            rstarts[0] = 0;       rstarts[1] = 0;       rstarts[2] = hwidth;
            MPI_Type_create_subarray ( nvecdims, vecdims, subdims, sstarts, order, btype, &(sdats[6]));
            MPI_Type_create_subarray ( nvecdims, vecdims, subdims, rstarts, order, btype, &(rdats[6]));
            MPI_Type_commit ( &(sdats[6]));
            MPI_Type_commit ( &(rdats[6]));

            /* Set the send and recv derived datatypes for the edge (1,1,z) */
            sstarts[0] = vecdims[0]-2*hwidth;  sstarts[1] = vecdims[1]-2*hwidth;  sstarts[2] = hwidth;
            rstarts[0] = vecdims[0]-hwidth;    rstarts[1] = vecdims[1]-hwidth;    rstarts[2] = hwidth;
            MPI_Type_create_subarray ( nvecdims, vecdims, subdims, sstarts, order, btype, &(sdats[7]));
            MPI_Type_create_subarray ( nvecdims, vecdims, subdims, rstarts, order, btype, &(rdats[7]));
            MPI_Type_commit ( &(sdats[7]));
            MPI_Type_commit ( &(rdats[7]));

            /* Set the send and recv derived datatypes for the edge (0,1,z) */
            sstarts[0] = hwidth;  sstarts[1] = vecdims[1]-2*hwidth;  sstarts[2] = hwidth;
            rstarts[0] = 0;       rstarts[1] = vecdims[1]-hwidth;    rstarts[2] = hwidth;
            MPI_Type_create_subarray ( nvecdims, vecdims, subdims, sstarts, order, btype, &(sdats[8]));
            MPI_Type_create_subarray ( nvecdims, vecdims, subdims, rstarts, order, btype, &(rdats[8]));
            MPI_Type_commit ( &(sdats[8]));
            MPI_Type_commit ( &(rdats[8]));

            /* Set the send and recv derived datatypes for the edge (1,0,z) */
            sstarts[0] = vecdims[0]-2*hwidth;  sstarts[1] = hwidth;  sstarts[2] = hwidth;
            rstarts[0] = vecdims[0]-hwidth;    rstarts[1] = 0;       rstarts[2] = hwidth;
            MPI_Type_create_subarray ( nvecdims, vecdims, subdims, sstarts, order, btype, &(sdats[9]));
            MPI_Type_create_subarray ( nvecdims, vecdims, subdims, rstarts, order, btype, &(rdats[9]));
            MPI_Type_commit ( &(sdats[9]));
            MPI_Type_commit ( &(rdats[9]));

            /* *** HORIZONTAL EDGES *** */
            /* (0,-1,-1) - (0,+1,+1) */
            subdims[0] = hwidth;
            subdims[1] = vecdims[1]-2*hwidth;
            subdims[2] = hwidth;

            /* Set the send and recv derived datatypes for the edge (0,y,0) (3) */
            sstarts[0] = hwidth;  sstarts[1] = hwidth;  sstarts[2] = hwidth;
            rstarts[0] = 0;       rstarts[1] = hwidth;  rstarts[2] = 0;
            MPI_Type_create_subarray ( nvecdims, vecdims, subdims, sstarts, order, btype, &(sdats[10]));
            MPI_Type_create_subarray ( nvecdims, vecdims, subdims, rstarts, order, btype, &(rdats[10]));
            MPI_Type_commit ( &(sdats[10]));
            MPI_Type_commit ( &(rdats[10]));

            /* Set the send and recv derived datatypes for the edge (1,y,1) (23) */
            sstarts[0] = vecdims[0]-2*hwidth;  sstarts[1] = hwidth;  sstarts[2] = vecdims[2]-2*hwidth;
            rstarts[0] = vecdims[0]-hwidth;    rstarts[1] = hwidth;  rstarts[2] = vecdims[2]-hwidth;
            MPI_Type_create_subarray ( nvecdims, vecdims, subdims, sstarts, order, btype, &(sdats[11]));
            MPI_Type_create_subarray ( nvecdims, vecdims, subdims, rstarts, order, btype, &(rdats[11]));
            MPI_Type_commit ( &(sdats[11]));
            MPI_Type_commit ( &(rdats[11]));

            /* (1,0,-1) - (-1,0,+1) */
            subdims[0] = vecdims[0]-2*hwidth;
            subdims[1] = hwidth;
            subdims[2] = hwidth;

            /* Set the send and recv derived datatypes for the edge (x,1,0) (15) */
            sstarts[0] = hwidth;  sstarts[1] = vecdims[1]-2*hwidth;  sstarts[2] = hwidth;
            rstarts[0] = hwidth;  rstarts[1] = vecdims[1]-hwidth;    rstarts[2] = 0;
            MPI_Type_create_subarray ( nvecdims, vecdims, subdims, sstarts, order, btype, &(sdats[12]));
            MPI_Type_create_subarray ( nvecdims, vecdims, subdims, rstarts, order, btype, &(rdats[12]));
            MPI_Type_commit ( &(sdats[12]));
            MPI_Type_commit ( &(rdats[12]));

            /* Set the send and recv derived datatypes for the edge (x,0,1) (11) */
            sstarts[0] = hwidth;  sstarts[1] = hwidth;  sstarts[2] = vecdims[2]-2*hwidth;
            rstarts[0] = hwidth;  rstarts[1] = 0;       rstarts[2] = vecdims[2]-hwidth;
            MPI_Type_create_subarray ( nvecdims, vecdims, subdims, sstarts, order, btype, &(sdats[13]));
            MPI_Type_create_subarray ( nvecdims, vecdims, subdims, rstarts, order, btype, &(rdats[13]));
            MPI_Type_commit ( &(sdats[13]));
            MPI_Type_commit ( &(rdats[13]));

            /* (0,-1,1) - (0,1,-1) */
            subdims[0] = hwidth;
            subdims[1] = vecdims[1]-2*hwidth;
            subdims[2] = hwidth;

            /* Set the send and recv derived datatypes for the edge (1,y,0) (21) */
            sstarts[0] = vecdims[0]-2*hwidth;  sstarts[1] = hwidth;  sstarts[2] = hwidth;
            rstarts[0] = vecdims[0]-hwidth;    rstarts[1] = hwidth;  rstarts[2] = 0;
            MPI_Type_create_subarray ( nvecdims, vecdims, subdims, sstarts, order, btype, &(sdats[14]));
            MPI_Type_create_subarray ( nvecdims, vecdims, subdims, rstarts, order, btype, &(rdats[14]));
            MPI_Type_commit ( &(sdats[14]));
            MPI_Type_commit ( &(rdats[14]));

            /* Set the send and recv derived datatypes for the edge (0,y,1) (5) */
            sstarts[0] = hwidth;  sstarts[1] = hwidth;  sstarts[2] = vecdims[2]-2*hwidth;
            rstarts[0] = 0;       rstarts[1] = hwidth;  rstarts[2] = vecdims[2]-hwidth;
            MPI_Type_create_subarray ( nvecdims, vecdims, subdims, sstarts, order, btype, &(sdats[15]));
            MPI_Type_create_subarray ( nvecdims, vecdims, subdims, rstarts, order, btype, &(rdats[15]));
            MPI_Type_commit ( &(sdats[15]));
            MPI_Type_commit ( &(rdats[15]));

            /* (-1,0,-1) - (+1,0,+1) */
            subdims[0] = vecdims[0]-2*hwidth;
            subdims[1] = hwidth;
            subdims[2] = hwidth;

            /* Set the send and recv derived datatypes for the edge (x,0,0) (9) */
            sstarts[0] = hwidth;  sstarts[1] = hwidth;  sstarts[2] = hwidth;
            rstarts[0] = hwidth;  rstarts[1] = 0;       rstarts[2] = 0;
            MPI_Type_create_subarray ( nvecdims, vecdims, subdims, sstarts, order, btype, &(sdats[16]));
            MPI_Type_create_subarray ( nvecdims, vecdims, subdims, rstarts, order, btype, &(rdats[16]));
            MPI_Type_commit ( &(sdats[16]));
            MPI_Type_commit ( &(rdats[16]));

            /* Set the send and recv derived datatypes for the edge (x,1,1) (17) */
            sstarts[0] = hwidth;  sstarts[1] = vecdims[1]-2*hwidth;  sstarts[2] = vecdims[2]-2*hwidth;
            rstarts[0] = hwidth;  rstarts[1] = vecdims[1]-hwidth;    rstarts[2] = vecdims[2]-hwidth;
            MPI_Type_create_subarray ( nvecdims, vecdims, subdims, sstarts, order, btype, &(sdats[17]));
            MPI_Type_create_subarray ( nvecdims, vecdims, subdims, rstarts, order, btype, &(rdats[17]));
            MPI_Type_commit ( &(sdats[17]));
            MPI_Type_commit ( &(rdats[17]));

            /* create additional datatypes for blocking send */
            for ( i = 0; i < ntopodim; i++ ) {
                /* handle left and right neighbor separately */
                for ( j = 2*i; j <= 2*i+1; j++ ) {
                    /* Same rule as in the main loop above, except that the
                       non-exchange dimensions now span the full array
                       including the halo cells. */
                    for ( k = 0; k < ntopodim; k++ ) {
                        if ( k == i ) {
                            subdims[k] = hwidth;
                            sstarts[k] = (j == 2*i) ? hwidth : (vecdims[k]-2*hwidth);
                            rstarts[k] = (j == 2*i) ? 0 : (vecdims[k]-hwidth);
                        }
                        else {
                            subdims[k] = vecdims[k];
                            sstarts[k] = 0;
                            rstarts[k] = 0;
                        }
                    }
                    MPI_Type_create_subarray ( nvecdims, vecdims, subdims, sstarts, order, btype, &(sdats[18+j]));
                    MPI_Type_create_subarray ( nvecdims, vecdims, subdims, rstarts, order, btype, &(rdats[18+j]));
                    MPI_Type_commit ( &(sdats[18+j]));
                    MPI_Type_commit ( &(rdats[18+j]));
                }
            }
        }
        else {
            printf("not implemented\n");
            exit(-1);
        }
    }

exit:
    /* the scratch arrays are always freed; the datatype arrays only on error */
    if ( NULL != subdims ) { free ( subdims ); }
    if ( NULL != sstarts ) { free ( sstarts ); }
    if ( NULL != rstarts ) { free ( rstarts ); }
    if ( ret != ADCL_SUCCESS ) {
        if ( NULL != sdats ) { free ( sdats ); sdats = NULL; }
        if ( NULL != rdats ) { free ( rdats ); rdats = NULL; }
    }

    *senddats = sdats;
    *recvdats = rdats;
    return ret;
}
int main(int argc, char *argv[])
{
    int iarrayOfSizes[2], iarrayOfSubsizes[2], iarrayOfStarts[2], ilocal_size;
    int nproc[2], periods[2], icoord[2];
    int m, n, i, j, wsize, wrank, crank, ndims, lrows, lcols, grow, gcol, err;
    MPI_Datatype filetype;
    MPI_File fh;
    MPI_Comm cartcomm;
    MPI_Info info0, info3;
    double t, topen, twrite, tclose, wrate = 0.0;
    double *local_array;
    char nstripesStr[12], stripeUnitStr[12];
    int nstripes = -1;
    int stripeUnit = -1;
    MPI_Offset headerSize = 0;

    MPI_Init(0, 0);
    MPI_Comm_rank(MPI_COMM_WORLD, &wrank);

    /* Get global array size */
    m = n = 128; /* Set default size */

    /* ioda [ m ] [ n ] [ nstripes ] [ stripeunit ] [ headersize ] */
    if (argc > 0) {
        if (argc > 1) m = atoi(argv[1]);
        if (argc > 2) n = atoi(argv[2]);
        if (argc > 3) nstripes = atoi(argv[3]);
        if (argc > 4) stripeUnit = atoi(argv[4]);
        if (argc > 5) headerSize = atoi(argv[5]);
        if (argc > 6) {
            if (wrank == 0)
                fprintf(stderr, "Unrecognized argument %s\n", argv[6]);
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
    }
    if (wrank == 0)
        printf("Matrix is [%d,%d]; file dir = %s\n", m, n, MYSCRATCHDIR);

    /* The default number of stripes = totalsize/1M */
    if (nstripes < 0) {
        nstripes = n * m * sizeof(double) / (1024*1024);
        if (nstripes < 1) nstripes = 1;
    }
    if (wrank == 0)
        printf("nstripes = %d, stripeUnit = %d, header size = %d\n",
               nstripes, stripeUnit, (int)headerSize);

    /* Use topology routines to get decomposition and coordinates */
    MPI_Comm_size(MPI_COMM_WORLD, &wsize);
    nproc[0] = 0; nproc[1] = 0;
    ndims = 2;
    MPI_Dims_create(wsize, ndims, nproc);
    periods[0] = 0; periods[1] = 0;
    MPI_Cart_create(MPI_COMM_WORLD, ndims, nproc, periods, 1, &cartcomm);
    MPI_Comm_rank(cartcomm, &crank);
    MPI_Cart_coords(cartcomm, crank, ndims, icoord);

    iarrayOfSizes[0] = m;
    iarrayOfSizes[1] = n;
    iarrayOfSubsizes[0] = m/nproc[0];
    iarrayOfSubsizes[1] = n/nproc[1];
    iarrayOfStarts[0] = icoord[0] * iarrayOfSubsizes[0];
    iarrayOfStarts[1] = icoord[1] * iarrayOfSubsizes[1];

    /* Initialize my block of the data */
    ilocal_size = iarrayOfSubsizes[0] * iarrayOfSubsizes[1];
    lrows = iarrayOfSubsizes[0];
    lcols = iarrayOfSubsizes[1];
    local_array = (double *)malloc(lrows*lcols*sizeof(double));
    gcol = iarrayOfStarts[1];
    grow = iarrayOfStarts[0];
    for (i = 0; i < lrows; i++) {
        for (j = 0; j < lcols; j++) {
            local_array[j*lrows + i] = (grow + i) + (gcol + j)*m;
        }
    }

    /* Fortran order simply means the data is stored by columns */
    MPI_Type_create_subarray(ndims, iarrayOfSizes, iarrayOfSubsizes,
                             iarrayOfStarts, MPI_ORDER_FORTRAN,
                             MPI_DOUBLE, &filetype);
    MPI_Type_commit(&filetype);

    info0 = MPI_INFO_NULL;
    info3 = MPI_INFO_NULL;
    if (nstripes > 0 || stripeUnit > 0) {
        MPI_Info_create(&info0);
        if (nstripes > 0) {
            snprintf(nstripesStr, sizeof(nstripesStr), "%d", nstripes);
            MPI_Info_set(info0, "striping_factor", nstripesStr);
            MPI_Info_set(info0, "cb_nodes", nstripesStr);
        }
        if (stripeUnit > 0) {
            snprintf(stripeUnitStr, sizeof(stripeUnitStr), "%d", stripeUnit);
            MPI_Info_set(info0, "striping_unit", stripeUnitStr);
        }

        MPI_Info_dup(info0, &info3);
        MPI_Info_set(info3, "romio_no_indep_rw", "true");

        /* Other hints to consider:
             direct_io=true
           The default cb_buffer_size is 16777216, but it is overridden by the
           striping unit, which is smaller by default. */
    }

    /* level - 3 */
    MPI_Barrier(MPI_COMM_WORLD);
    t = MPI_Wtime();
    err = MPI_File_open(cartcomm, MYSCRATCHDIR "testfile-3.out",
                        MPI_MODE_CREATE | MPI_MODE_RDWR, info3, &fh);
    topen = MPI_Wtime() - t;
    if (err != MPI_SUCCESS) myAbort(err, "open testfile-3.out");

    if (headerSize > 0) {
        /* Simulate writing a header */
        if (wrank == 0) {
            char *header;
            header = (char *)calloc(1, (size_t)headerSize);
            MPI_File_write(fh, header, headerSize, MPI_BYTE, MPI_STATUS_IGNORE);
            free(header);
        }
        MPI_Barrier(cartcomm);
    }

    MPI_File_set_view(fh, headerSize, MPI_DOUBLE, filetype, "native", MPI_INFO_NULL);

    MPI_Barrier(MPI_COMM_WORLD);
    t = MPI_Wtime();
    err = MPI_File_write_all(fh, local_array, ilocal_size, MPI_DOUBLE,
                             MPI_STATUS_IGNORE);
    twrite = MPI_Wtime() - t;
    if (err != MPI_SUCCESS) myAbort(err, "collective write");

    err = MPI_File_close(&fh);
    tclose = MPI_Wtime() - t;
    /* tclose is the time for the write(s) + the close, in case the
       implementation delays (some of) the writes until the close */
    if (err != MPI_SUCCESS) myAbort(err, "close testfile-3.out");

    MPI_Allreduce(MPI_IN_PLACE, &topen, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
    MPI_Allreduce(MPI_IN_PLACE, &twrite, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
    MPI_Allreduce(MPI_IN_PLACE, &tclose, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
    if (twrite > 0)
        wrate = (double)m * (double)n * sizeof(double)/twrite;
    if (wrank == 0)
        printf("%d\t[%d,%d]\t%d\t%.2e\t%.2e\t%.2e\t%.2e\n",
               wsize, m, n, nstripes, topen, twrite, tclose, wrate);

    /* level - 0 */
    MPI_Barrier(MPI_COMM_WORLD);
    t = MPI_Wtime();
    err = MPI_File_open(cartcomm, MYSCRATCHDIR "testfile-0.out",
                        MPI_MODE_CREATE | MPI_MODE_RDWR, info0, &fh);
    topen = MPI_Wtime() - t;
    if (err != MPI_SUCCESS) myAbort(err, "open testfile-0.out");

    if (headerSize > 0) {
        /* Simulate writing a header */
        if (wrank == 0) {
            char *header;
            header = (char *)calloc(1, (size_t)headerSize);
            MPI_File_write(fh, header, headerSize, MPI_BYTE, MPI_STATUS_IGNORE);
            free(header);
        }
        MPI_Barrier(cartcomm);
    }

    MPI_Barrier(MPI_COMM_WORLD);
    t = MPI_Wtime();
    gcol = iarrayOfStarts[1];
    grow = iarrayOfStarts[0];
    for (j = 0; j < lcols; j++) {
        MPI_Offset offset = headerSize +
            ((MPI_Offset)(grow) + (MPI_Offset)(gcol + j)*m) * sizeof(double);
        err = MPI_File_write_at(fh, offset, local_array + j*lrows, lrows,
                                MPI_DOUBLE, MPI_STATUS_IGNORE);
        if (err != MPI_SUCCESS) myAbort(err, "write at");
    }
    twrite = MPI_Wtime() - t;

    err = MPI_File_close(&fh);
    tclose = MPI_Wtime() - t;
    /* tclose is the time for the write(s) + the close, in case the
       implementation delays (some of) the writes until the close */
    if (err != MPI_SUCCESS) myAbort(err, "close testfile-0");

    MPI_Allreduce(MPI_IN_PLACE, &topen, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
    MPI_Allreduce(MPI_IN_PLACE, &twrite, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
    MPI_Allreduce(MPI_IN_PLACE, &tclose, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
    if (twrite > 0)
        wrate = (double)m * (double)n * sizeof(double)/twrite;
    if (wrank == 0)
        printf("%d\t[%d,%d]\t%d\t%.2e\t%.2e\t%.2e\t%.2e\n",
               wsize, m, n, nstripes, topen, twrite, tclose, wrate);

    if (info0 != MPI_INFO_NULL) {
        MPI_Info_free(&info0);
        MPI_Info_free(&info3);
    }
    free(local_array);
    MPI_Finalize();
    return 0;
}
void Strided_to_dtype(int stride_array[/*stride_levels*/],
                      int count[/*stride_levels+1*/],
                      int stride_levels, MPI_Datatype old_type,
                      MPI_Datatype *new_type)
{
    int sizes   [stride_levels+1];
    int subsizes[stride_levels+1];
    int starts  [stride_levels+1];
    int i, old_type_size;

    MPI_Type_size(old_type, &old_type_size);

    /* Eliminate counts that don't count (all 1 counts at the end) */
    for (i = stride_levels+1; (i > 0) && (stride_levels > 0) && (count[i-1] == 1); i--)
        stride_levels--;

    /* A correct strided spec should be monotonically increasing and
       stride_array[i+1] should be a multiple of stride_array[i]. */
    if (stride_levels > 0) {
        for (i = 1; i < stride_levels; i++) {
            assert(stride_array[i] >= stride_array[i-1]);
            /* This assertion is violated by what seems to be valid usage resulting from
             * the new GA API call nga_strided_get during the stride test in GA 5.2.
             * assert((stride_array[i] % stride_array[i-1]) == 0);
             */
        }
    }

    /* Test for a contiguous transfer */
    if (stride_levels == 0) {
        int elem_count = count[0]/old_type_size;

        assert((count[0] % old_type_size) == 0);
        MPI_Type_contiguous(elem_count, old_type, new_type);
    }

    /* Transfer is non-contiguous */
    else {
        for (i = 0; i < stride_levels+1; i++)
            starts[i] = 0;

        sizes   [stride_levels] = stride_array[0]/old_type_size;
        subsizes[stride_levels] = count[0]/old_type_size;

        assert((stride_array[0] % old_type_size) == 0);
        assert((count[0] % old_type_size) == 0);

        for (i = 1; i < stride_levels; i++) {
            /* Convert strides into dimensions by dividing out contributions
               from lower dims */
            sizes   [stride_levels-i] = stride_array[i]/stride_array[i-1];
            subsizes[stride_levels-i] = count[i];

            /* This assertion is violated by what seems to be valid usage resulting from
             * the new GA API call nga_strided_get during the stride test in GA 5.2.
             * assert_msg((stride_array[i] % stride_array[i-1]) == 0, "Invalid striding");
             */
        }

        sizes   [0] = count[stride_levels];
        subsizes[0] = count[stride_levels];

        MPI_Type_create_subarray(stride_levels+1, sizes, subsizes, starts,
                                 MPI_ORDER_C, old_type, new_type);
    }
}
/* subarray_1d_c_test1()
 *
 * Returns the number of errors encountered.
 */
int subarray_1d_c_test1(void)
{
    MPI_Datatype subarray;
    int array[9] = { -1, 1, 2, 3, -2, -3, -4, -5, -6 };
    int array_size[] = { 9 };
    int array_subsize[] = { 3 };
    int array_start[] = { 1 };
    int i, err, errs = 0, sizeoftype;

    /* set up type */
    err = MPI_Type_create_subarray(1, /* dims */
                                   array_size, array_subsize, array_start,
                                   MPI_ORDER_C, MPI_INT, &subarray);
    if (err != MPI_SUCCESS) {
        errs++;
        if (verbose) {
            fprintf(stderr,
                    "error in MPI_Type_create_subarray call; aborting after %d errors\n",
                    errs);
        }
        return errs;
    }

    MPI_Type_commit(&subarray);

    MPI_Type_size(subarray, &sizeoftype);
    if (sizeoftype != 3 * sizeof(int)) {
        errs++;
        if (verbose)
            fprintf(stderr, "size of type = %d; should be %d\n",
                    sizeoftype, (int) (3 * sizeof(int)));
        return errs;
    }

    err = pack_and_unpack((char *) array, 1, subarray, 9 * sizeof(int));

    for (i = 0; i < 9; i++) {
        int goodval;
        switch (i) {
            case 1:
                goodval = 1;
                break;
            case 2:
                goodval = 2;
                break;
            case 3:
                goodval = 3;
                break;
            default:
                goodval = 0; /* pack_and_unpack() zeros before unpacking */
                break;
        }
        if (array[i] != goodval) {
            errs++;
            if (verbose)
                fprintf(stderr, "array[%d] = %d; should be %d\n",
                        i, array[i], goodval);
        }
    }

    MPI_Type_free(&subarray);
    return errs;
}
/* subarray_4d_fortran_test1()
 *
 * Returns the number of errors encountered.
 */
int subarray_4d_fortran_test1(void)
{
    MPI_Datatype subarray;
    int array[] = {
        -1111, -1112, -1113, -1114, -1115, -1116,
        -1121, -1122, -1123, -1124, -1125, -1126,
        -1131, -1132, -1133, -1134, -1135, -1136,
        -1211, -1212, -1213, -1214, -1215, -1216,
        -1221, -1222, -1223, -1224, -1225, -1226,
        -1231, -1232, -1233, -1234, -1235, -1236,
        -2111, -2112, -2113, -2114,     1, -2116,
        -2121, -2122, -2123, -2124,     2, -2126,
        -2131, -2132, -2133, -2134,     3, -2136,
        -2211, -2212, -2213, -2214,     4, -2216,
        -2221, -2222, -2223, -2224,     5, -2226,
        -2231, -2232, -2233, -2234,     6, -2236
    };
    int array_size[4] = { 6, 3, 2, 2 };
    int array_subsize[4] = { 1, 3, 2, 1 };
    int array_start[4] = { 4, 0, 0, 1 };
    int i, err, errs = 0, sizeoftype;

    /* set up type */
    err = MPI_Type_create_subarray(4, /* dims */
                                   array_size, array_subsize, array_start,
                                   MPI_ORDER_FORTRAN, MPI_INT, &subarray);
    if (err != MPI_SUCCESS) {
        errs++;
        if (verbose) {
            fprintf(stderr,
                    "error in MPI_Type_create_subarray call; aborting after %d errors\n",
                    errs);
        }
        return errs;
    }

    MPI_Type_commit(&subarray);

    MPI_Type_size(subarray, &sizeoftype);
    if (sizeoftype != 6 * sizeof(int)) {
        errs++;
        if (verbose)
            fprintf(stderr, "size of type = %d; should be %d\n",
                    sizeoftype, (int) (6 * sizeof(int)));
        return errs;
    }

    err = pack_and_unpack((char *) array, 1, subarray, 72 * sizeof(int));

    for (i = 0; i < 72; i++) {
        int goodval;
        switch (i) {
            case 40:
                goodval = 1;
                break;
            case 46:
                goodval = 2;
                break;
            case 52:
                goodval = 3;
                break;
            case 58:
                goodval = 4;
                break;
            case 64:
                goodval = 5;
                break;
            case 70:
                goodval = 6;
                break;
            default:
                goodval = 0;
                break;
        }
        if (array[i] != goodval) {
            errs++;
            if (verbose)
                fprintf(stderr, "array[%d] = %d; should be %d\n",
                        i, array[i], goodval);
        }
    }

    MPI_Type_free(&subarray);
    return errs;
}
void inimesh(int MEDIASTART, Grid3D d1, Grid3D mu, Grid3D lam, Grid3D qp, Grid3D qs, float *taumax, float *taumin, int nvar, float FP, float FL, float FH, int nxt, int nyt, int nzt, int PX, int PY, int NX, int NY, int NZ, int *coords, MPI_Comm MCW, int IDYNA, int NVE, int SoCalQ, char *INVEL, float *vse, float *vpe, float *dde) { int merr; int rank; int i,j,k,err; float vp,vs,dd,pi; int rmtype[3], rptype[3], roffset[3]; MPI_Datatype readtype; MPI_Status filestatus; MPI_File fh; pi = 4.*atan(1.); if(MEDIASTART==0) { *taumax = 1./(2*pi*FL); *taumin = 1./(2*pi*FH); if(IDYNA==1) { vp=6000.0; vs=3464.0; dd=2670.0; } else { vp=4800.0; vs=2800.0; dd=2500.0; } for(i=0;i<nxt+4+8*loop;i++) for(j=0;j<nyt+4+8*loop;j++) for(k=0;k<nzt+2*align;k++) { lam[i][j][k]=1./(dd*(vp*vp - 2.*vs*vs)); mu[i][j][k]=1./(dd*vs*vs); d1[i][j][k]=dd; } } else { Grid3D tmpvp=NULL, tmpvs=NULL, tmpdd=NULL; Grid3D tmppq=NULL, tmpsq=NULL; int var_offset; tmpvp = Alloc3D(nxt, nyt, nzt); tmpvs = Alloc3D(nxt, nyt, nzt); tmpdd = Alloc3D(nxt, nyt, nzt); for(i=0;i<nxt;i++) for(j=0;j<nyt;j++) for(k=0;k<nzt;k++) { tmpvp[i][j][k]=0.0f; tmpvs[i][j][k]=0.0f; tmpdd[i][j][k]=0.0f; } if(NVE==1) { tmppq = Alloc3D(nxt, nyt, nzt); tmpsq = Alloc3D(nxt, nyt, nzt); for(i=0;i<nxt;i++) for(j=0;j<nyt;j++) for(k=0;k<nzt;k++) { tmppq[i][j][k]=0.0f; tmpsq[i][j][k]=0.0f; } } if(nvar==8) { var_offset=3; } else if(nvar==5) { var_offset=0; } else { var_offset=0; } if(MEDIASTART>=1 && MEDIASTART<=3) { char filename[40]; if(MEDIASTART<3) sprintf(filename,INVEL); else if(MEDIASTART==3){ MPI_Comm_rank(MCW,&rank); sprintf(filename,"input_rst/mediapart/media%07d.bin",rank); if(rank%100==0) printf("Rank=%d, reading file=%s\n",rank,filename); } Grid1D tmpta = Alloc1D(nvar*nxt*nyt*nzt); if(MEDIASTART==3 || (PX==1 && PY==1)) { FILE *file; file = fopen(filename,"rb"); if(!file) { printf("can't open file %s", filename); return; } if(!fread(tmpta,sizeof(float),nvar*nxt*nyt*nzt,file)) { printf("can't read file %s", filename); return; } //printf("%d) 0-0-0,1-10-3=%f, %f\n",rank,tmpta[0],tmpta[1+10*nxt+3*nxt*nyt]); } else{ rmtype[0] = NZ; rmtype[1] = NY; rmtype[2] = NX*nvar; rptype[0] = nzt; rptype[1] = nyt; rptype[2] = nxt*nvar; roffset[0] = 0; roffset[1] = nyt*coords[1]; roffset[2] = nxt*coords[0]*nvar; err = MPI_Type_create_subarray(3, rmtype, rptype, roffset, MPI_ORDER_C, MPI_FLOAT, &readtype); err = MPI_Type_commit(&readtype); err = MPI_File_open(MCW,filename,MPI_MODE_RDONLY,MPI_INFO_NULL,&fh); err = MPI_File_set_view(fh, 0, MPI_FLOAT, readtype, "native", MPI_INFO_NULL); err = MPI_File_read_all(fh, tmpta, nvar*nxt*nyt*nzt, MPI_FLOAT, &filestatus); err = MPI_File_close(&fh); } for(k=0;k<nzt;k++) for(j=0;j<nyt;j++) for(i=0;i<nxt;i++){ tmpvp[i][j][k]=tmpta[(k*nyt*nxt+j*nxt+i)*nvar+var_offset]; tmpvs[i][j][k]=tmpta[(k*nyt*nxt+j*nxt+i)*nvar+var_offset+1]; tmpdd[i][j][k]=tmpta[(k*nyt*nxt+j*nxt+i)*nvar+var_offset+2]; if(nvar>3){ tmppq[i][j][k]=tmpta[(k*nyt*nxt+j*nxt+i)*nvar+var_offset+3]; tmpsq[i][j][k]=tmpta[(k*nyt*nxt+j*nxt+i)*nvar+var_offset+4]; } /*if(tmpvp[i][j][k]!=tmpvp[i][j][k] || tmpvs[i][j][k]!=tmpvs[i][j][k] || tmpdd[i][j][k]!=tmpdd[i][j][k]){ printf("%d) tmpvp,vs,dd is NAN!\n"); MPI_Abort(MPI_COMM_WORLD,1); }*/ } //printf("%d) vp,vs,dd[0^3]=%f,%f,%f\n",rank,tmpvp[0][0][0], // tmpvs[0][0][0], tmpdd[0][0][0]); Delloc1D(tmpta); } if(nvar==3 && NVE==1) { for(i=0;i<nxt;i++) for(j=0;j<nyt;j++){ for(k=0;k<nzt;k++){ tmpsq[i][j][k]=0.05*tmpvs[i][j][k]; tmppq[i][j][k]=2.0*tmpsq[i][j][k]; } } } float w0=0.0f, ww1=0.0f, w2=0.0f, tmp1=0.0f, tmp2=0.0f; float qpinv=0.0f, 
        qsinv=0.0f, vpvs=0.0f;

    if (NVE == 1) {
        w0  = 2*pi*FP;
        ww1 = 2*pi*FL;
        w2  = 2*pi*FH;
        *taumax = 1./ww1;
        *taumin = 1./w2;
        tmp1 = 2./pi*(log((*taumax)/(*taumin)));
        tmp2 = 2./pi*log(w0*(*taumin));
    }

    /* min/max trackers: [0] = minimum, [1] = maximum */
    vse[0] = 1.0e10;  vpe[0] = 1.0e10;  dde[0] = 1.0e10;
    vse[1] = -1.0e10; vpe[1] = -1.0e10; dde[1] = -1.0e10;

    for (i = 0; i < nxt; i++)
        for (j = 0; j < nyt; j++)
            for (k = 0; k < nzt; k++) {
                tmpvs[i][j][k] = tmpvs[i][j][k]*(1 + (log(w2/w0))/(pi*tmpsq[i][j][k]));
                tmpvp[i][j][k] = tmpvp[i][j][k]*(1 + (log(w2/w0))/(pi*tmppq[i][j][k]));
                if (SoCalQ == 1) {
                    vpvs = tmpvp[i][j][k]/tmpvs[i][j][k];
                    if (vpvs < 1.45)
                        tmpvs[i][j][k] = tmpvp[i][j][k]/1.45;
                }
                /* clamp velocities and density to physical bounds */
                //if(tmpvs[i][j][k]<400.0)
                if (tmpvs[i][j][k] < 200.0) {
                    //tmpvs[i][j][k]=400.0;
                    //tmpvp[i][j][k]=1200.0;
                    tmpvs[i][j][k] = 200.0;
                    tmpvp[i][j][k] = 600.0;
                }
                if (tmpvp[i][j][k] > 6500.0) {
                    tmpvs[i][j][k] = 3752.0;
                    tmpvp[i][j][k] = 6500.0;
                }
                if (tmpdd[i][j][k] < 1700.0)
                    tmpdd[i][j][k] = 1700.0;

                /* store reciprocals of the moduli; the z index is flipped */
                mu[i+2+4*loop][j+2+4*loop][(nzt+align-1) - k] =
                    1./(tmpdd[i][j][k]*tmpvs[i][j][k]*tmpvs[i][j][k]);
                lam[i+2+4*loop][j+2+4*loop][(nzt+align-1) - k] =
                    1./(tmpdd[i][j][k]*(tmpvp[i][j][k]*tmpvp[i][j][k]
                                        - 2.*tmpvs[i][j][k]*tmpvs[i][j][k]));
                d1[i+2+4*loop][j+2+4*loop][(nzt+align-1) - k] = tmpdd[i][j][k];

                if (NVE == 1) {
                    if (tmppq[i][j][k] <= 0.0) {
                        qpinv = 0.0;
                        qsinv = 0.0;
                    }
                    else {
                        qpinv = 1./tmppq[i][j][k];
                        qsinv = 1./tmpsq[i][j][k];
                    }
                    tmppq[i][j][k] = tmp1*qpinv/(1.0 - tmp2*qpinv);
                    tmpsq[i][j][k] = tmp1*qsinv/(1.0 - tmp2*qsinv);
                    qp[i+2+4*loop][j+2+4*loop][(nzt+align-1) - k] = tmppq[i][j][k];
                    qs[i+2+4*loop][j+2+4*loop][(nzt+align-1) - k] = tmpsq[i][j][k];
                }

                if (tmpvs[i][j][k] < vse[0]) vse[0] = tmpvs[i][j][k];
                if (tmpvs[i][j][k] > vse[1]) vse[1] = tmpvs[i][j][k];
                if (tmpvp[i][j][k] < vpe[0]) vpe[0] = tmpvp[i][j][k];
                if (tmpvp[i][j][k] > vpe[1]) vpe[1] = tmpvp[i][j][k];
                if (tmpdd[i][j][k] < dde[0]) dde[0] = tmpdd[i][j][k];
                if (tmpdd[i][j][k] > dde[1]) dde[1] = tmpdd[i][j][k];
            }

    Delloc3D(tmpvp);
    Delloc3D(tmpvs);
    Delloc3D(tmpdd);
    if (NVE == 1) {
        Delloc3D(tmppq);
        Delloc3D(tmpsq);
    }

    //5 Planes (except upper XY-plane)
    for (j = 2+4*loop; j < nyt+2+4*loop; j++)
        for (k = align; k < nzt+align; k++) {
            lam[1+4*loop][j][k]     = lam[2+4*loop][j][k];
            lam[nxt+2+4*loop][j][k] = lam[nxt+1+4*loop][j][k];
            mu[1+4*loop][j][k]      = mu[2+4*loop][j][k];
            mu[nxt+2+4*loop][j][k]  = mu[nxt+1+4*loop][j][k];
            d1[1+4*loop][j][k]      = d1[2+4*loop][j][k];
            d1[nxt+2+4*loop][j][k]  = d1[nxt+1+4*loop][j][k];
        }
    for (i = 2+4*loop; i < nxt+2+4*loop; i++)
        for (k = align; k < nzt+align; k++) {
            lam[i][1+4*loop][k]     = lam[i][2+4*loop][k];
            lam[i][nyt+2+4*loop][k] = lam[i][nyt+1+4*loop][k];
            mu[i][1+4*loop][k]      = mu[i][2+4*loop][k];
            mu[i][nyt+2+4*loop][k]  = mu[i][nyt+1+4*loop][k];
            d1[i][1+4*loop][k]      = d1[i][2+4*loop][k];
            d1[i][nyt+2+4*loop][k]  = d1[i][nyt+1+4*loop][k];
        }
    for (i = 2+4*loop; i < nxt+2+4*loop; i++)
        for (j = 2+4*loop; j < nyt+2+4*loop; j++) {
            lam[i][j][align-1] = lam[i][j][align];
            mu[i][j][align-1]  = mu[i][j][align];
            d1[i][j][align-1]  = d1[i][j][align];
        }

    //12 border lines
    for (i = 2+4*loop; i < nxt+2+4*loop; i++) {
        lam[i][1+4*loop][align-1]       = lam[i][2+4*loop][align];
        mu[i][1+4*loop][align-1]        = mu[i][2+4*loop][align];
        d1[i][1+4*loop][align-1]        = d1[i][2+4*loop][align];
        lam[i][nyt+2+4*loop][align-1]   = lam[i][nyt+1+4*loop][align];
        mu[i][nyt+2+4*loop][align-1]    = mu[i][nyt+1+4*loop][align];
        d1[i][nyt+2+4*loop][align-1]    = d1[i][nyt+1+4*loop][align];
        lam[i][1+4*loop][nzt+align]     = lam[i][2+4*loop][nzt+align-1];
        mu[i][1+4*loop][nzt+align]      = mu[i][2+4*loop][nzt+align-1];
        d1[i][1+4*loop][nzt+align]      = d1[i][2+4*loop][nzt+align-1];
        lam[i][nyt+2+4*loop][nzt+align] = lam[i][nyt+1+4*loop][nzt+align-1];
        mu[i][nyt+2+4*loop][nzt+align]  = mu[i][nyt+1+4*loop][nzt+align-1];
        d1[i][nyt+2+4*loop][nzt+align]  = d1[i][nyt+1+4*loop][nzt+align-1];
    }
    for (j = 2+4*loop; j < nyt+2+4*loop; j++) {
        lam[1+4*loop][j][align-1]       = lam[2+4*loop][j][align];
        mu[1+4*loop][j][align-1]        = mu[2+4*loop][j][align];
        d1[1+4*loop][j][align-1]        = d1[2+4*loop][j][align];
        lam[nxt+2+4*loop][j][align-1]   = lam[nxt+1+4*loop][j][align];
        mu[nxt+2+4*loop][j][align-1]    = mu[nxt+1+4*loop][j][align];
        d1[nxt+2+4*loop][j][align-1]    = d1[nxt+1+4*loop][j][align];
        lam[1+4*loop][j][nzt+align]     = lam[2+4*loop][j][nzt+align-1];
        mu[1+4*loop][j][nzt+align]      = mu[2+4*loop][j][nzt+align-1];
        d1[1+4*loop][j][nzt+align]      = d1[2+4*loop][j][nzt+align-1];
        lam[nxt+2+4*loop][j][nzt+align] = lam[nxt+1+4*loop][j][nzt+align-1];
        mu[nxt+2+4*loop][j][nzt+align]  = mu[nxt+1+4*loop][j][nzt+align-1];
        d1[nxt+2+4*loop][j][nzt+align]  = d1[nxt+1+4*loop][j][nzt+align-1];
    }
    for (k = align; k < nzt+align; k++) {
        lam[1+4*loop][1+4*loop][k]         = lam[2+4*loop][2+4*loop][k];
        mu[1+4*loop][1+4*loop][k]          = mu[2+4*loop][2+4*loop][k];
        d1[1+4*loop][1+4*loop][k]          = d1[2+4*loop][2+4*loop][k];
        lam[nxt+2+4*loop][1+4*loop][k]     = lam[nxt+1+4*loop][2+4*loop][k];
        mu[nxt+2+4*loop][1+4*loop][k]      = mu[nxt+1+4*loop][2+4*loop][k];
        d1[nxt+2+4*loop][1+4*loop][k]      = d1[nxt+1+4*loop][2+4*loop][k];
        lam[1+4*loop][nyt+2+4*loop][k]     = lam[2+4*loop][nyt+1+4*loop][k];
        mu[1+4*loop][nyt+2+4*loop][k]      = mu[2+4*loop][nyt+1+4*loop][k];
        d1[1+4*loop][nyt+2+4*loop][k]      = d1[2+4*loop][nyt+1+4*loop][k];
        lam[nxt+2+4*loop][nyt+2+4*loop][k] = lam[nxt+1+4*loop][nyt+1+4*loop][k];
        mu[nxt+2+4*loop][nyt+2+4*loop][k]  = mu[nxt+1+4*loop][nyt+1+4*loop][k];
        d1[nxt+2+4*loop][nyt+2+4*loop][k]  = d1[nxt+1+4*loop][nyt+1+4*loop][k];
    }

    //8 Corners
    lam[1+4*loop][1+4*loop][align-1]             = lam[2+4*loop][2+4*loop][align];
    mu[1+4*loop][1+4*loop][align-1]              = mu[2+4*loop][2+4*loop][align];
    d1[1+4*loop][1+4*loop][align-1]              = d1[2+4*loop][2+4*loop][align];
    lam[nxt+2+4*loop][1+4*loop][align-1]         = lam[nxt+1+4*loop][2+4*loop][align];
    mu[nxt+2+4*loop][1+4*loop][align-1]          = mu[nxt+1+4*loop][2+4*loop][align];
    d1[nxt+2+4*loop][1+4*loop][align-1]          = d1[nxt+1+4*loop][2+4*loop][align];
    lam[1+4*loop][nyt+2+4*loop][align-1]         = lam[2+4*loop][nyt+1+4*loop][align];
    mu[1+4*loop][nyt+2+4*loop][align-1]          = mu[2+4*loop][nyt+1+4*loop][align];
    d1[1+4*loop][nyt+2+4*loop][align-1]          = d1[2+4*loop][nyt+1+4*loop][align];
    lam[1+4*loop][1+4*loop][nzt+align]           = lam[2+4*loop][2+4*loop][nzt+align-1];
    mu[1+4*loop][1+4*loop][nzt+align]            = mu[2+4*loop][2+4*loop][nzt+align-1];
    d1[1+4*loop][1+4*loop][nzt+align]            = d1[2+4*loop][2+4*loop][nzt+align-1];
    lam[nxt+2+4*loop][1+4*loop][nzt+align]       = lam[nxt+1+4*loop][2+4*loop][nzt+align-1];
    mu[nxt+2+4*loop][1+4*loop][nzt+align]        = mu[nxt+1+4*loop][2+4*loop][nzt+align-1];
    d1[nxt+2+4*loop][1+4*loop][nzt+align]        = d1[nxt+1+4*loop][2+4*loop][nzt+align-1];
    lam[nxt+2+4*loop][nyt+2+4*loop][align-1]     = lam[nxt+1+4*loop][nyt+1+4*loop][align];
    mu[nxt+2+4*loop][nyt+2+4*loop][align-1]      = mu[nxt+1+4*loop][nyt+1+4*loop][align];
    d1[nxt+2+4*loop][nyt+2+4*loop][align-1]      = d1[nxt+1+4*loop][nyt+1+4*loop][align];
    lam[1+4*loop][nyt+2+4*loop][nzt+align]       = lam[2+4*loop][nyt+1+4*loop][nzt+align-1];
    mu[1+4*loop][nyt+2+4*loop][nzt+align]        = mu[2+4*loop][nyt+1+4*loop][nzt+align-1];
    d1[1+4*loop][nyt+2+4*loop][nzt+align]        = d1[2+4*loop][nyt+1+4*loop][nzt+align-1];
    lam[nxt+2+4*loop][nyt+2+4*loop][nzt+align]   = lam[nxt+1+4*loop][nyt+1+4*loop][nzt+align-1];
    mu[nxt+2+4*loop][nyt+2+4*loop][nzt+align]    = mu[nxt+1+4*loop][nyt+1+4*loop][nzt+align-1];
    d1[nxt+2+4*loop][nyt+2+4*loop][nzt+align]    = d1[nxt+1+4*loop][nyt+1+4*loop][nzt+align-1];

    /* extend material properties into the top ghost layer */
    k = nzt+align;
    for (i = 2+4*loop; i < nxt+2+4*loop; i++)
        for (j = 2+4*loop; j < nyt+2+4*loop; j++) {
            d1[i][j][k]  = d1[i][j][k-1];
            mu[i][j][k]  = mu[i][j][k-1];
            lam[i][j][k] = lam[i][j][k-1];
            if (NVE == 1) {
                qp[i][j][k] = qp[i][j][k-1];
                qs[i][j][k] = qs[i][j][k-1];
            }
        }

        /* global min/max of vs, vp and density across all ranks */
        float tmpvse[2], tmpvpe[2], tmpdde[2];
        merr = MPI_Allreduce(vse, tmpvse, 2, MPI_FLOAT, MPI_MAX, MCW);
        merr = MPI_Allreduce(vpe, tmpvpe, 2, MPI_FLOAT, MPI_MAX, MCW);
        merr = MPI_Allreduce(dde, tmpdde, 2, MPI_FLOAT, MPI_MAX, MCW);
        vse[1] = tmpvse[1];
        vpe[1] = tmpvpe[1];
        dde[1] = tmpdde[1];
        merr = MPI_Allreduce(vse, tmpvse, 2, MPI_FLOAT, MPI_MIN, MCW);
        merr = MPI_Allreduce(vpe, tmpvpe, 2, MPI_FLOAT, MPI_MIN, MCW);
        merr = MPI_Allreduce(dde, tmpdde, 2, MPI_FLOAT, MPI_MIN, MCW);
        vse[0] = tmpvse[0];
        vpe[0] = tmpvpe[0];
        dde[0] = tmpdde[0];
    }
    return;
}
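/*
 * Aside: the six MPI_Allreduce calls above can be fused into a single
 * reduction by negating the max slots, so that one MPI_MIN recovers both
 * bounds.  A minimal sketch, assuming the same vse/vpe/dde layout
 * ([0] = min, [1] = max); global_bounds is a hypothetical helper, not
 * part of the original code.
 */
#include <mpi.h>

static void global_bounds(float vse[2], float vpe[2], float dde[2], MPI_Comm comm)
{
    /* pack as {min, -max}: MPI_MIN then yields each min and -(each max) */
    float local[6]  = { vse[0], -vse[1], vpe[0], -vpe[1], dde[0], -dde[1] };
    float global[6];

    MPI_Allreduce(local, global, 6, MPI_FLOAT, MPI_MIN, comm);

    vse[0] = global[0];  vse[1] = -global[1];
    vpe[0] = global[2];  vpe[1] = -global[3];
    dde[0] = global[4];  dde[1] = -global[5];
}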
void step4(inst i, int r, int s)
{
    inst instance = i;
    int rank = r;
    int size = s;

    // Creation of the 2D torus we will then use
    MPI_Comm comm;
    int dim[2] = {instance.p, instance.q};
    int period[2] = {1, 1};
    int reorder = 0;
    int coord[2];
    MPI_Cart_create(MPI_COMM_WORLD, 2, dim, period, reorder, &comm);
    MPI_Cart_coords(comm, rank, 2, coord);

    grid global_grid;
    char type = 0;
    MPI_File input_file;

    // We start by reading the header of the file
    MPI_File_open(comm, instance.input_path, MPI_MODE_RDONLY, MPI_INFO_NULL, &input_file);
    MPI_File_read_all(input_file, &type, 1, MPI_CHAR, MPI_STATUS_IGNORE);
    if (type == 1) {
        if (rank == 0)
            fprintf(stderr, "Error: type 1 files are not supported in step 4\n");
        MPI_Barrier(MPI_COMM_WORLD);
        MPI_Finalize();
        exit(EXIT_FAILURE);
    }
    // we needed to swap the next 2 lines
    MPI_File_read_all(input_file, &(global_grid.n), 1, MPI_UINT64_T, MPI_STATUS_IGNORE);
    MPI_File_read_all(input_file, &(global_grid.m), 1, MPI_UINT64_T, MPI_STATUS_IGNORE);
#ifdef DEBUG
    if (rank == 0) printf("n, m = %zu %zu\n", global_grid.n, global_grid.m);
#endif
    if (!(global_grid.n % instance.p == 0 && global_grid.m % instance.q == 0)) {
        if (rank == 0)
            fprintf(stderr, "Error: please choose grid parameters that evenly divide the "
                    "grid of the cellular automaton. For example %zu %zu, but you would "
                    "need to move from %d procs to %zu\n",
                    instance.p + (global_grid.n % instance.p),
                    instance.q + (global_grid.m % instance.q),
                    size,
                    (instance.p + (global_grid.n % instance.p))
                        * (instance.q + (global_grid.m % instance.q)));
        MPI_Barrier(MPI_COMM_WORLD);
        MPI_Finalize();
        exit(EXIT_FAILURE);
    }
    size_t local_nrows = global_grid.n/instance.p;
    size_t local_ncols = global_grid.m/instance.q;

    // Now we create the data structures.
    int blocks[2] = {1, 2};
    MPI_Datatype types[2] = {MPI_BYTE, MPI_DOUBLE};
    MPI_Aint a_size = sizeof(cell2);
    MPI_Aint a_disp[3] = {offsetof(cell2, type), offsetof(cell2, u), offsetof(cell2, s)};
    MPI_Aint p_size = 17;   // packed size on file: 1 byte + 2 doubles
    MPI_Aint p_disp[3] = {0, 1, 9};
    MPI_Datatype p_tmp, a_tmp, p_cell, a_cell;

    // Aligned struct, memory representation (only the first two
    // displacements are used, since there are two blocks)
    MPI_Type_create_struct(2, blocks, a_disp, types, &a_tmp);
    MPI_Type_create_resized(a_tmp, 0, a_size, &a_cell);
    MPI_Type_commit(&a_cell);

    // Packed struct, file-based representation
    MPI_Type_create_struct(2, blocks, p_disp, types, &p_tmp);
    MPI_Type_create_resized(p_tmp, 0, p_size, &p_cell);
    MPI_Type_commit(&p_cell);

    // Now, we create our matrix
    MPI_Datatype matrix;
    int sizes[2] = {global_grid.n, global_grid.m};
    int subsizes[2] = {local_nrows, local_ncols};
    int starts[2] = {0, 0};
    MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C, p_cell, &matrix);
    MPI_Type_commit(&matrix);

    // We extend this matrix: the local block inside its ghost frame
    MPI_Datatype ematrix;
    int e_subsizes[2] = {2 + subsizes[0], 2 + subsizes[1]};
    int e_start[2] = {1, 1};
    MPI_Type_create_subarray(2, e_subsizes, subsizes, e_start, MPI_ORDER_C, a_cell, &ematrix);
    MPI_Type_commit(&ematrix);

    // The next 3 types are for the export of the grid
    MPI_Datatype d_type;
    MPI_Type_create_resized(MPI_DOUBLE, 0, sizeof(cell2), &d_type);
    MPI_Type_commit(&d_type);
    MPI_Datatype d_matrix;
    MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C, MPI_DOUBLE, &d_matrix);
    MPI_Type_commit(&d_matrix);
    MPI_Datatype d_rmatrix;  // to go from the extended matrix with ghost zones to the other one
    MPI_Type_create_subarray(2, e_subsizes, subsizes, e_start, MPI_ORDER_C, d_type, &d_rmatrix);
    MPI_Type_commit(&d_rmatrix);

    // Set file view for each rank
    MPI_Offset grid_start;
    MPI_File_get_position(input_file, &grid_start);
    MPI_File_set_view(input_file,
                      grid_start + global_grid.m*local_nrows*p_size*coord[0]
                                 + local_ncols*p_size*coord[1],
                      p_cell, matrix, "native", MPI_INFO_NULL);

    // allocate the cell arrays we will use
    cell2 **cells;
    cells = malloc(2*sizeof(cell2 *));
    double *sensors;
    cells[1] = calloc((2+local_nrows)*(2+local_ncols), sizeof(cell2));
    cells[0] = calloc((2+local_nrows)*(2+local_ncols), sizeof(cell2));
    sensors = calloc(local_nrows*local_ncols, sizeof(double));
    MPI_File_read_all(input_file, cells[0], 1, ematrix, MPI_STATUS_IGNORE);
    MPI_File_close(&input_file);
#ifdef DEBUG
    for (size_t i = 1; i < 1+local_nrows; i++)
        for (size_t j = 1; j < 1+local_ncols; j++)
            fprintf(stderr, "%d - %d %f\n", rank,
                    cells[0][i*(2+local_ncols)+j].type,
                    cells[0][i*(2+local_ncols)+j].u);
#endif

    MPI_Datatype l_row;  // local row
    MPI_Type_contiguous(local_ncols, d_type, &l_row);
    MPI_Type_commit(&l_row);
    MPI_Datatype l_col;  // local column. A bit trickier, we need a type_vector.
    MPI_Type_vector(local_nrows, 1, local_ncols+2, d_type, &l_col);
    MPI_Type_commit(&l_col);

    int top, bot, left, right;
    double sqspeed = 0;
    int curr = 0, next = 0;
    char *alldump = malloc(256);
    for (int s = 0; s < instance.iteration; s++) {
        // We will update cells[next], and use the data of cells[curr]
        curr = s % 2;
        next = (s+1) % 2;

        // We copy the edges of the grid.
        // We first need the ranks of the neighbours
        MPI_Cart_shift(comm, 0, 1, &top, &bot);
        MPI_Cart_shift(comm, 1, 1, &left, &right);
        // Then we need to update the edges of our local grid
        // Update top and bottom rows
        MPI_Sendrecv(&(cells[curr][1*(local_ncols+2)+1].u), 1, l_row, top, 0,
                     &(cells[curr][(local_ncols+2)*(local_nrows+1)+1].u), 1, l_row, bot, 0,
                     comm, MPI_STATUS_IGNORE);
        MPI_Sendrecv(&(cells[curr][(local_ncols+2)*(local_nrows)+1].u), 1, l_row, bot, 0,
                     &(cells[curr][1].u), 1, l_row, top, 0,
                     comm, MPI_STATUS_IGNORE);
        // Update left and right columns
        MPI_Sendrecv(&(cells[curr][1*(local_ncols+2)+1].u), 1, l_col, left, 0,
                     &(cells[curr][1*(local_ncols+2)+local_ncols+1].u), 1, l_col, right, 0,
                     comm, MPI_STATUS_IGNORE);
        MPI_Sendrecv(&(cells[curr][1*(local_ncols+2)+local_ncols].u), 1, l_col, right, 0,
                     &(cells[curr][1*(local_ncols+2)].u), 1, l_col, left, 0,
                     comm, MPI_STATUS_IGNORE);

        // We compute the update of the grid
        for (size_t i = 1; i < 1+local_nrows; i++) {
            for (size_t j = 1; j < 1+local_ncols; j++) {
                if (instance.step < 2 || cells[next][j+i*(2+local_ncols)].type != 1) {
                    // If walls (type 1) we do not do anything
                    sqspeed = cells[0][j+i*(2+local_ncols)].s
                              * cells[0][j+i*(2+local_ncols)].s;
                    cells[next][j+i*(2+local_ncols)].u =
                        cells[curr][j+i*(2+local_ncols)].u
                        + (cells[curr][j+i*(2+local_ncols)].v * instance.dt);
                    cells[next][j+i*(2+local_ncols)].v =
                        cells[curr][j+i*(2+local_ncols)].v
                        + sqspeed * (cells[curr][j+(i+1)*(2+local_ncols)].u
                                     + cells[curr][j+(i-1)*(2+local_ncols)].u
                                     + cells[curr][(j+1) + i*(2+local_ncols)].u
                                     + cells[curr][(j-1) + i*(2+local_ncols)].u
                                     - (4 * cells[curr][j+i*(2+local_ncols)].u)) * instance.dt;
                    if (instance.step == 3 && cells[next][j+i*(2+local_ncols)].type == 2) {
                        // Case of sensors: accumulate the squared amplitude
                        sensors[(j-1)+(i-1)*local_ncols] +=
                            cells[next][j+i*(2+local_ncols)].u
                            * cells[next][j+i*(2+local_ncols)].u;
                    }
                }
            }
        }

        if (instance.alldump != NULL && s % instance.frequency == 0) {
            MPI_File dump_file;
            sprintf(alldump, instance.alldump, (s / instance.frequency));
            MPI_File_open(comm, alldump, MPI_MODE_WRONLY | MPI_MODE_CREATE,
                          MPI_INFO_NULL, &dump_file);
            MPI_File_set_view(dump_file,
                              global_grid.m*local_nrows*sizeof(double)*coord[0]
                              + local_ncols*sizeof(double)*coord[1],
                              MPI_DOUBLE, d_matrix, "native", MPI_INFO_NULL);
            MPI_File_write_all(dump_file, &(cells[curr][0].u), 1, d_rmatrix,
                               MPI_STATUS_IGNORE);
            MPI_File_close(&dump_file);
        }
    }

    if (instance.lastdump != NULL) {
        // so, how do we do this? maybe a resize would make it work?
        MPI_File last_file;
        MPI_File_open(comm, instance.lastdump, MPI_MODE_WRONLY | MPI_MODE_CREATE,
                      MPI_INFO_NULL, &last_file);
        MPI_File_set_view(last_file,
                          global_grid.m*local_nrows*sizeof(double)*coord[0]
                          + local_ncols*sizeof(double)*coord[1],
                          MPI_DOUBLE, d_matrix, "native", MPI_INFO_NULL);
        // for one thing, there is one grid_start too many here; d_type or MPI_DOUBLE?
        MPI_File_write_all(last_file, &(cells[next][0].u), 1, d_rmatrix, MPI_STATUS_IGNORE);
        MPI_File_close(&last_file);
    }

    if (instance.step == 3 && instance.sensors != NULL) {
        MPI_File sensor_file;
        MPI_File_open(comm, instance.sensors, MPI_MODE_WRONLY | MPI_MODE_CREATE,
                      MPI_INFO_NULL, &sensor_file);
        MPI_Datatype string;
        MPI_Type_contiguous(1024, MPI_CHAR, &string);
        MPI_Type_commit(&string);
        char text[1024];
        for (size_t i = 1; i < 1+local_nrows; i++) {
            for (size_t j = 1; j < 1+local_ncols; j++) {
                if (instance.step == 3 && cells[next][j+i*(2+local_ncols)].type == 2) {
                    memset(text, 0, sizeof(text));
                    sprintf(text, "%zu %zu %f\r\n",
                            (i-1)+coord[0]*local_nrows,
                            (j-1)+coord[1]*local_ncols,
                            sensors[(j-1)+(i-1)*local_ncols]);
                    MPI_File_write(sensor_file, text, 1, string, MPI_STATUS_IGNORE);
                }
            }
        }
        MPI_Type_free(&string);
        MPI_File_close(&sensor_file);
    }

    // Some cleaning, including the grids and the sensor buffer
    free(cells[0]);
    free(cells[1]);
    free(cells);
    free(sensors);
    free(alldump);
    MPI_Type_free(&a_cell);
    MPI_Type_free(&p_cell);
    MPI_Type_free(&matrix);
    MPI_Type_free(&ematrix);
    MPI_Type_free(&d_type);
    MPI_Type_free(&d_matrix);
    MPI_Type_free(&d_rmatrix);
    MPI_Type_free(&l_row);
    MPI_Type_free(&l_col);
}
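/*
 * Aside: the l_row/l_col pair above is the usual row/column halo-type
 * construction for a ghost-padded, row-major grid.  A minimal sketch of
 * the same idea over a plain double array (the original instead strides
 * over cell2 structs via the resized d_type); make_halo_types is an
 * illustrative name, not part of the original code.
 */
#include <mpi.h>

static void make_halo_types(int nrows, int ncols,
                            MPI_Datatype *row, MPI_Datatype *col)
{
    /* an interior row is ncols contiguous doubles */
    MPI_Type_contiguous(ncols, MPI_DOUBLE, row);
    /* an interior column is nrows doubles, one per row,
     * spaced ncols+2 elements apart because of the ghost frame */
    MPI_Type_vector(nrows, 1, ncols + 2, MPI_DOUBLE, col);
    MPI_Type_commit(row);
    MPI_Type_commit(col);
}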
/*
 * Open a file through the MPIIO interface.  Setup file view.
 */
static void *MPIIO_Open(char *testFileName, IOR_param_t *param)
{
    int fd_mode = (int)0,
        offsetFactor,
        tasksPerFile,
        transfersPerBlock = param->blockSize / param->transferSize;
    struct fileTypeStruct {
        int globalSizes[2], localSizes[2], startIndices[2];
    } fileTypeStruct;
    MPI_File *fd;
    MPI_Comm comm;
    MPI_Info mpiHints = MPI_INFO_NULL;

    fd = (MPI_File *) malloc(sizeof(MPI_File));
    if (fd == NULL)
        ERR("malloc() failed");
    *fd = 0;

    /* set IOR file flags to MPIIO flags */
    /* -- file open flags -- */
    if (param->openFlags & IOR_RDONLY) { fd_mode |= MPI_MODE_RDONLY; }
    if (param->openFlags & IOR_WRONLY) { fd_mode |= MPI_MODE_WRONLY; }
    if (param->openFlags & IOR_RDWR)   { fd_mode |= MPI_MODE_RDWR; }
    if (param->openFlags & IOR_APPEND) { fd_mode |= MPI_MODE_APPEND; }
    if (param->openFlags & IOR_CREAT)  { fd_mode |= MPI_MODE_CREATE; }
    if (param->openFlags & IOR_EXCL)   { fd_mode |= MPI_MODE_EXCL; }
    if (param->openFlags & IOR_TRUNC) {
        fprintf(stdout, "File truncation not implemented in MPIIO\n");
    }
    if (param->openFlags & IOR_DIRECT) {
        fprintf(stdout, "O_DIRECT not implemented in MPIIO\n");
    }

    /*
     * MPI_MODE_UNIQUE_OPEN mode optimization eliminates the overhead of file
     * locking.  Only open a file in this mode when the file will not be con-
     * currently opened elsewhere, either inside or outside the MPI environment.
     */
    fd_mode |= MPI_MODE_UNIQUE_OPEN;

    if (param->filePerProc) {
        comm = MPI_COMM_SELF;
    } else {
        comm = testComm;
    }

    SetHints(&mpiHints, param->hintsFileName);
    /*
     * note that with MP_HINTS_FILTERED=no, all key/value pairs will
     * be in the info object.  The info object that is attached to
     * the file during MPI_File_open() will only contain those pairs
     * deemed valid by the implementation.
     */
    /* show hints passed to file */
    if (rank == 0 && param->showHints) {
        fprintf(stdout, "\nhints passed to MPI_File_open() {\n");
        ShowHints(&mpiHints);
        fprintf(stdout, "}\n");
    }
    MPI_CHECK(MPI_File_open(comm, testFileName, fd_mode, mpiHints, fd),
              "cannot open file");

    /* show hints actually attached to file handle */
    if (rank == 0 && param->showHints) {
        MPI_CHECK(MPI_File_get_info(*fd, &mpiHints),
                  "cannot get file info");
        fprintf(stdout, "\nhints returned from opened file {\n");
        ShowHints(&mpiHints);
        fprintf(stdout, "}\n");
    }

    /* preallocate space for file */
    if (param->preallocate && param->open == WRITE) {
        MPI_CHECK(MPI_File_preallocate(*fd,
                                       (MPI_Offset) (param->segmentCount
                                                     * param->blockSize
                                                     * param->numTasks)),
                  "cannot preallocate file");
    }

    /* create file view */
    if (param->useFileView) {
        /* create contiguous transfer datatype */
        MPI_CHECK(MPI_Type_contiguous(param->transferSize / sizeof(IOR_size_t),
                                      MPI_LONG_LONG_INT, &param->transferType),
                  "cannot create contiguous datatype");
        MPI_CHECK(MPI_Type_commit(&param->transferType),
                  "cannot commit datatype");
        if (param->filePerProc) {
            offsetFactor = 0;
            tasksPerFile = 1;
        } else {
            offsetFactor = (rank + rankOffset) % param->numTasks;
            tasksPerFile = param->numTasks;
        }

        /*
         * create file type using subarray
         */
        fileTypeStruct.globalSizes[0] = 1;
        fileTypeStruct.globalSizes[1] = transfersPerBlock * tasksPerFile;
        fileTypeStruct.localSizes[0] = 1;
        fileTypeStruct.localSizes[1] = transfersPerBlock;
        fileTypeStruct.startIndices[0] = 0;
        fileTypeStruct.startIndices[1] = transfersPerBlock * offsetFactor;

        MPI_CHECK(MPI_Type_create_subarray(2,
                                           fileTypeStruct.globalSizes,
                                           fileTypeStruct.localSizes,
                                           fileTypeStruct.startIndices,
                                           MPI_ORDER_C,
                                           param->transferType,
                                           &param->fileType),
                  "cannot create subarray");
        MPI_CHECK(MPI_Type_commit(&param->fileType),
                  "cannot commit datatype");
        MPI_CHECK(MPI_File_set_view(*fd, (MPI_Offset) 0,
                                    param->transferType, param->fileType,
                                    "native", (MPI_Info) MPI_INFO_NULL),
                  "cannot set file view");
    }
    return ((void *)fd);
}
int main(int argc, char **argv)
{
    int i, j, rank, nranks, peer, bufsize, errors;
    double *win_buf, *src_buf;
    MPI_Win buf_win;

    MTest_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks);

    bufsize = XDIM * YDIM * sizeof(double);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &win_buf);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &src_buf);

    if (rank == 0 && verbose)
        printf("MPI RMA Strided Accumulate Test:\n");

    for (i = 0; i < XDIM*YDIM; i++) {
        *(win_buf + i) = -1.0;
        *(src_buf + i) = 1.0 + rank;
    }

    MPI_Win_create(win_buf, bufsize, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &buf_win);

    peer = (rank+1) % nranks;

    /* Perform ITERATIONS strided accumulate operations */
    for (i = 0; i < ITERATIONS; i++) {
        int ndims = 2;
        int src_arr_sizes[2]    = { XDIM, YDIM };
        int src_arr_subsizes[2] = { SUB_XDIM, SUB_YDIM };
        int src_arr_starts[2]   = { 0, 0 };
        int dst_arr_sizes[2]    = { XDIM, YDIM };
        int dst_arr_subsizes[2] = { SUB_XDIM, SUB_YDIM };
        int dst_arr_starts[2]   = { 0, 0 };
        MPI_Datatype src_type, dst_type;

        MPI_Type_create_subarray(ndims, src_arr_sizes, src_arr_subsizes,
                                 src_arr_starts, MPI_ORDER_C, MPI_DOUBLE, &src_type);
        MPI_Type_create_subarray(ndims, dst_arr_sizes, dst_arr_subsizes,
                                 dst_arr_starts, MPI_ORDER_C, MPI_DOUBLE, &dst_type);
        MPI_Type_commit(&src_type);
        MPI_Type_commit(&dst_type);

        MPI_Win_lock(MPI_LOCK_EXCLUSIVE, peer, 0, buf_win);
        MPI_Accumulate(src_buf, 1, src_type, peer, 0, 1, dst_type, MPI_SUM, buf_win);
        MPI_Win_unlock(peer, buf_win);

        MPI_Type_free(&src_type);
        MPI_Type_free(&dst_type);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Verify that the results are correct */
    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, 0, buf_win);
    errors = 0;
    for (i = 0; i < SUB_XDIM; i++) {
        for (j = 0; j < SUB_YDIM; j++) {
            const double actual   = *(win_buf + i + j*XDIM);
            const double expected = -1.0 + (1.0 + ((rank+nranks-1)%nranks)) * (ITERATIONS);
            if (fabs(actual - expected) > 1.0e-10) {
                SQUELCH( printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n",
                                rank, j, i, expected, actual); );
                errors++;
                fflush(stdout);
            }
        }
    }
    MPI_Win_unlock(rank, buf_win);

    MPI_Win_free(&buf_win);
    MPI_Free_mem(win_buf);
    MPI_Free_mem(src_buf);

    MTest_Finalize(errors);
    return 0;
}
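/*
 * For reference, the expected value in the validation loop follows from
 * the communication pattern: each rank accumulates into
 * peer = (rank+1) % nranks, so a given rank receives contributions from
 * (rank+nranks-1) % nranks, whose source buffer holds 1.0 + that rank.
 * Starting from the initial window value of -1.0 and summing over
 * ITERATIONS lock/accumulate/unlock epochs gives
 *   expected = -1.0 + (1.0 + ((rank+nranks-1) % nranks)) * ITERATIONS,
 * which is exactly what the test checks against win_buf.
 */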
/* Fortran-callable binding shim: dereference the scalar arguments and
 * forward to the C routine, returning the error code through __ierr. */
void mpi_type_create_subarray_(int *ndims, int *array_of_sizes,
                               int *array_of_subsizes, int *array_of_starts,
                               int *order, MPI_Datatype *oldtype,
                               MPI_Datatype *newtype, int *__ierr)
{
    *__ierr = MPI_Type_create_subarray(*ndims, array_of_sizes,
                                       array_of_subsizes, array_of_starts,
                                       *order, *oldtype, newtype);
}
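/*
 * To close, a self-contained sketch of the call this shim forwards to,
 * showing the key property of subarray types: MPI_Type_size counts only
 * the selected block, while the extent spans the full array.  The 8x8
 * array and 4x2 block are arbitrary illustrative numbers.
 */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    int sizes[2]    = { 8, 8 };  /* full 8x8 array of ints */
    int subsizes[2] = { 4, 2 };  /* 4x2 block ...          */
    int starts[2]   = { 2, 3 };  /* ... at row 2, column 3 */
    MPI_Datatype sub;
    MPI_Type_create_subarray(2, sizes, subsizes, starts,
                             MPI_ORDER_C, MPI_INT, &sub);
    MPI_Type_commit(&sub);

    int tsize;
    MPI_Aint lb, extent;
    MPI_Type_size(sub, &tsize);              /* 4*2 ints               */
    MPI_Type_get_extent(sub, &lb, &extent);  /* spans the full 8x8 array */
    printf("size = %d bytes, extent = %ld bytes\n", tsize, (long) extent);

    MPI_Type_free(&sub);
    MPI_Finalize();
    return 0;
}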