void mpi_type_create_darray_(int *size, int *rank, int *ndims,
                             int *array_of_gsizes, int *array_of_distribs,
                             int *array_of_dargs, int *array_of_psizes,
                             int *order, MPI_Datatype *oldtype,
                             MPI_Datatype *newtype, int *__ierr)
{
    *__ierr = MPI_Type_create_darray(*size, *rank, *ndims,
                                     array_of_gsizes, array_of_distribs,
                                     array_of_dargs, array_of_psizes,
                                     *order, *oldtype, newtype);
}
int loadmatrix_cols(MPI_File *fh, float *rbuf, int rank, int numtasks, int m, int n)
{
    float data[1024];   /* local read buffer; assumes m*cols never exceeds 1024 floats */
    MPI_Datatype darray;
    MPI_Status status;
    int gsizes[2] = {m, n};
    int distribs[2] = {MPI_DISTRIBUTE_BLOCK, MPI_DISTRIBUTE_BLOCK};
    int dargs[2] = {MPI_DISTRIBUTE_DFLT_DARG, MPI_DISTRIBUTE_DFLT_DARG};
    int psizes[2] = {1, 4};   /* 1 process row x 4 process columns: column-block distribution */
    int i, j;
    int count = 0;
    int cols = 0;

    cols = n/numtasks;
    if (rank < n%numtasks) {
        cols++;
    }

    MPI_Type_create_darray(numtasks, rank, 2, gsizes, distribs, dargs, psizes,
                           MPI_ORDER_C, MPI_FLOAT, &darray);
    MPI_Type_commit(&darray);

    MPI_File_set_view(*fh, 0, MPI_FLOAT, darray, "native", MPI_INFO_NULL);
    MPI_File_read_all(*fh, data, m*cols, MPI_FLOAT, &status);
    MPI_Get_count(&status, MPI_FLOAT, &count);
    //cols = count/m;

    /* the local columns arrive row by row; transpose them into column-major rbuf */
    for (i = 0; i < cols; i++) {
        for (j = 0; j < m; j++) {
            rbuf[i*m+j] = data[j*cols+i];
        }
    }

    /*
    if(rank == 1) {
        for(i = 0; i < cols; i++) {
            printf("Proc %d row %d: ", rank, i);
            for(j = 0; j < m; j++) {
                printf("%f, ", rbuf[j*cols+i]);
            }
            printf("\n");
        }
    }
    */

    MPI_Type_free(&darray);
    return cols;
}
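/* Hypothetical driver for loadmatrix_cols() above, given only as a usage sketch:
 * the file name "matrix.dat", the helper name read_columns_example and the buffer
 * sizing are assumptions, not part of the original code. Note that psizes above
 * hard-codes a 1x4 process grid, so numtasks == 4 is assumed. */
int read_columns_example(int m, int n)
{
    int rank, numtasks, cols;
    float *rbuf;
    MPI_File fh;

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &numtasks);

    /* worst case: this rank owns ceil(n/numtasks) columns of length m */
    rbuf = (float *) malloc((size_t)m * (n/numtasks + 1) * sizeof(float));

    MPI_File_open(MPI_COMM_WORLD, "matrix.dat", MPI_MODE_RDONLY, MPI_INFO_NULL, &fh);
    cols = loadmatrix_cols(&fh, rbuf, rank, numtasks, m, n);
    MPI_File_close(&fh);

    /* rbuf now holds this rank's `cols` columns, stored column-major */
    free(rbuf);
    return cols;
}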
FORTRAN_API void FORT_CALL mpi_type_create_darray_(MPI_Fint *size, MPI_Fint *rank, MPI_Fint *ndims,
                                                   MPI_Fint *array_of_gsizes, MPI_Fint *array_of_distribs,
                                                   MPI_Fint *array_of_dargs, MPI_Fint *array_of_psizes,
                                                   MPI_Fint *order, MPI_Fint *oldtype,
                                                   MPI_Fint *newtype, MPI_Fint *ierr)
{
    *ierr = MPI_Type_create_darray(*size, *rank, *ndims,
                                   array_of_gsizes, array_of_distribs,
                                   array_of_dargs, array_of_psizes,
                                   *order, *oldtype, newtype);
}
void mpi_type_create_darray_(int *size, int *rank, int *ndims,
                             int *array_of_gsizes, int *array_of_distribs,
                             int *array_of_dargs, int *array_of_psizes,
                             int *order, MPI_Fint *oldtype,
                             MPI_Fint *newtype, int *__ierr)
{
    MPI_Datatype oldtype_c, newtype_c;

    oldtype_c = MPI_Type_f2c(*oldtype);
    *__ierr = MPI_Type_create_darray(*size, *rank, *ndims,
                                     array_of_gsizes, array_of_distribs,
                                     array_of_dargs, array_of_psizes,
                                     *order, oldtype_c, &newtype_c);
    *newtype = MPI_Type_c2f(newtype_c);
}
int savematrix_rows(MPI_File *fh, float *data, int numrows, int rank, int numtasks, int m, int n)
{
    MPI_Datatype darray;
    MPI_Status status;
    int gsizes[2] = {m, n};
    int distribs[2] = {MPI_DISTRIBUTE_BLOCK, MPI_DISTRIBUTE_BLOCK};
    int dargs[2] = {MPI_DISTRIBUTE_DFLT_DARG, MPI_DISTRIBUTE_DFLT_DARG};
    int psizes[2] = {4, 1};   /* 4 process rows x 1 process column: row-block distribution */

    MPI_Type_create_darray(numtasks, rank, 2, gsizes, distribs, dargs, psizes,
                           MPI_ORDER_C, MPI_FLOAT, &darray);
    MPI_Type_commit(&darray);

    MPI_File_set_view(*fh, 0, MPI_FLOAT, darray, "native", MPI_INFO_NULL);
    MPI_File_write_all(*fh, data, numrows*n, MPI_FLOAT, &status);

    MPI_Type_free(&darray);
    return 0;
}
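/* Matching usage sketch for savematrix_rows(): each rank writes its numrows rows
 * of the m-by-n matrix to a shared file. The file name "matrix_out.dat" and the
 * helper name write_rows_example are assumptions for illustration; psizes above
 * hard-codes a 4x1 process grid, so numtasks == 4 is assumed. */
int write_rows_example(float *data, int numrows, int m, int n)
{
    int rank, numtasks;
    MPI_File fh;

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &numtasks);

    MPI_File_open(MPI_COMM_WORLD, "matrix_out.dat",
                  MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
    savematrix_rows(&fh, data, numrows, rank, numtasks, m, n);
    MPI_File_close(&fh);
    return 0;
}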
void ompi_type_create_darray_f(MPI_Fint *size, MPI_Fint *rank, MPI_Fint *ndims,
                               MPI_Fint *gsize_array, MPI_Fint *distrib_array,
                               MPI_Fint *darg_array, MPI_Fint *psize_array,
                               MPI_Fint *order, MPI_Fint *oldtype,
                               MPI_Fint *newtype, MPI_Fint *ierr)
{
    int c_ierr;
    MPI_Datatype c_old = MPI_Type_f2c(*oldtype);
    MPI_Datatype c_new;
    OMPI_ARRAY_NAME_DECL(gsize_array);
    OMPI_ARRAY_NAME_DECL(distrib_array);
    OMPI_ARRAY_NAME_DECL(darg_array);
    OMPI_ARRAY_NAME_DECL(psize_array);

    OMPI_ARRAY_FINT_2_INT(gsize_array, *ndims);
    OMPI_ARRAY_FINT_2_INT(distrib_array, *ndims);
    OMPI_ARRAY_FINT_2_INT(darg_array, *ndims);
    OMPI_ARRAY_FINT_2_INT(psize_array, *ndims);

    c_ierr = MPI_Type_create_darray(OMPI_FINT_2_INT(*size),
                                    OMPI_FINT_2_INT(*rank),
                                    OMPI_FINT_2_INT(*ndims),
                                    OMPI_ARRAY_NAME_CONVERT(gsize_array),
                                    OMPI_ARRAY_NAME_CONVERT(distrib_array),
                                    OMPI_ARRAY_NAME_CONVERT(darg_array),
                                    OMPI_ARRAY_NAME_CONVERT(psize_array),
                                    OMPI_FINT_2_INT(*order),
                                    c_old, &c_new);
    if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr);

    OMPI_ARRAY_FINT_2_INT_CLEANUP(gsize_array);
    OMPI_ARRAY_FINT_2_INT_CLEANUP(distrib_array);
    OMPI_ARRAY_FINT_2_INT_CLEANUP(darg_array);
    OMPI_ARRAY_FINT_2_INT_CLEANUP(psize_array);

    if (MPI_SUCCESS == c_ierr) {
        *newtype = MPI_Type_c2f(c_new);
    }
}
int main(int argc, char *argv[])
{
    int i;
    int rank, size;
    MPI_Datatype darray;
    int distrib[1] = { MPI_DISTRIBUTE_CYCLIC };
    int bsize[1] = { 1 };
    int gsize[1] = { 10 };
    int psize[1] = { 2 };   /* product of psizes must equal the number of processes, so run with 2 ranks */
    int tsize;
    MPI_Aint lb, extent;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    MPI_Type_create_darray(size, rank, 1, gsize, distrib, bsize, psize,
                           MPI_ORDER_C, MPI_DOUBLE, &darray);
    MPI_Type_size(darray, &tsize);
    MPI_Type_get_extent(darray, &lb, &extent);

    for (i = 0; i < size; i++) {
        MPI_Barrier(MPI_COMM_WORLD);
        if (rank == i) {
            printf("Rank %i, size=%i, extent=%i, lb=%i\n",
                   rank, tsize, (int) extent, (int) lb);
        }
    }

    MPI_Finalize();
    return 0;
}
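/* For the cyclic(1) distribution above with gsize = 10 and a 2-process grid,
 * each rank owns 5 of the 10 doubles, so MPI_Type_size should report
 * 5 * sizeof(double) bytes on both ranks. The helper below is an illustrative
 * addition (not from the source) that computes the expected element count for
 * an arbitrary 1-D cyclic(1) decomposition. */
int expected_cyclic_elems(int gsize, int psize, int rank)
{
    int full = gsize / psize;   /* elements every rank gets            */
    int rem  = gsize % psize;   /* low ranks get one leftover element  */
    return full + (rank < rem ? 1 : 0);
}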
int main(int argc, char* argv[]) { int i, rank, npes, bug=0; int buf[ng]; MPI_File thefile; MPI_Status status; MPI_Datatype filetype; MPI_Comm new_comm; MPI_Offset offset=0; MPI_Info info=MPI_INFO_NULL; int gsize[D],distrib[D],dargs[D],psize[D]; int dims[D],periods[D],reorder; double t1,t2,mbs; double to1,to2,tc1,tc2; double et,eto,etc; double max_mbs,min_mbs,avg_mbs; double max_et,min_et,avg_et; double max_eto,min_eto,avg_eto; double max_etc,min_etc,avg_etc; char process_name[MPI_MAX_PROCESSOR_NAME + 1]; char rr_blank[] = {" "}; char rr_empty[] = {"???????"}; int count; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &npes); if ( rank == 0 ) { if ( argc < 2 ) { printf(" ERROR: no filename given\n"); bug++; } if ( npes == np ) { printf(" file name: %s\n",argv[1]); printf(" total number of PE's: %3d\n",np); printf(" number of PE's in x direction: %4d\n",npx); printf(" number of PE's in y direction: %4d\n",npy); printf(" number of PE's in z direction: %4d\n",npz); printf(" global grid size: %dx%dx%d 4 byte integers (total %lld)\n",X,Y,Z,(unsigned long)X*Y*Z); printf(" local grid size: %dx%dx%d 4 byte integers (total %d)\n",nx,ny,nz,ng); } else { printf(" ERROR: total number of PE's must be %d\n",np); printf(" actual number of PE's was %d\n",npes); bug++; } if ( bug ) { MPI_Abort(MPI_COMM_WORLD,-1); } } if ( MPI_Get_processor_name(process_name, &count) != MPI_SUCCESS) { sprintf(process_name, rr_empty); } else { if (count < MAX_RR_NAME) strncat(&process_name[count],rr_blank,MAX_RR_NAME-count); process_name[MAX_RR_NAME] = '\0'; } MPI_Barrier(MPI_COMM_WORLD); MPI_Info_create(&info); /* allow multiple writers to write to the file concurrently */ /*MPI_Info_set(info,"panfs_concurrent_write","1");*/ /* use data aggregation */ /*MPI_Info_set(info,"romio_cb_write","enable"); */ /*MPI_Info_set(info,"romio_cb_write","disable");*/ /*MPI_Info_set(info,"romio_cb_read","enable"); */ /*MPI_Info_set(info,"romio_cb_read","disable");*/ /* use one aggregator/writer per node */ /*MPI_Info_set(info,"cb_config_list","*:1");*/ /* aggregators/writers per allocation: use this or the above (both work) */ /*i = ((npes-1)/8) + 1; sprintf(awpa,"%d",i); MPI_Info_set (info,"cb_nodes",awpa);*/ for ( i=0; i<ng; i++ ) buf[i] = rank*10000 + (i+1)%1024; for ( i=0; i<D; i++ ) { periods[i] = 1; /* true */ } reorder = 1; /* true */ dims[0] = npx; dims[1] = npy; dims[2] = npz; MPI_Cart_create(MPI_COMM_WORLD, D, dims, periods, reorder, &new_comm); for ( i=0; i<D; i++ ) { distrib[i] = MPI_DISTRIBUTE_BLOCK; dargs[i] = MPI_DISTRIBUTE_DFLT_DARG; /* psize[i] = 0; */ } gsize[0] = X; gsize[1] = Y; gsize[2] = Z; psize[0] = npx; psize[1] = npy; psize[2] = npz; /* MPI_Dims_create(npes, D, psize); printf("psize %d %d %d\n",psize[0],psize[1],psize[2]); */ MPI_Type_create_darray(npes, rank, D, gsize, distrib, dargs, psize, MPI_ORDER_FORTRAN, MPI_INT, &filetype); /*MPI_Type_create_darray(npes, rank, D, gsize, distrib, dargs, psize, MPI_ORDER_C, MPI_INT, &filetype); don't do this */ MPI_Type_commit(&filetype); to1 = MPI_Wtime(); MPI_File_open(new_comm, argv[1], MPI_MODE_WRONLY | MPI_MODE_CREATE, info, &thefile); to2 = MPI_Wtime(); MPI_File_set_size(thefile, offset); MPI_File_set_view(thefile, offset, MPI_INT, filetype, "native", MPI_INFO_NULL); t1 = MPI_Wtime(); for ( i=0; i<LOOP; i++) { MPI_File_write_all(thefile, buf, ng, MPI_INT, &status); } t2 = MPI_Wtime(); tc1 = MPI_Wtime(); MPI_File_close(&thefile); tc2 = MPI_Wtime(); et = (t2 - t1)/LOOP; eto = (to2 - to1)/LOOP; etc = (tc2 - tc1)/LOOP; mbs = 
(((double)(LOOP*X*Y*Z)*sizeof(int)))/(1000000.0*(t2-t1)); /*printf(" %s[%3d] ET %8.2f %8.2f %8.2f %8.1f mbs\n", process_name, rank, t1, t2, t2-t1, mbs);*/ MPI_Barrier(MPI_COMM_WORLD); MPI_Reduce(&mbs, &avg_mbs, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(&mbs, &min_mbs, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); MPI_Reduce(&mbs, &max_mbs, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); MPI_Reduce(&et, &avg_et, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(&et, &min_et, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); MPI_Reduce(&et, &max_et, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); MPI_Reduce(&eto, &avg_eto, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(&eto, &min_eto, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); MPI_Reduce(&eto, &max_eto, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); MPI_Reduce(&etc, &avg_etc, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(&etc, &min_etc, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); MPI_Reduce(&etc, &max_etc, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); fflush(stdout); if ( rank == 0 ) { mbs = avg_mbs/npes; printf("\n average write rate: %9.1f mbs\n", mbs); printf(" minimum write rate: %9.1f mbs\n", min_mbs); printf(" maximum write rate: %9.1f mbs\n\n", max_mbs); avg_eto = avg_eto/npes; avg_et = avg_et/npes; avg_etc = avg_etc/npes; printf(" open time: %9.3f min %9.3f avg %9.3f max\n",min_eto,avg_eto,max_eto); printf(" write time: %9.3f min %9.3f avg %9.3f max\n",min_et,avg_et,max_et); printf(" close time: %9.3f min %9.3f avg %9.3f max\n\n",min_etc,avg_etc,max_etc); fflush(stdout); } MPI_Finalize(); return 0; }
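/* The benchmark above depends on compile-time constants that are not part of
 * this excerpt (D, np, npx, npy, npz, X, Y, Z, nx, ny, nz, ng, LOOP,
 * MAX_RR_NAME). The definitions below are illustrative assumptions only; the
 * real program chooses its own grid and loop sizes. */
#define D            3                    /* number of array dimensions       */
#define npx          2                    /* process-grid extent in x         */
#define npy          2                    /* process-grid extent in y         */
#define npz          2                    /* process-grid extent in z         */
#define np           (npx * npy * npz)    /* required number of processes     */
#define X            256                  /* global grid size in x            */
#define Y            256
#define Z            256
#define nx           (X / npx)            /* local grid size per process      */
#define ny           (Y / npy)
#define nz           (Z / npz)
#define ng           (nx * ny * nz)       /* local element count              */
#define LOOP         1                    /* timed write iterations           */
#define MAX_RR_NAME  7                    /* padded width of the host name    */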
int main(int argc, char *argv[]) { int i, j, nerrors=0, total_errors=0; int rank, size; int bpos; MPI_Datatype darray; MPI_Status status; MPI_File mpi_fh; /* Define array distribution A 2x2 block size works with ROMIO, a 3x3 block size breaks it. */ int distrib[2] = { MPI_DISTRIBUTE_CYCLIC, MPI_DISTRIBUTE_CYCLIC }; int bsize[2] = { NBLOCK, NBLOCK }; int gsize[2] = { NSIDE, NSIDE }; int psize[2] = { NPROC, NPROC }; double data[NSIDE*NSIDE]; double *ldata, *pdata; int tsize, nelem; MPI_File dfile; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_rank(MPI_COMM_WORLD, &rank); /* Set up type */ CHECK(MPI_Type_create_darray(size, rank, 2, gsize, distrib, bsize, psize, MPI_ORDER_FORTRAN, MPI_DOUBLE, &darray)); CHECK(MPI_Type_commit(&darray)); CHECK(MPI_Type_size(darray, &tsize)); nelem = tsize / sizeof(double); for(i = 0; i < (NSIDE*NSIDE); i++) data[i] = i; if (rank == 0) { CHECK(MPI_File_open(MPI_COMM_SELF, argv[1], MPI_MODE_CREATE|MPI_MODE_WRONLY, MPI_INFO_NULL, &dfile)); CHECK(MPI_File_write(dfile, data, NSIDE*NSIDE, MPI_DOUBLE, &status)); CHECK(MPI_File_close(&dfile)); } MPI_Barrier(MPI_COMM_WORLD); /* Allocate buffer */ ldata = (double *)malloc(tsize); pdata = (double *)malloc(tsize); /* Use Pack to pull out array */ bpos = 0; CHECK(MPI_Pack(data, 1, darray, pdata, tsize, &bpos, MPI_COMM_WORLD)); MPI_Barrier(MPI_COMM_WORLD); /* Read in array from file. */ CHECK(MPI_File_open(MPI_COMM_WORLD, argv[1], MPI_MODE_RDONLY, MPI_INFO_NULL, &mpi_fh)); CHECK(MPI_File_set_view(mpi_fh, 0, MPI_DOUBLE, darray, "native", MPI_INFO_NULL)); CHECK(MPI_File_read_all(mpi_fh, ldata, nelem, MPI_DOUBLE, &status)); CHECK(MPI_File_close(&mpi_fh)); for(i = 0; i < size; i++) { #ifdef VERBOSE MPI_Barrier(MPI_COMM_WORLD); if(rank == i) { printf("=== Rank %i === (%i elements) \nPacked: ", rank, nelem); for(j = 0; j < nelem; j++) { printf("%4.1f ", pdata[j]); fflush(stdout); } printf("\nRead: "); for(j = 0; j < nelem; j++) { printf("%4.1f ", ldata[j]); fflush(stdout); } printf("\n\n"); fflush(stdout); } #endif if(rank == i) { for (j=0; j< nelem; j++) { if (pdata[j] != ldata[j]) { fprintf(stderr, "rank %d at index %d: packbuf %4.1f filebuf %4.1f\n", rank, j, pdata[j], ldata[j]); nerrors++; } } } } MPI_Allreduce(&nerrors, &total_errors, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); if (rank == 0 && total_errors == 0) printf(" No Errors\n"); free(ldata); free(pdata); MPI_Type_free(&darray); MPI_Finalize(); exit(total_errors); }
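/* The pack/read test above uses NSIDE, NBLOCK, NPROC and a CHECK() wrapper that
 * are defined elsewhere. A plausible, self-contained set of definitions
 * (assumed, not the original ones) would be: */
#define NSIDE  8     /* global array is NSIDE x NSIDE                          */
#define NBLOCK 2     /* cyclic block size; the comment notes a 3x3 block breaks ROMIO */
#define NPROC  2     /* process grid is NPROC x NPROC, so run with 4 ranks     */

#define CHECK(fn)                                                   \
    do {                                                            \
        int _err = (fn);                                            \
        if (_err != MPI_SUCCESS) {                                  \
            char _msg[MPI_MAX_ERROR_STRING];                        \
            int _len;                                               \
            MPI_Error_string(_err, _msg, &_len);                    \
            fprintf(stderr, "%s failed: %s\n", #fn, _msg);          \
            MPI_Abort(MPI_COMM_WORLD, 1);                           \
        }                                                           \
    } while (0)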
FORT_DLL_SPEC void FORT_CALL mpi_type_create_darray_(MPI_Fint *v1, MPI_Fint *v2, MPI_Fint *v3,
                                                     MPI_Fint v4[], MPI_Fint v5[], MPI_Fint v6[],
                                                     MPI_Fint v7[], MPI_Fint *v8, MPI_Fint *v9,
                                                     MPI_Fint *v10, MPI_Fint *ierr)
{
    *ierr = MPI_Type_create_darray(*v1, *v2, *v3, v4, v5, v6, v7, *v8,
                                   (MPI_Datatype) (*v9), (MPI_Datatype *) (v10));
}
int main(int argc, char **argv) { int info, i, j, pcol, Adim; double *D; int *DESCD; CSRdouble BT_i, B_j, Xsparse, Zsparse, Btsparse; /*BT_i.allocate(0,0,0); B_j.allocate(0,0,0); Xsparse.allocate(0,0,0); Zsparse.allocate(0,0,0); Btsparse.allocate(0,0,0);*/ //Initialise MPI and some MPI-variables info = MPI_Init ( &argc, &argv ); if ( info != 0 ) { printf ( "Error in MPI initialisation: %d\n",info ); return info; } position= ( int* ) calloc ( 2,sizeof ( int ) ); if ( position==NULL ) { printf ( "unable to allocate memory for processor position coordinate\n" ); return EXIT_FAILURE; } dims= ( int* ) calloc ( 2,sizeof ( int ) ); if ( dims==NULL ) { printf ( "unable to allocate memory for grid dimensions coordinate\n" ); return EXIT_FAILURE; } //BLACS is the interface used by PBLAS and ScaLAPACK on top of MPI blacs_pinfo_ ( &iam,&size ); //determine the number of processes involved info=MPI_Dims_create ( size, 2, dims ); //determine the best 2D cartesian grid with the number of processes if ( info != 0 ) { printf ( "Error in MPI creation of dimensions: %d\n",info ); return info; } //Until now the code can only work with square process grids //So we try to get the biggest square grid possible with the number of processes involved if (*dims != *(dims+1)) { while (*dims * *dims > size) *dims -=1; *(dims+1)= *dims; if (iam==0) printf("WARNING: %d processor(s) unused due to reformatting to a square process grid\n", size - (*dims * *dims)); size = *dims * *dims; //cout << "New size of process grid: " << size << endl; } blacs_get_ ( &i_negone,&i_zero,&ICTXT2D ); //Initialisation of the BLACS process grid, which is referenced as ICTXT2D blacs_gridinit_ ( &ICTXT2D,"R",dims, dims+1 ); if (iam < size) { //The rank (iam) of the process is mapped to a 2D grid: position= (process row, process column) blacs_pcoord_ ( &ICTXT2D,&iam,position, position+1 ); if ( *position ==-1 ) { printf ( "Error in proces grid\n" ); return -1; } //Filenames, dimensions of all matrices and other important variables are read in as global variables (see src/readinput.cpp) info=read_input ( *++argv ); if ( info!=0 ) { printf ( "Something went wrong when reading input file for processor %d\n",iam ); return -1; } //blacs_barrier is used to stop any process of going beyond this point before all processes have made it up to this point. blacs_barrier_ ( &ICTXT2D,"ALL" ); if ( * ( position+1 ) ==0 && *position==0 ) printf ( "Reading of input-file succesful\n" ); if ( * ( position+1 ) ==0 && *position==0 ) { printf("\nA linear mixed model with %d observations, %d genotypes, %d random effects and %d fixed effects\n", n,k,m,l); printf("was analyzed using %d (%d x %d) processors\n",size,*dims,*(dims+1)); } //Dimension of A (sparse matrix) is the number of fixed effects(m) + the sparse random effects (l) Adim=m+l; //Dimension of D (dense matrix) is the number of dense effects (k) Ddim=k; pcol= * ( position+1 ); //Define number of blocks needed to store a complete column/row of D Dblocks= Ddim%blocksize==0 ? Ddim/blocksize : Ddim/blocksize +1; //Define the number of rowblocks needed by the current process to store its part of the dense matrix D Drows= ( Dblocks - *position ) % *dims == 0 ? ( Dblocks- *position ) / *dims : ( Dblocks- *position ) / *dims +1; Drows= Drows<1? 1 : Drows; //Define the number of columnblocks needed by the current process to store its part of the dense matrix D Dcols= ( Dblocks - pcol ) % * ( dims+1 ) == 0 ? ( Dblocks- pcol ) / * ( dims+1 ) : ( Dblocks- pcol ) / * ( dims+1 ) +1; Dcols=Dcols<1? 
1 : Dcols; //Define the local leading dimension of D (keeping in mind that matrices are always stored column-wise) lld_D=Drows*blocksize; //Initialise the descriptor of the dense distributed matrix DESCD= ( int* ) malloc ( DLEN_ * sizeof ( int ) ); if ( DESCD==NULL ) { printf ( "unable to allocate memory for descriptor for C\n" ); return -1; } //D with dimensions (Ddim,Ddim) is distributed over all processes in ICTXT2D, with the first element in process (0,0) //D is distributed into blocks of size (blocksize,blocksize), having a local leading dimension lld_D in this specific process descinit_ ( DESCD, &Ddim, &Ddim, &blocksize, &blocksize, &i_zero, &i_zero, &ICTXT2D, &lld_D, &info ); if ( info!=0 ) { printf ( "Descriptor of matrix C returns info: %d\n",info ); return info; } //Allocate the space necessary to store the part of D that is held into memory of this process. D = ( double* ) calloc ( Drows * blocksize * Dcols * blocksize,sizeof ( double ) ); if ( D==NULL ) { printf ( "unable to allocate memory for Matrix D (required: %ld bytes)\n", Drows * blocksize * Dcols * blocksize * sizeof ( double ) ); return EXIT_FAILURE; } blacs_barrier_ ( &ICTXT2D,"ALL" ); if (iam==0) printf ( "Start set up of B & D\n" ); blacs_barrier_ ( &ICTXT2D,"ALL" ); //set_up_BD is declared in readdist.cpp and constructs the parts of matrices B & D in each processor //which are necessary to create the distributed Schur complement of D info = set_up_BD ( DESCD, D, BT_i, B_j, Btsparse ); //printdense(Drows*blocksize, Dcols * blocksize,D,"matrix_D.txt"); blacs_barrier_ ( &ICTXT2D,"ALL" ); if (iam==0) printf ( "Matrices B & D set up\n" ); if(printD_bool) { int array_of_gsizes[2], array_of_distribs[2], array_of_dargs[2], array_of_psize[2] ; int buffersize; MPI_Datatype file_type; MPI_File fh; MPI_Status status; array_of_gsizes[0]=Dblocks * blocksize; array_of_gsizes[1]=Dblocks * blocksize; array_of_distribs[0]=MPI_DISTRIBUTE_CYCLIC; array_of_distribs[1]=MPI_DISTRIBUTE_CYCLIC; array_of_dargs[0]=blocksize; array_of_dargs[1]=blocksize; array_of_psize[0]=*dims; array_of_psize[1]=*(dims + 1); MPI_Type_create_darray(size,iam,2,array_of_gsizes, array_of_distribs, array_of_dargs, array_of_psize, MPI_ORDER_FORTRAN, MPI_DOUBLE, &file_type); MPI_Type_commit(&file_type); info = MPI_File_open(MPI_COMM_WORLD, filenameD, MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh); /*if ( ( Drows-1 ) % *(dims+1) == *position && ( Dcols-1 ) % *(dims) == pcol && Ddim%blocksize !=0 ) buffersize=((Drows-1) * blocksize + Ddim % blocksize) * ((Dcols-1) * blocksize + Ddim % blocksize); else if ( ( Drows-1 ) % *(dims+1) == *position && Ddim%blocksize !=0 ) buffersize=((Drows-1) * blocksize + Ddim % blocksize) * Dcols * blocksize; else if ( ( Dcols-1 ) % *(dims) == *position && Ddim%blocksize !=0 ) buffersize=((Dcols-1) * blocksize + Ddim % blocksize) * Drows * blocksize; else*/ buffersize= Dcols * Drows * blocksize * blocksize; MPI_File_set_view(fh, 0, MPI_DOUBLE, file_type, "native", MPI_INFO_NULL); info =MPI_File_write_all(fh, D,buffersize, MPI_DOUBLE, &status); MPI_File_close(&fh); if(iam==0) { printf("Matrix D (dimension %d) is printed in file %s\n", Dblocks*blocksize,filenameD); } if(filenameD != NULL) free(filenameD); filenameD=NULL; //delete[] array_of_gsizes, delete[] array_of_distribs, delete[] array_of_dargs, delete[] array_of_psize; } //Now every matrix has to set up the sparse matrix A, consisting of X'X, X'Z, Z'X and Z'Z + lambda*I Xsparse.loadFromFile ( filenameX ); Zsparse.loadFromFile ( filenameZ ); if(filenameX != NULL) 
free(filenameX); filenameX=NULL; if(filenameZ != NULL) free(filenameZ); filenameZ=NULL; smat_t *X_smat, *Z_smat; X_smat= (smat_t *) calloc(1,sizeof(smat_t)); Z_smat= (smat_t *) calloc(1,sizeof(smat_t)); X_smat = smat_new_from ( Xsparse.nrows,Xsparse.ncols,Xsparse.pRows,Xsparse.pCols,Xsparse.pData,0,0 ); Z_smat = smat_new_from ( Zsparse.nrows,Zsparse.ncols,Zsparse.pRows,Zsparse.pCols,Zsparse.pData,0,0 ); smat_t *Xt_smat, *Zt_smat; Xt_smat= (smat_t *) calloc(1,sizeof(smat_t)); Zt_smat= (smat_t *) calloc(1,sizeof(smat_t)); Xt_smat = smat_copy_trans ( X_smat ); Zt_smat = smat_copy_trans ( Z_smat ); CSRdouble Asparse; smat_t *XtX_smat, *XtZ_smat, *ZtZ_smat, *lambda_smat, *ZtZlambda_smat; XtX_smat= (smat_t *) calloc(1,sizeof(smat_t)); XtZ_smat= (smat_t *) calloc(1,sizeof(smat_t)); ZtZ_smat= (smat_t *) calloc(1,sizeof(smat_t)); XtX_smat = smat_matmul ( Xt_smat, X_smat ); XtZ_smat = smat_matmul ( Xt_smat, Z_smat ); ZtZ_smat = smat_matmul ( Zt_smat,Z_smat ); Xsparse.clear(); Zsparse.clear(); smat_free(Xt_smat); smat_free(Zt_smat); /*smat_free(X_smat); smat_free(Z_smat);*/ CSRdouble Imat; makeIdentity ( l, Imat ); lambda_smat= (smat_t *) calloc(1,sizeof(smat_t)); lambda_smat = smat_new_from ( Imat.nrows,Imat.ncols,Imat.pRows,Imat.pCols,Imat.pData,0,0 ); smat_scale_diag ( lambda_smat, -lambda ); ZtZlambda_smat= (smat_t *) calloc(1,sizeof(smat_t)); ZtZlambda_smat = smat_add ( lambda_smat, ZtZ_smat ); smat_free(ZtZ_smat); //smat_free(lambda_smat); smat_to_symmetric_structure ( XtX_smat ); smat_to_symmetric_structure ( ZtZlambda_smat ); CSRdouble XtX_sparse, XtZ_sparse, ZtZ_sparse; XtX_sparse.make2 ( XtX_smat->m,XtX_smat->n,XtX_smat->nnz,XtX_smat->ia,XtX_smat->ja,XtX_smat->a ); XtZ_sparse.make2 ( XtZ_smat->m,XtZ_smat->n,XtZ_smat->nnz,XtZ_smat->ia,XtZ_smat->ja,XtZ_smat->a ); ZtZ_sparse.make2 ( ZtZlambda_smat->m,ZtZlambda_smat->n,ZtZlambda_smat->nnz,ZtZlambda_smat->ia,ZtZlambda_smat->ja,ZtZlambda_smat->a ); /*smat_free(XtX_smat); smat_free(XtZ_smat); smat_free(ZtZlambda_smat);*/ Imat.clear(); if (iam==0) { cout << "*** [ t t ] *** " << endl; cout << "*** [ X X X Z ] *** " << endl; cout << "*** [ ] *** " << endl; cout << "*** G e n e r a t i n g m a t r i x A = [ ] *** " << endl; cout << "*** [ t t ] *** " << endl; cout << "*** [ Z X Z Z ] *** " << endl; } //Sparse matrix A only contains the upper triangular part of A create2x2SymBlockMatrix ( XtX_sparse, XtZ_sparse, ZtZ_sparse, Asparse ); //Asparse.writeToFile("A_sparse.csr"); smat_free(XtX_smat); smat_free(XtZ_smat); smat_free(ZtZlambda_smat); XtX_sparse.clear(); XtZ_sparse.clear(); ZtZ_sparse.clear(); blacs_barrier_ ( &ICTXT2D,"ALL" ); if(printsparseC_bool) { CSRdouble Dmat, Dblock, Csparse; Dblock.nrows=Dblocks * blocksize; Dblock.ncols=Dblocks * blocksize; Dblock.allocate(Dblocks * blocksize, Dblocks * blocksize, 0); Dmat.allocate(0,0,0); for (i=0; i<Drows; ++i) { for(j=0; j<Dcols; ++j) { dense2CSR_sub(D + i * blocksize + j * lld_D * blocksize,blocksize,blocksize,lld_D,Dblock,( * ( dims) * i + *position ) *blocksize, ( * ( dims+1 ) * j + pcol ) *blocksize); if ( Dblock.nonzeros>0 ) { if ( Dmat.nonzeros==0 ) { Dmat.make2 ( Dblock.nrows,Dblock.ncols,Dblock.nonzeros,Dblock.pRows,Dblock.pCols,Dblock.pData ); } else { Dmat.addBCSR ( Dblock ); } } Dblock.clear(); } } blacs_barrier_(&ICTXT2D,"A"); if ( iam!=0 ) { //Each process other than root sends its Dmat to the root process. 
MPI_Send ( & ( Dmat.nonzeros ),1, MPI_INT,0,iam,MPI_COMM_WORLD ); MPI_Send ( & ( Dmat.pRows[0] ),Dmat.nrows + 1, MPI_INT,0,iam+size,MPI_COMM_WORLD ); MPI_Send ( & ( Dmat.pCols[0] ),Dmat.nonzeros, MPI_INT,0,iam+2*size,MPI_COMM_WORLD ); MPI_Send ( & ( Dmat.pData[0] ),Dmat.nonzeros, MPI_DOUBLE,0,iam+3*size,MPI_COMM_WORLD ); Dmat.clear(); } else { for ( i=1; i<size; ++i ) { // The root process receives parts of Dmat sequentially from all processes and directly adds them together. int nonzeroes, count; MPI_Recv ( &nonzeroes,1,MPI_INT,i,i,MPI_COMM_WORLD,&status ); /*MPI_Get_count(&status, MPI_INT, &count); printf("Process 0 received %d elements of process %d\n",count,i);*/ if(nonzeroes>0) { printf("Nonzeroes : %d\n ",nonzeroes); Dblock.allocate ( Dblocks * blocksize,Dblocks * blocksize,nonzeroes ); MPI_Recv ( & ( Dblock.pRows[0] ), Dblocks * blocksize + 1, MPI_INT,i,i+size,MPI_COMM_WORLD,&status ); /*MPI_Get_count(&status, MPI_INT, &count); printf("Process 0 received %d elements of process %d\n",count,i);*/ MPI_Recv ( & ( Dblock.pCols[0] ),nonzeroes, MPI_INT,i,i+2*size,MPI_COMM_WORLD,&status ); /*MPI_Get_count(&status, MPI_INT, &count); printf("Process 0 received %d elements of process %d\n",count,i);*/ MPI_Recv ( & ( Dblock.pData[0] ),nonzeroes, MPI_DOUBLE,i,i+3*size,MPI_COMM_WORLD,&status ); /*MPI_Get_count(&status, MPI_DOUBLE, &count); printf("Process 0 received %d elements of process %d\n",count,i);*/ Dmat.addBCSR ( Dblock ); } } //Dmat.writeToFile("D_sparse.csr"); Dmat.reduceSymmetric(); Btsparse.transposeIt(1); create2x2SymBlockMatrix(Asparse,Btsparse, Dmat, Csparse); Btsparse.clear(); Dmat.clear(); Csparse.writeToFile(filenameC); Csparse.clear(); if(filenameC != NULL) free(filenameC); filenameC=NULL; } } Btsparse.clear(); blacs_barrier_(&ICTXT2D,"A"); //AB_sol will contain the solution of A*X=B, distributed across the process rows. Processes in the same process row possess the same part of AB_sol double * AB_sol; int * DESCAB_sol; DESCAB_sol= ( int* ) malloc ( DLEN_ * sizeof ( int ) ); if ( DESCAB_sol==NULL ) { printf ( "unable to allocate memory for descriptor for AB_sol\n" ); return -1; } //AB_sol (Adim, Ddim) is distributed across all processes in ICTXT2D starting from process (0,0) into blocks of size (Adim, blocksize) descinit_ ( DESCAB_sol, &Adim, &Ddim, &Adim, &blocksize, &i_zero, &i_zero, &ICTXT2D, &Adim, &info ); if ( info!=0 ) { printf ( "Descriptor of matrix C returns info: %d\n",info ); return info; } AB_sol=(double *) calloc(Adim * Dcols*blocksize,sizeof(double)); // Each process calculates the Schur complement of the part of D at its disposal. 
(see src/schur.cpp) // The solution of A * Y = B_j is stored in AB_sol (= A^-1 * B_j) blacs_barrier_(&ICTXT2D,"A"); make_Sij_parallel_denseB ( Asparse, BT_i, B_j, D, lld_D, AB_sol ); BT_i.clear(); B_j.clear(); //From here on the Schur complement S of D is stored in D blacs_barrier_ ( &ICTXT2D,"ALL" ); //The Schur complement is factorised (by ScaLAPACK) pdpotrf_ ( "U",&k,D,&i_one,&i_one,DESCD,&info ); if ( info != 0 ) { printf ( "Cholesky decomposition of D was unsuccessful, error returned: %d\n",info ); return -1; } //From here on the factorization of the Schur complement S is stored in D blacs_barrier_ ( &ICTXT2D,"ALL" ); //The Schur complement is inverted (by ScaLAPACK) pdpotri_ ( "U",&k,D,&i_one,&i_one,DESCD,&info ); if ( info != 0 ) { printf ( "Inverse of D was unsuccessful, error returned: %d\n",info ); return -1; } //From here on the inverse of the Schur complement S is stored in D blacs_barrier_(&ICTXT2D,"A"); double* InvD_T_Block = ( double* ) calloc ( Dblocks * blocksize + Adim ,sizeof ( double ) ); //Diagonal elements of the (1,1) block of C^-1 are still distributed and here they are gathered in InvD_T_Block in the root process. if(*position == pcol) { for (i=0; i<Ddim; ++i) { if (pcol == (i/blocksize) % *dims) { int Dpos = i%blocksize + ((i/blocksize) / *dims) * blocksize ; *(InvD_T_Block + Adim +i) = *( D + Dpos + lld_D * Dpos); } } for ( i=0,j=0; i<Dblocks; ++i,++j ) { if ( j==*dims ) j=0; if ( *position==j ) { dgesd2d_ ( &ICTXT2D,&blocksize,&i_one,InvD_T_Block + Adim + i * blocksize,&blocksize,&i_zero,&i_zero ); } if ( *position==0 ) { dgerv2d_ ( &ICTXT2D,&blocksize,&i_one,InvD_T_Block + Adim + blocksize*i,&blocksize,&j,&j ); } } } blacs_barrier_(&ICTXT2D,"A"); //Only the root process performs a selected inversion of A. if (iam==0) { int pardiso_message_level = 1; int pardiso_mtype=-2; ParDiSO pardiso ( pardiso_mtype, pardiso_message_level ); int number_of_processors = 1; char* var = getenv("OMP_NUM_THREADS"); if(var != NULL) { sscanf( var, "%d", &number_of_processors ); } else { printf("Set environment OMP_NUM_THREADS to 1"); exit(1); } pardiso.iparm[2] = 2; pardiso.iparm[3] = number_of_processors; pardiso.iparm[8] = 0; pardiso.iparm[11] = 1; pardiso.iparm[13] = 0; pardiso.iparm[28] = 0; //This function calculates the factorisation of A once again so this might be optimized. pardiso.findInverseOfA ( Asparse ); printf("Processor %d inverted matrix A\n",iam); } blacs_barrier_(&ICTXT2D,"A"); // To minimize memory usage, and because only the diagonal elements of the inverse are needed, Y' * S is calculated row by rowblocks // the diagonal element is calculates as the dot product of this row and the corresponding column of Y. (Y is solution of AY=B) double* YSrow= ( double* ) calloc ( Dcols * blocksize,sizeof ( double ) ); int * DESCYSROW; DESCYSROW= ( int* ) malloc ( DLEN_ * sizeof ( int ) ); if ( DESCYSROW==NULL ) { printf ( "unable to allocate memory for descriptor for AB_sol\n" ); return -1; } //YSrow (1,Ddim) is distributed across processes of ICTXT2D starting from process (0,0) into blocks of size (1,blocksize) descinit_ ( DESCYSROW, &i_one, &Ddim, &i_one,&blocksize, &i_zero, &i_zero, &ICTXT2D, &i_one, &info ); if ( info!=0 ) { printf ( "Descriptor of matrix C returns info: %d\n",info ); return info; } blacs_barrier_(&ICTXT2D,"A"); //Calculating diagonal elements 1 by 1 of the (0,0)-block of C^-1. 
for (i=1; i<=Adim; ++i) { pdsymm_ ("R","U",&i_one,&Ddim,&d_one,D,&i_one,&i_one,DESCD,AB_sol,&i,&i_one,DESCAB_sol,&d_zero,YSrow,&i_one,&i_one,DESCYSROW); pddot_(&Ddim,InvD_T_Block+i-1,AB_sol,&i,&i_one,DESCAB_sol,&Adim,YSrow,&i_one,&i_one,DESCYSROW,&i_one); /*if(*position==1 && pcol==1) printf("Dot product in process (1,1) is: %g\n", *(InvD_T_Block+i-1)); if(*position==0 && pcol==1) printf("Dot product in process (0,1) is: %g\n",*(InvD_T_Block+i-1));*/ } blacs_barrier_(&ICTXT2D,"A"); if(YSrow != NULL) free(YSrow); YSrow = NULL; if(DESCYSROW != NULL) free(DESCYSROW); DESCYSROW = NULL; if(AB_sol != NULL) free(AB_sol); AB_sol = NULL; if(DESCAB_sol != NULL) free(DESCAB_sol); DESCAB_sol = NULL; if(D != NULL) free(D); D = NULL; if(DESCD != NULL) free(DESCD); DESCD = NULL; //Only in the root process we add the diagonal elements of A^-1 if (iam ==0) { for(i=0; i<Adim; ++i) { j=Asparse.pRows[i]; *(InvD_T_Block+i) += Asparse.pData[j]; } Asparse.clear(); printdense ( Adim+k,1,InvD_T_Block,"diag_inverse_C_parallel.txt" ); } if(InvD_T_Block != NULL) free(InvD_T_Block); InvD_T_Block = NULL; blacs_gridexit_(&ICTXT2D); } //cout << iam << " reached end before MPI_Barrier" << endl; MPI_Barrier(MPI_COMM_WORLD); //MPI_Finalize(); return 0; }
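/* printdense() is used above to dump the diagonal of the inverse, but its body
 * is not part of this excerpt. The sketch below shows one plausible
 * implementation (write an m-by-n column-major matrix to a text file, matching
 * the column-wise storage noted earlier); it is an assumption, not the
 * original routine. */
void printdense(int m, int n, double *mat, const char *filename)
{
    FILE *fd = fopen(filename, "w");
    if (fd == NULL) {
        printf("unable to open %s for writing\n", filename);
        return;
    }
    for (int i = 0; i < m; ++i) {
        for (int j = 0; j < n; ++j)
            fprintf(fd, "%g\t", *(mat + i + j * m));   /* column-major access */
        fprintf(fd, "\n");
    }
    fclose(fd);
}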
/* darray_4d_c_test1() * * Returns the number of errors encountered. */ int darray_4d_c_test1(void) { MPI_Datatype darray; int array[72]; int array_size[4] = { 6, 3, 2, 2 }; int array_distrib[4] = { MPI_DISTRIBUTE_BLOCK, MPI_DISTRIBUTE_BLOCK, MPI_DISTRIBUTE_NONE, MPI_DISTRIBUTE_NONE }; int array_dargs[4] = { MPI_DISTRIBUTE_DFLT_DARG, MPI_DISTRIBUTE_DFLT_DARG, MPI_DISTRIBUTE_DFLT_DARG, MPI_DISTRIBUTE_DFLT_DARG }; int array_psizes[4] = { 6, 3, 1, 1 }; int i, rank, err, errs = 0, sizeoftype; for (rank = 0; rank < 18; rank++) { /* set up array */ for (i = 0; i < 72; i++) { array[i] = i; } /* set up type */ err = MPI_Type_create_darray(18, /* size */ rank, 4, /* dims */ array_size, array_distrib, array_dargs, array_psizes, MPI_ORDER_C, MPI_INT, &darray); if (err != MPI_SUCCESS) { errs++; if (verbose) { fprintf(stderr, "error in MPI_Type_create_darray call; aborting after %d errors\n", errs); } MTestPrintError(err); return errs; } MPI_Type_commit(&darray); /* verify the size of the type */ MPI_Type_size(darray, &sizeoftype); if (sizeoftype != 4 * sizeof(int)) { errs++; if (verbose) fprintf(stderr, "size of type = %d; should be %d\n", sizeoftype, (int) (4 * sizeof(int))); return errs; } /* pack and unpack the type, zero'ing out all other values */ err = pack_and_unpack((char *) array, 1, darray, 72 * sizeof(int)); for (i = 0; i < 4 * rank; i++) { if (array[i] != 0) { errs++; if (verbose) fprintf(stderr, "[4d array rank=%d]:array[%d] = %d; should be %d\n", rank, i, array[i], 0); } } for (i = 4 * rank; i < 4 * rank + 4; i++) { if (array[i] != i) { errs++; if (verbose) fprintf(stderr, "[4d array rank=%d]:array[%d] = %d; should be %d\n", rank, i, array[i], i); } } for (i = 4 * rank + 4; i < 72; i++) { if (array[i] != 0) { errs++; if (verbose) fprintf(stderr, "[4d array rank=%d]:array[%d] = %d; should be %d\n", rank, i, array[i], 0); } } MPI_Type_free(&darray); } return errs; }
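/* darray_4d_c_test1() relies on a pack_and_unpack() helper that is not shown
 * here. A minimal sketch consistent with how it is called (pack `count`
 * instances of `datatype` from typebuf, zero the buffer, then unpack back into
 * it) follows; treat it as an assumption about the real helper. It expects
 * <stdlib.h> and <string.h> to be included. */
static int pack_and_unpack(char *typebuf, int count, MPI_Datatype datatype, int typebufsz)
{
    char *packbuf;
    int pack_size, position;

    MPI_Pack_size(count, datatype, MPI_COMM_SELF, &pack_size);
    packbuf = (char *) malloc(pack_size);
    if (!packbuf)
        return 1;

    position = 0;
    MPI_Pack(typebuf, count, datatype, packbuf, pack_size, &position, MPI_COMM_SELF);

    memset(typebuf, 0, typebufsz);   /* zero everything so untouched bytes stay 0 */

    position = 0;
    MPI_Unpack(packbuf, pack_size, &position, typebuf, count, datatype, MPI_COMM_SELF);

    free(packbuf);
    return 0;
}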
int main(int argc, char *argv[]) { int errs = 0; int wrank, wsize; int gsizes[3], distribs[3], dargs[3], psizes[3]; int px, py, nx, ny, rx, ry, bx, by; int *srcArray, *destArray; int i, j, ii, jj, loc; MPI_Datatype darraytype; MTest_Init(0, 0); MPI_Comm_rank(MPI_COMM_WORLD, &wrank); MPI_Comm_size(MPI_COMM_WORLD, &wsize); /* Test 1: Simple, 1-D cyclic decomposition */ if (AllocateGrid(1, 3 * wsize, &srcArray, &destArray)) { MPI_Abort(MPI_COMM_WORLD, 1); } /* Simple cyclic with 1-dim global array */ gsizes[0] = 3 * wsize; distribs[0] = MPI_DISTRIBUTE_CYCLIC; dargs[0] = 1; psizes[0] = wsize; MPI_Type_create_darray(wsize, wrank, 1, gsizes, distribs, dargs, psizes, MPI_ORDER_C, MPI_INT, &darraytype); /* Check the created datatype. Because cyclic, should represent * a strided type */ if (PackUnpack(darraytype, srcArray, destArray, 3)) { fprintf(stderr, "Error in pack/unpack check\n"); MPI_Abort(MPI_COMM_WORLD, 1); } /* Now, check for correct data */ for (i = 0; i < 3; i++) { if (destArray[i] != wrank + i * wsize) { fprintf(stderr, "1D: %d: Expected %d but saw %d\n", i, wrank + i * wsize, destArray[i]); errs++; } } free(destArray); free(srcArray); MPI_Type_free(&darraytype); /* Test 2: Simple, 1-D cyclic decomposition, with block size=2 */ if (AllocateGrid(1, 4 * wsize, &srcArray, &destArray)) { MPI_Abort(MPI_COMM_WORLD, 1); } /* Simple cyclic with 1-dim global array */ gsizes[0] = 4 * wsize; distribs[0] = MPI_DISTRIBUTE_CYCLIC; dargs[0] = 2; psizes[0] = wsize; MPI_Type_create_darray(wsize, wrank, 1, gsizes, distribs, dargs, psizes, MPI_ORDER_C, MPI_INT, &darraytype); /* Check the created datatype. Because cyclic, should represent * a strided type */ if (PackUnpack(darraytype, srcArray, destArray, 4)) { fprintf(stderr, "Error in pack/unpack check\n"); MPI_Abort(MPI_COMM_WORLD, 1); } loc = 0; /* for each cyclic element */ for (i = 0; i < 2; i++) { /* For each element in block */ for (j = 0; j < 2; j++) { if (destArray[loc] != 2 * wrank + i * 2 * wsize + j) { fprintf(stderr, "1D(2): %d: Expected %d but saw %d\n", i, 2 * wrank + i * 2 * wsize + j, destArray[loc]); errs++; } loc++; } } free(destArray); free(srcArray); MPI_Type_free(&darraytype); /* 2D: Create some 2-D decompositions */ px = wsize / 2; py = 2; rx = wrank % px; ry = wrank / px; if (px * py != wsize) { fprintf(stderr, "An even number of processes is required\n"); MPI_Abort(MPI_COMM_WORLD, 1); } /* Cyclic/Cyclic */ if (AllocateGrid(5 * px, 7 * py, &srcArray, &destArray)) { MPI_Abort(MPI_COMM_WORLD, 1); } /* Simple cyclic/cyclic. Note in C order, the [1] index varies most * rapidly */ gsizes[0] = ny = 7 * py; gsizes[1] = nx = 5 * px; distribs[0] = MPI_DISTRIBUTE_CYCLIC; distribs[1] = MPI_DISTRIBUTE_CYCLIC; dargs[0] = 1; dargs[1] = 1; psizes[0] = py; psizes[1] = px; MPI_Type_create_darray(wsize, wrank, 2, gsizes, distribs, dargs, psizes, MPI_ORDER_C, MPI_INT, &darraytype); /* Check the created datatype. 
Because cyclic, should represent * a strided type */ if (PackUnpack(darraytype, srcArray, destArray, 5 * 7)) { fprintf(stderr, "Error in pack/unpack check\n"); MPI_Abort(MPI_COMM_WORLD, 1); } loc = 0; for (j = 0; j < 7; j++) { for (i = 0; i < 5; i++) { int expected = rx + ry * nx + i * px + j * nx * py; if (destArray[loc] != expected) { errs++; fprintf(stderr, "2D(cc): [%d,%d] = %d, expected %d\n", i, j, destArray[loc], expected); } loc++; } } free(srcArray); free(destArray); MPI_Type_free(&darraytype); /* Cyclic(2)/Cyclic(3) */ if (AllocateGrid(6 * px, 4 * py, &srcArray, &destArray)) { MPI_Abort(MPI_COMM_WORLD, 1); } /* Block cyclic/cyclic. Note in C order, the [1] index varies most * rapidly */ gsizes[0] = ny = 4 * py; gsizes[1] = nx = 6 * px; distribs[0] = MPI_DISTRIBUTE_CYCLIC; distribs[1] = MPI_DISTRIBUTE_CYCLIC; dargs[0] = by = 2; dargs[1] = bx = 3; psizes[0] = py; psizes[1] = px; MPI_Type_create_darray(wsize, wrank, 2, gsizes, distribs, dargs, psizes, MPI_ORDER_C, MPI_INT, &darraytype); /* Check the created datatype. Because cyclic, should represent * a strided type */ if (PackUnpack(darraytype, srcArray, destArray, 4 * 6)) { fprintf(stderr, "Error in pack/unpack check\n"); MPI_Abort(MPI_COMM_WORLD, 1); } loc = 0; for (j = 0; j < 4 / by; j++) { for (jj = 0; jj < by; jj++) { for (i = 0; i < 6 / bx; i++) { for (ii = 0; ii < bx; ii++) { int expected = rx * bx + ry * by * nx + i * bx * px + ii + (j * by * py + jj) * nx; if (destArray[loc] != expected) { errs++; fprintf(stderr, "2D(c(2)c(3)): [%d,%d] = %d, expected %d\n", i * bx + ii, j * by + jj, destArray[loc], expected); } loc++; } } } } free(srcArray); free(destArray); MPI_Type_free(&darraytype); MTest_Finalize(errs); return MTestReturnValue(errs); }
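/* The tests above call AllocateGrid() and PackUnpack(), which are not part of
 * this excerpt. The sketches below match how they are used (srcArray is filled
 * with the global indices 0..nx*ny-1; PackUnpack packs one instance of the
 * darray type from src and unpacks it as `nelm` contiguous ints into dest).
 * They are assumptions about the real helpers, not copies of them. */
static int AllocateGrid(int nx, int ny, int **srcArray, int **destArray)
{
    int i, n = nx * ny;
    int *src = (int *) malloc(n * sizeof(int));
    int *dest = (int *) malloc(n * sizeof(int));
    if (!src || !dest)
        return 1;
    for (i = 0; i < n; i++) {
        src[i] = i;      /* global index, so unpacked values identify positions */
        dest[i] = -1;
    }
    *srcArray = src;
    *destArray = dest;
    return 0;
}

static int PackUnpack(MPI_Datatype dtype, const int src[], int dest[], int nelm)
{
    int packsize, position = 0;
    void *packbuf;

    MPI_Pack_size(1, dtype, MPI_COMM_SELF, &packsize);
    packbuf = malloc(packsize);
    if (!packbuf)
        return 1;

    MPI_Pack((void *) src, 1, dtype, packbuf, packsize, &position, MPI_COMM_SELF);
    position = 0;
    MPI_Unpack(packbuf, packsize, &position, dest, nelm, MPI_INT, MPI_COMM_SELF);

    free(packbuf);
    return 0;
}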
int main (int argc, char **argv) { struct arguments arguments; /* Parse our arguments; every option seen by parse_opt will be reflected in arguments. */ argp_parse (&argp, argc, argv, 0, 0, &arguments); int run_type; run_type = 0; //default is serial if (sscanf (arguments.args[0], "%i", &run_type)!=1) {} int iterations; iterations = 0; //default is serial if (sscanf (arguments.args[1], "%i", &iterations)!=1) {} int count_when; count_when = 1000; if (sscanf (arguments.args[2], "%i", &count_when)!=1) {} char print_list[200]; //used for input list if (sscanf (arguments.args[3], "%s", &print_list)!=1) {} // printf("Print list = %s\n", print_list); //Extract animation list from arguments char char_array[20][12] = { NULL }; //seperated input list int animation_list[20][2] = { NULL }; //integer input list start,range char *tok = strtok(print_list, ","); //counters int i,j,k,x,y,ii,jj; ii = 0; jj = 0; //Loop over tokens parsing our commas int tok_len = 0; while (tok != NULL) { //first loop parses out commas tok_len = strlen(tok); for (jj=0;jj<tok_len;jj++) { char_array[ii][jj] = tok[jj]; } // printf("Tok = %s\n", char_array[ii]); tok = strtok(NULL, ","); ii++; } //looking for a range input, convert to ints int stop; for (ii=0;ii<20;ii++) { //convert first number to int tok = strtok(char_array[ii], "-"); if (tok != NULL) { animation_list[ii][0] = atoi(tok); tok = strtok(NULL, ","); } //look for second number, add to range if (tok != NULL) { stop = atoi(tok); animation_list[ii][1] = stop - animation_list[ii][0]; } // if (rank == 0) // { // printf("Animation_list = %i, %i\n", // animation_list[ii][0], animation_list[ii][1]); // } } //should an animation be generated //prints a bunch of .pgm files, have to hand //make the gif... int animation; animation = arguments.animation; //verbose? 
int verbose; verbose = arguments.verbose; // printf("VERBOSE = %i",verbose); if (verbose>0 && verbose<=10) { verbose = 1; } // Initialize the MPI environment MPI_Init(NULL, NULL); // Get the number of processes int world_size; MPI_Comm_size(MPI_COMM_WORLD, &world_size); // Get the rank of the process int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); // Get the name of the processor char processor_name[MPI_MAX_PROCESSOR_NAME]; int name_len; MPI_Get_processor_name(processor_name, &name_len); //Print run information, exit on bad command line input if (rank == 0 && verbose == 1) { printf("Verbose=%i, RunType=%i, Iterations=%i, CountWhen=%i, Animation=%i\n", verbose,run_type,iterations,count_when, animation); } if (world_size>1 && run_type ==0) { printf("Runtype and processors count not consistant\n"); MPI_Finalize(); exit(0); } if (world_size==1 && run_type>0) { printf("Runtype and processors count not consistant\n"); MPI_Finalize(); exit(0); } if (count_when <= 0) { if (rank == 0) { printf("Invalid count interval, positive integers only\n"); } MPI_Finalize(); exit(0); } //serial if (world_size == 1 && run_type == 0) { ncols=1; nrows=1; } //Blocked else if (world_size>1 && run_type == 1) { ncols = 1; nrows = world_size; my_col = 0; my_row = rank; } //Checker else if (world_size>1 && run_type == 2) { ncols = (int)sqrt(world_size); nrows = (int)sqrt(world_size); my_row = rank/nrows; my_col = rank-my_row*nrows; if (ncols*nrows!=world_size) { if (rank == 0) { printf("Number of processors must be square, Exiting\n"); } MPI_Finalize(); exit(0); } } // if (verbose == 1) // { // printf("WR,row,col=%i,%i,%i\n",rank,my_row,my_col); // } //////////////////////READ IN INITIAL PGM//////////////////////////////// if(!readpgm("life.pgm")) { // printf("WR=%d,HERE2\n",rank); if( rank==0 ) { pprintf( "An error occured while reading the pgm file\n" ); } MPI_Finalize(); return 1; } // Count the life forms. Note that we count from [1,1] - [height+1,width+1]; // we need to ignore the ghost row! 
i = 0; for(y=1; y<local_height+1; y++ ) { for(x=1; x<local_width+1; x++ ) { if( field_a[ y * field_width + x ] ) { i++; } } } // pprintf( "%i local buggies\n", i ); int total; MPI_Allreduce( &i, &total, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD ); if( rank==0 && verbose == 1 ) { pprintf( "%i total buggies\n", total ); } // printf("WR=%d, Row=%d, Col=%d\n",rank,my_row,my_col); //Row and column size per processor int rsize, csize; rsize = local_width; csize = local_height; if (rank == 0 && verbose == 1) { printf("rsize,csize,NP = %d, %d, %d\n",rsize,csize,world_size); } //Create new derived datatype for writing to files MPI_Datatype submatrix; int array_of_gsizes[2]; int array_of_distribs[2]; int array_of_dargs[2]; int array_of_psize[2]; if (run_type == 1) { if (rank == 0) { printf("g0,g1 = %i,%i\n", local_height*ncols, local_width); printf("p0,p1 = %i,%i\n", nrows, ncols); } array_of_gsizes[0] = local_height*ncols; array_of_gsizes[1] = local_width; array_of_distribs[0] = MPI_DISTRIBUTE_BLOCK; array_of_distribs[1] = MPI_DISTRIBUTE_BLOCK; array_of_dargs[0] = MPI_DISTRIBUTE_DFLT_DARG; array_of_dargs[1] = MPI_DISTRIBUTE_DFLT_DARG; array_of_psize[0] = nrows; array_of_psize[1] = ncols; // int order = MPI_ORDER_C; //size,rank,ndims,array_gsizes,array_distribs,array_args,array_psizes //order,oldtype,*newtype MPI_Type_create_darray(world_size, rank, 2, array_of_gsizes, array_of_distribs, array_of_dargs, array_of_psize, MPI_ORDER_C, MPI_UNSIGNED_CHAR, &submatrix); MPI_Type_commit(&submatrix); } else if (run_type == 2) { if (rank == 0) { printf("g0,g1 = %i,%i\n", local_height*ncols, local_width*nrows); printf("p0,p1 = %i,%i\n", nrows, ncols); } array_of_gsizes[0] = local_height*ncols; array_of_gsizes[1] = local_width*nrows; array_of_distribs[0] = MPI_DISTRIBUTE_BLOCK; array_of_distribs[1] = MPI_DISTRIBUTE_BLOCK; array_of_dargs[0] = MPI_DISTRIBUTE_DFLT_DARG; array_of_dargs[1] = MPI_DISTRIBUTE_DFLT_DARG; array_of_psize[0] = nrows; array_of_psize[1] = ncols; // int order = MPI_ORDER_C; //size,rank,ndims,array_gsizes,array_distribs,array_args,array_psizes //order,oldtype,*newtype MPI_Type_create_darray(world_size, rank, 2, array_of_gsizes, array_of_distribs, array_of_dargs, array_of_psize, MPI_ORDER_C, MPI_UNSIGNED_CHAR, &submatrix); MPI_Type_commit(&submatrix); } MPI_Barrier(MPI_COMM_WORLD); //////////////////ALLOCATE ARRAYS, CREATE DATATYPES///////////////////// //Create new column derived datatype MPI_Datatype column; //count, blocklength, stride, oldtype, *newtype MPI_Type_hvector(csize, 1, sizeof(unsigned char), MPI_UNSIGNED_CHAR, &column); MPI_Type_commit(&column); //Create new row derived datatype MPI_Datatype row; //count, blocklength, stride, oldtype, *newtype MPI_Type_hvector(rsize, 1, sizeof(unsigned char), MPI_UNSIGNED_CHAR, &row); MPI_Type_commit(&row); //allocate arrays and corner storage unsigned char *section; unsigned char *neighbors; //to use unsigned char *top; unsigned char *bot; unsigned char *left; unsigned char *right; //to send unsigned char *ttop; unsigned char *tbot; unsigned char *tleft; unsigned char *tright; //MALLOC!! 
section = (unsigned char*)malloc(rsize*csize*sizeof(unsigned char)); neighbors = (unsigned char*)malloc(rsize*csize*sizeof(unsigned char)); top = (unsigned char*)malloc(rsize*sizeof(unsigned char)); bot = (unsigned char*)malloc(rsize*sizeof(unsigned char)); left = (unsigned char*)malloc(csize*sizeof(unsigned char)); right = (unsigned char*)malloc(csize*sizeof(unsigned char)); ttop = (unsigned char*)malloc(rsize*sizeof(unsigned char)); tbot = (unsigned char*)malloc(rsize*sizeof(unsigned char)); tleft = (unsigned char*)malloc(csize*sizeof(unsigned char)); tright = (unsigned char*)malloc(csize*sizeof(unsigned char)); //corners unsigned char topleft,topright,botleft,botright; //used in calculations unsigned char ttopleft,ttopright,tbotleft,tbotright; topleft = 255; topright = 255; botleft = 255; botright = 255; //used for animation, each process will put there own result in and then //each will send to process 1 which will add them up unsigned char* full_matrix; unsigned char* full_matrix_buffer; if (animation == 1) { int msize1 = rsize*ncols*csize*nrows; full_matrix = (unsigned char*)malloc(msize1*sizeof(unsigned char)); full_matrix_buffer = (unsigned char*)malloc(msize1*sizeof(unsigned char)); for (i=0; i<msize1; i++) { full_matrix[i] = 0; full_matrix_buffer[i] = 0; } } // printf("Rsize,Lsize,Fsize=%i %i %i,Csize,Lsize,Fsize=%i %i %i\n",rsize,local_width,field_width,csize,local_height,field_height); //Serial initialize vars int count = 0; if (world_size == 1 && run_type == 0) { for (i=0;i<csize;i++) { for (j=0;j<rsize;j++) { section[i*rsize + j] = 255; if (field_a[(i+1)*(2+rsize) + j + 1]) { section[i*rsize + j] = 0; count += 1; } else { section[i*rsize + j] = 255; } top[j] = 255; bot[j] = 255; ttop[j] = 255; tbot[j] = 255; } right[i] = 255; left[i] = 255; tright[i] = 255; tleft[i] = 255; } // printf("COUNT 4 = %d\n", count); } //Blocked/Checkered initializing variables else if (world_size > 1 && (run_type == 1 || run_type == 2)) { //initialize for (i=0;i<csize;i++) { for (j=0;j<rsize;j++) { section[i*rsize + j] = 255; if (field_a[(i+1)*(2+rsize) + j + 1]) { section[i*rsize + j] = 0; count += 1; } else { section[i*rsize + j] = 255; } top[j] = 255; bot[j] = 255; ttop[j] = 255; tbot[j] = 255; } right[i] = 255; left[i] = 255; tright[i] = 255; tleft[i] = 255; } // MPI_Allreduce( &count, &total, 1, MPI_UNSIGNED_CHAR, MPI_SUM, MPI_COMM_WORLD ); // if (rank == 0) // { // printf("COUNT 4 = %d\n", total); // } } //header/footer for mpio writes char header1[15]; header1[0] = 0x50; header1[1] = 0x35; header1[2] = 0x0a; header1[3] = 0x35; header1[4] = 0x31; header1[5] = 0x32; header1[6] = 0x20; header1[7] = 0x35; header1[8] = 0x31; header1[9] = 0x32; header1[10] = 0x0a; header1[11] = 0x32; header1[12] = 0x35; header1[13] = 0x35; header1[14] = 0x0a; char footer; footer = 0x0a; //make a frame or not? 
int create_frame = 0; //send to int send_to; int receive_from; int info[5]; info[2] = rank; info[3] = rsize; info[4] = csize; unsigned char info2[4]; info2[0] = topleft; info2[1] = topright; info2[2] = botleft; info2[3] = botright; int current_count; int location; //Gameplay for (k=0;k<iterations;k++) { //Count buggies if (k%count_when==0) { if (verbose == 1) { current_count = rsize*csize-count_buggies(rsize,csize,section); MPI_Allreduce( &current_count, &total, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD ); if (rank == 0) { printf("Iteration=%5d, Count=%6d\n", k,total); } ////corner debug // printf("WR,tl,tr,bl,br = %d %d %d %d %d\n", rank, topleft, topright, botleft, botright); } } //Write to file serially for comparison //If animation is requested if (animation == 1 && run_type == 0) { //Put smaller matrix part into larger matrix for (i=0; i<csize; i++) { for (j=0; j<rsize; j++) { location = (my_row*csize*rsize*ncols + my_col*rsize + i*rsize*ncols + j); full_matrix_buffer[location] = section[i*rsize+j]; } // if (rank == 0) // { // printf("Location = %d\n", location); // } } //Gather matrix MPI_Reduce(full_matrix_buffer, full_matrix, rsize*ncols*csize*nrows, MPI_UNSIGNED_CHAR, MPI_SUM, 0, MPI_COMM_WORLD); if (rank == 0 && run_type == 0) { write_matrix_to_pgm(k, rsize*ncols, csize*nrows, full_matrix); } } //mpio write pgm else if (animation == 1 && (run_type == 1 || run_type == 2)) { //default is no frame create_frame = 0; for (ii=0;ii<20;ii++) { for (jj=0;jj<animation_list[ii][1]+1;jj++) { // if (rank == 0) // { // printf("a,ii,j,k= %i,%i,%i,%i, Frame? = %i\n", // animation_list[ii][0],ii,jj,k,(animation_list[ii][0]+jj-k)==0); // } if ((animation_list[ii][0] + jj - k) == 0) { create_frame = 1; break; } } } if (create_frame == 1) { //dynamic filename with leading zeroes for easy conversion to gif char buffer[128]; snprintf(buffer, sizeof(char)*128, "Animation/frame%04d.pgm", k); /* open the file, and set the view */ MPI_File file; MPI_File_open(MPI_COMM_WORLD, buffer, MPI_MODE_CREATE|MPI_MODE_WRONLY, MPI_INFO_NULL, &file); MPI_File_set_view(file, 0, MPI_UNSIGNED_CHAR, MPI_UNSIGNED_CHAR, "native", MPI_INFO_NULL); //write header MPI_File_write(file, &header1, 15, MPI_CHAR, MPI_STATUS_IGNORE); //write matrix MPI_File_set_view(file, 15, MPI_UNSIGNED_CHAR, submatrix, "native", MPI_INFO_NULL); MPI_File_write_all(file, section, rsize*csize, MPI_UNSIGNED_CHAR, MPI_STATUS_IGNORE); //write footer (trailing newline) MPI_File_set_view(file, 15+rsize*ncols*csize*nrows, MPI_UNSIGNED_CHAR, MPI_UNSIGNED_CHAR, "native", MPI_INFO_NULL); MPI_File_write(file, &footer, 1, MPI_CHAR, MPI_STATUS_IGNORE); } } // BLOCKED COMMUNITATION // if (run_type == 1) { //change bot (send top) to account for middle area //alternate to avoid locking send_to = rank - 1; receive_from = rank + 1; //figure out what to send //top and bottom for (i=0;i<rsize;i++) { ttop[i] = section[i]; tbot[i] = section[rsize*(csize-1)+i]; } //left n right for (i=0;i<csize;i++) { tleft[i] = section[0 + rsize*i]; tright[i] = section[rsize-1 + rsize*i]; } //send top, receive bot if (rank%2==0) { if (send_to<world_size && send_to>=0) { MPI_Send(ttop, 1, row, send_to, 0, MPI_COMM_WORLD); } if (receive_from<world_size && receive_from >= 0) { MPI_Recv(bot, 1, row, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } else if (rank%2==1) { if (receive_from<world_size && receive_from >= 0) { MPI_Recv(bot, 1, row, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } if (send_to<world_size && send_to>=0) { MPI_Send(ttop, 1, row, send_to, 0, MPI_COMM_WORLD); } }
//change top to account for middle area //alternate to avoid locking send_to = rank + 1; receive_from = rank - 1; //send bot, receive top if (rank%2==0) { // printf("%d, %d, %d\n", rank, send_to, receive_from); if (send_to<world_size && send_to>=0) { MPI_Send(tbot, 1, row, send_to, 0, MPI_COMM_WORLD); } if (receive_from<world_size && receive_from >= 0) { MPI_Recv(top, 1, row, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } else if (rank%2==1) { // printf("%d, %d, %d\n", rank, send_to, receive_from); if (receive_from<world_size && receive_from >= 0) { //*data,count,type,from,tag,comm,mpi_status MPI_Recv(top, 1, row, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } if (send_to<world_size && send_to>=0) { //*data,count,type,to,tag,comm MPI_Send(tbot, 1, row, send_to, 0, MPI_COMM_WORLD); } } } // CHECKERED COMMUNITATION // else if (run_type == 2) { //figure out what to send //top and bottom for (i=0;i<rsize;i++) { ttop[i] = section[i]; tbot[i] = section[rsize*(csize-1)+i]; } //left n right for (i=0;i<csize;i++) { tleft[i] = section[0 + rsize*i]; tright[i] = section[rsize-1 + rsize*i]; } //corners ttopleft = tleft[0]; tbotleft = tleft[csize-1]; ttopright = tright[0]; tbotright = tright[csize-1]; //Send top, receive bot send_to = rank - nrows; receive_from = rank + nrows; if (rank%2==0) { if (send_to<world_size && send_to>=0) { MPI_Send(ttop, 1, row, send_to, 0, MPI_COMM_WORLD); } if (receive_from<world_size && receive_from>=0) { MPI_Recv(bot, 1, row, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } else if (rank%2==1) { if (receive_from<world_size && receive_from>=0) { MPI_Recv(bot, 1, row, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } if (send_to<world_size && send_to>=0) { MPI_Send(ttop, 1, row, send_to, 0, MPI_COMM_WORLD); } } //Send bot, receive top send_to = rank + nrows; receive_from = rank - nrows; if (rank%2==0) { if (send_to<world_size && send_to>=0) { MPI_Send(tbot, 1, row, send_to, 0, MPI_COMM_WORLD); } if (receive_from<world_size && receive_from>=0) { MPI_Recv(top, 1, row, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } else if (rank%2==1) { if (receive_from<world_size && receive_from>=0) { MPI_Recv(top, 1, row, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } if (send_to<world_size && send_to>=0) { MPI_Send(tbot, 1, row, send_to, 0, MPI_COMM_WORLD); } } //Send left, receive right send_to = rank - 1; receive_from = rank + 1; if (rank%2==0) { if (send_to<world_size && send_to>=0 && send_to/nrows==my_row) { MPI_Send(tleft, 1, column, send_to, 0, MPI_COMM_WORLD); } if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row) { MPI_Recv(right, 1, column, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } else if (rank%2==1) { if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row) { MPI_Recv(right, 1, column, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } if (send_to<world_size && send_to>=0 && send_to/nrows==my_row) { MPI_Send(tleft, 1, column, send_to, 0, MPI_COMM_WORLD); } } //Send right, receive left send_to = rank + 1; receive_from = rank - 1; if (rank%2==0) { if (send_to<world_size && send_to>=0 && send_to/nrows==my_row) { MPI_Send(tright, 1, row, send_to, 0, MPI_COMM_WORLD); } if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row) { MPI_Recv(left, 1, row, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } else if (rank%2==1) { if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row) { MPI_Recv(left, 1, row, receive_from, 0, 
MPI_COMM_WORLD, MPI_STATUS_IGNORE); } if (send_to<world_size && send_to>=0 && send_to/nrows==my_row) { MPI_Send(tright, 1, row, send_to, 0, MPI_COMM_WORLD); } } //Send topright, receive botleft send_to = rank - ncols + 1; receive_from = rank + ncols - 1; if (rank%2==0) { if (send_to<world_size && send_to>=0 && send_to/nrows==my_row-1) { MPI_Send(&ttopright, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD); } if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row+1) { MPI_Recv(&botleft, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } else if (rank%2==1) { if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row+1) { MPI_Recv(&botleft, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } if (send_to<world_size && send_to>=0 && send_to/nrows==my_row-1) { MPI_Send(&ttopright, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD); } } //Send topleft, receive botright send_to = rank - ncols - 1; receive_from = rank + ncols + 1; if (rank%2==0) { if (send_to<world_size && send_to>=0 && send_to/nrows==my_row-1) { MPI_Send(&ttopleft, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD); } if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row+1) { MPI_Recv(&botright, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } else if (rank%2==1) { if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row+1) { MPI_Recv(&botright, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } if (send_to<world_size && send_to>=0 && send_to/nrows==my_row-1) { MPI_Send(&ttopleft, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD); } } //Send botleft, receive topright send_to = rank + ncols - 1; receive_from = rank - ncols + 1; if (rank%2==0) { if (send_to<world_size && send_to>=0 && send_to/nrows==my_row+1) { MPI_Send(&tbotleft, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD); } if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row-1) { MPI_Recv(&topright, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } else if (rank%2==1) { if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row-1) { MPI_Recv(&topright, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } if (send_to<world_size && send_to>=0 && send_to/nrows==my_row+1) { MPI_Send(&tbotleft, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD); } } //Send botright, receive topleft send_to = rank + ncols + 1; receive_from = rank - ncols - 1; if (rank%2==0) { if (send_to<world_size && send_to>=0 && send_to/nrows==my_row+1) { MPI_Send(&tbotright, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD); } if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row-1) { MPI_Recv(&topleft, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } else if (rank%2==1) { if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row-1) { MPI_Recv(&topleft, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } if (send_to<world_size && send_to>=0 && send_to/nrows==my_row+1) { MPI_Send(&tbotright, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD); } } info2[0] = topleft; info2[1] = topright; info2[2] = botleft; info2[3] = botright; } // if (rank == 1){ // print_matrix(rsize, 1, top); // print_matrix(rsize, csize, section); // print_matrix(rsize, 1, bot); // printf("\n"); // } // printf("wr=%d,iteration=%d,maxval=%d, 11\n", rank, 
k,(csize-1)*rsize-1+rsize); /////////// CELL UPDATES ///////////////// //count neighbors for (i=0;i<csize;i++) { for (j=0; j<rsize; j++) { info[0] = i; info[1] = j; neighbors[i*rsize+j] = count_neighbors(info, info2, section, top, bot, left, right); // printf("%i",neighbors[i*rsize+j]); } // printf("\n"); } //update cells current_count = 0; for (i=0;i<csize;i++) { for (j=0; j<rsize; j++) { //cell currently alive if (section[i*rsize+j] == 0) { //survives with 2 or 3 neighbors, otherwise dies if (neighbors[i*rsize+j] < 2 || neighbors[i*rsize+j] > 3) { section[i*rsize+j] = 255; } } else { //Exactly 3 neighbors spawns new life if (neighbors[i*rsize+j] == 3) { section[i*rsize+j] = 0; } } } } } MPI_Barrier(MPI_COMM_WORLD); usleep(500000); /* pause for half a second; sleep() only takes whole seconds */ //free malloc stuff if( field_a != NULL ) free( field_a ); if( field_b != NULL ) free( field_b ); free(section); free(neighbors); free(top); free(bot); free(left); free(right); MPI_Finalize(); exit (0); }
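/* The parity-ordered send/receive pairs above avoid deadlock by alternating which rank sends
 * first. A more compact alternative (a sketch only, not part of the original program) is
 * MPI_Sendrecv with MPI_PROC_NULL standing in for missing neighbors, which removes the even/odd
 * branching. The names `row`, `tbot`, `top`, `rank` and `world_size` are assumed to be the ones
 * used by the program above, and <mpi.h> is assumed to be included. */
void exchange_bottom_row_sketch(unsigned char *tbot, unsigned char *top, MPI_Datatype row, int rank, int world_size)
{
    /* out-of-range neighbors become MPI_PROC_NULL, turning that half of the call into a no-op */
    int below = (rank + 1 < world_size) ? rank + 1 : MPI_PROC_NULL;
    int above = (rank - 1 >= 0) ? rank - 1 : MPI_PROC_NULL;
    /* send my bottom row to the rank below; receive the rank above's bottom row into my top halo */
    MPI_Sendrecv(tbot, 1, row, below, 0, top, 1, row, above, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
}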
int main(int argc, char **argv) { MPI_Datatype newtype; int i, ndims, array_of_gsizes[3], array_of_distribs[3]; int order, nprocs, j, len; int array_of_dargs[3], array_of_psizes[3]; int *readbuf, *writebuf, mynod, *tmpbuf, array_size; MPI_Count bufcount; char *filename; int errs = 0, toterrs; MPI_File fh; MPI_Status status; MPI_Request request; MPI_Info info = MPI_INFO_NULL; int errcode; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &mynod); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); /* process 0 broadcasts the file name to other processes */ if (!mynod) { filename = "testfile"; len = strlen(filename); MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(filename, len + 1, MPI_CHAR, 0, MPI_COMM_WORLD); } else { MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD); filename = (char *)malloc(len + 1); MPI_Bcast(filename, len + 1, MPI_CHAR, 0, MPI_COMM_WORLD); } /* create the distributed array filetype */ ndims = 3; order = MPI_ORDER_C; array_of_gsizes[0] = 32; array_of_gsizes[1] = 32; array_of_gsizes[2] = 32; array_of_distribs[0] = MPI_DISTRIBUTE_BLOCK; array_of_distribs[1] = MPI_DISTRIBUTE_BLOCK; array_of_distribs[2] = MPI_DISTRIBUTE_BLOCK; array_of_dargs[0] = MPI_DISTRIBUTE_DFLT_DARG; array_of_dargs[1] = MPI_DISTRIBUTE_DFLT_DARG; array_of_dargs[2] = MPI_DISTRIBUTE_DFLT_DARG; for (i = 0; i < ndims; i++) array_of_psizes[i] = 0; MPI_Dims_create(nprocs, ndims, array_of_psizes); MPI_Type_create_darray(nprocs, mynod, ndims, array_of_gsizes, array_of_distribs, array_of_dargs, array_of_psizes, order, MPI_INT, &newtype); MPI_Type_commit(&newtype); /* initialize writebuf */ MPI_Type_size_x(newtype, &bufcount); bufcount = bufcount / sizeof(int); writebuf = (int *)malloc(bufcount * sizeof(int)); for (i = 0; i < bufcount; i++) writebuf[i] = 1; array_size = array_of_gsizes[0] * array_of_gsizes[1] * array_of_gsizes[2]; tmpbuf = (int *) calloc(array_size, sizeof(int)); MPI_Irecv(tmpbuf, 1, newtype, mynod, 10, MPI_COMM_WORLD, &request); MPI_Send(writebuf, bufcount, MPI_INT, mynod, 10, MPI_COMM_WORLD); MPI_Wait(&request, &status); j = 0; for (i = 0; i < array_size; i++) if (tmpbuf[i]) { writebuf[j] = i; j++; } free(tmpbuf); if (j != bufcount) { fprintf(stderr, "Error in initializing writebuf on process %d\n", mynod); MPI_Abort(MPI_COMM_WORLD, 1); } /* end of initialization */ /* write the array to the file */ errcode = MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, info, &fh); if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_open"); errcode = MPI_File_set_view(fh, 0, MPI_INT, newtype, "native", info); if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_set_view"); errcode = MPI_File_iwrite_all(fh, writebuf, bufcount, MPI_INT, &request); if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_iwrite_all"); MPI_Wait(&request, &status); errcode = MPI_File_close(&fh); if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_close"); if (!mynod) { /* wkl suggests potential for false " No Errors" if both read * and write use the same file view */ /* solution: rank 0 reads entire file and checks write values */ errcode = MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_RDONLY, info, &fh); if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_open"); readbuf = (int *)malloc(array_size * sizeof(int)); errcode = MPI_File_read(fh, readbuf, array_size, MPI_INT, &status); if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_read"); errcode = MPI_File_close(&fh); if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_close"); for (i = 0; i < 
array_size; i++) if (readbuf[i] != i) { errs++; fprintf(stderr, "Error: write integer %d but read %d\n", i, readbuf[i]); break; } free(readbuf); } MPI_Barrier(MPI_COMM_WORLD); /* now read it back */ readbuf = (int *)malloc(bufcount * sizeof(int)); errcode = MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, info, &fh); if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_open"); errcode = MPI_File_set_view(fh, 0, MPI_INT, newtype, "native", info); if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_set_view"); errcode = MPI_File_iread_all(fh, readbuf, bufcount, MPI_INT, &request); if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_iread_all"); MPI_Wait(&request, &status); errcode = MPI_File_close(&fh); if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_close"); /* check the data read */ for (i = 0; i < bufcount; i++) { if (readbuf[i] != writebuf[i]) { errs++; fprintf(stderr, "Process %d, readbuf %d, writebuf %d, i %d\n", mynod, readbuf[i], writebuf[i], i); } } MPI_Allreduce(&errs, &toterrs, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); if (mynod == 0) { if (toterrs > 0) { fprintf(stderr, "Found %d errors\n", toterrs); } else { fprintf(stdout, " No Errors\n"); } } MPI_Type_free(&newtype); free(readbuf); free(writebuf); if (mynod) free(filename); MPI_Finalize(); return 0; }
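/* handle_error() is called throughout the test above but its definition is not part of this
 * excerpt. A minimal sketch of what such a helper usually looks like (an assumption, not the
 * original implementation): decode the code with MPI_Error_string, print it, and abort. */
static void handle_error(int errcode, const char *str)
{
    char msg[MPI_MAX_ERROR_STRING];
    int resultlen;
    MPI_Error_string(errcode, msg, &resultlen); /* human-readable text for errcode */
    fprintf(stderr, "%s: %s\n", str, msg);
    MPI_Abort(MPI_COMM_WORLD, 1);
}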
/* darray_2d_test1() * * Performs a sequence of tests building darrays with single-element * blocks, running through all the various positions that the element might * come from. * * Returns the number of errors encountered. */ int darray_2d_c_test1(void) { MPI_Datatype darray; int array[9]; /* initialized below */ int array_size[2] = { 3, 3 }; int array_distrib[2] = { MPI_DISTRIBUTE_BLOCK, MPI_DISTRIBUTE_BLOCK }; int array_dargs[2] = { MPI_DISTRIBUTE_DFLT_DARG, MPI_DISTRIBUTE_DFLT_DARG }; int array_psizes[2] = { 3, 3 }; int i, rank, err, errs = 0, sizeoftype; /* pretend we are each rank, one at a time */ for (rank = 0; rank < 9; rank++) { /* set up buffer */ for (i = 0; i < 9; i++) { array[i] = i; } /* set up type */ err = MPI_Type_create_darray(9, /* size */ rank, 2, /* dims */ array_size, array_distrib, array_dargs, array_psizes, MPI_ORDER_C, MPI_INT, &darray); if (err != MPI_SUCCESS) { errs++; if (verbose) { fprintf(stderr, "error in MPI_Type_create_darray call; aborting after %d errors\n", errs); } MTestPrintError(err); return errs; } MPI_Type_commit(&darray); MPI_Type_size(darray, &sizeoftype); if (sizeoftype != sizeof(int)) { errs++; if (verbose) fprintf(stderr, "size of type = %d; should be %d\n", sizeoftype, (int) sizeof(int)); return errs; } err = pack_and_unpack((char *) array, 1, darray, 9 * sizeof(int)); for (i = 0; i < 9; i++) { if ((i == rank) && (array[i] != rank)) { errs++; if (verbose) fprintf(stderr, "[2d array rank=%d]:array[%d] = %d; should be %d\n", rank, i, array[i], rank); } else if ((i != rank) && (array[i] != 0)) { errs++; if (verbose) fprintf(stderr, "[2d array rank=%d]:array[%d] = %d; should be %d\n", rank, i, array[i], 0); } } MPI_Type_free(&darray); } return errs; }
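/* pack_and_unpack() is used by the test above but not shown in this excerpt. A sketch of the
 * idea, with the interface inferred from the call site (so treat it as an assumption): pack
 * `count` items of `datatype` out of `typebuf`, wipe `typebuf`, then unpack the packed bytes
 * back into it, so the caller can check that only the elements selected by the darray survive.
 * Returns the number of errors encountered; assumes <stdlib.h> and <string.h> are included. */
static int pack_and_unpack(char *typebuf, int count, MPI_Datatype datatype, int typebufsz)
{
    char *packbuf;
    int err, errs = 0, packsize, position = 0;
    err = MPI_Pack_size(count, datatype, MPI_COMM_SELF, &packsize);
    if (err != MPI_SUCCESS) return ++errs;
    packbuf = (char *) malloc(packsize);
    if (!packbuf) return ++errs;
    MPI_Pack(typebuf, count, datatype, packbuf, packsize, &position, MPI_COMM_SELF);
    memset(typebuf, 0, typebufsz); /* clear the source buffer before unpacking into it */
    position = 0;
    MPI_Unpack(packbuf, packsize, &position, typebuf, count, datatype, MPI_COMM_SELF);
    free(packbuf);
    return errs;
}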
int main(int argc, char **argv) { MPI_Datatype newtype; int i, ndims, array_of_gsizes[3], array_of_distribs[3]; int order, nprocs, len, flag, err; int array_of_dargs[3], array_of_psizes[3]; int *readbuf, *writebuf, bufcount, mynod; char filename[1024]; MPI_File fh; MPI_Status status; MPI_Aint size_with_aint; MPI_Offset size_with_offset; MPI_Init(&argc,&argv); MPI_Comm_rank(MPI_COMM_WORLD, &mynod); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); /* process 0 takes the file name as a command-line argument and broadcasts it to other processes */ if (!mynod) { i = 1; while ((i < argc) && strcmp("-fname", *argv)) { i++; argv++; } if (i >= argc) { printf("\n*# Usage: large_array -fname filename\n\n"); MPI_Abort(MPI_COMM_WORLD, 1); } argv++; len = strlen(*argv); strcpy(filename, *argv); MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(filename, len+1, MPI_CHAR, 0, MPI_COMM_WORLD); printf("This program creates a 4 Gbyte file. Don't run it if you don't have that much disk space!\n"); } else { MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(filename, len+1, MPI_CHAR, 0, MPI_COMM_WORLD); } /* create the distributed array filetype */ ndims = 3; order = MPI_ORDER_C; array_of_gsizes[0] = 1024; array_of_gsizes[1] = 1024; array_of_gsizes[2] = 4*1024/sizeof(int); array_of_distribs[0] = MPI_DISTRIBUTE_BLOCK; array_of_distribs[1] = MPI_DISTRIBUTE_BLOCK; array_of_distribs[2] = MPI_DISTRIBUTE_BLOCK; array_of_dargs[0] = MPI_DISTRIBUTE_DFLT_DARG; array_of_dargs[1] = MPI_DISTRIBUTE_DFLT_DARG; array_of_dargs[2] = MPI_DISTRIBUTE_DFLT_DARG; for (i=0; i<ndims; i++) array_of_psizes[i] = 0; MPI_Dims_create(nprocs, ndims, array_of_psizes); /* check if MPI_Aint is large enough for size of global array. if not, complain. */ size_with_aint = sizeof(int); for (i=0; i<ndims; i++) size_with_aint *= array_of_gsizes[i]; size_with_offset = sizeof(int); for (i=0; i<ndims; i++) size_with_offset *= array_of_gsizes[i]; if (size_with_aint != size_with_offset) { printf("Can't use an array of this size unless the MPI implementation defines a 64-bit MPI_Aint\n"); MPI_Abort(MPI_COMM_WORLD, 1); } MPI_Type_create_darray(nprocs, mynod, ndims, array_of_gsizes, array_of_distribs, array_of_dargs, array_of_psizes, order, MPI_INT, &newtype); MPI_Type_commit(&newtype); /* initialize writebuf */ MPI_Type_size(newtype, &bufcount); bufcount = bufcount/sizeof(int); writebuf = (int *) malloc(bufcount * sizeof(int)); if (!writebuf) printf("Process %d, not enough memory for writebuf\n", mynod); for (i=0; i<bufcount; i++) writebuf[i] = mynod*1024 + i; /* write the array to the file */ MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh); MPI_File_set_view(fh, 0, MPI_INT, newtype, "native", MPI_INFO_NULL); MPI_File_write_all(fh, writebuf, bufcount, MPI_INT, &status); MPI_File_close(&fh); free(writebuf); /* now read it back */ readbuf = (int *) calloc(bufcount, sizeof(int)); if (!readbuf) printf("Process %d, not enough memory for readbuf\n", mynod); MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh); MPI_File_set_view(fh, 0, MPI_INT, newtype, "native", MPI_INFO_NULL); MPI_File_read_all(fh, readbuf, bufcount, MPI_INT, &status); MPI_File_close(&fh); /* check the data read */ flag = 0; for (i=0; i<bufcount; i++) if (readbuf[i] != mynod*1024 + i) { printf("Process %d, readbuf=%d, writebuf=%d\n", mynod, readbuf[i], mynod*1024 + i); flag = 1; } if (!flag) printf("Process %d: data read back is correct\n", mynod); MPI_Type_free(&newtype); free(readbuf); 
MPI_Barrier(MPI_COMM_WORLD); if (!mynod) { err = MPI_File_delete(filename, MPI_INFO_NULL); if (err == MPI_SUCCESS) printf("file deleted\n"); } MPI_Finalize(); return 0; }
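/* MPI_Type_size() in the program above reports the per-process filetype size through an int,
 * which can overflow once a local block approaches 2 GB. A small sketch of the MPI-3 variant
 * (the same MPI_Type_size_x call used in the earlier darray test) that avoids the overflow;
 * the helper name is illustrative: */
static MPI_Count local_int_count(MPI_Datatype filetype)
{
    MPI_Count type_size;
    MPI_Type_size_x(filetype, &type_size); /* per-process size in bytes, returned as an MPI_Count */
    return type_size / (MPI_Count) sizeof(int); /* number of local int elements described by the type */
}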
/* Definitions of Fortran Wrapper routines */ EXPORT_MPI_API void FORTRAN_API mpi_type_create_darray_(MPI_Fint *size, MPI_Fint *rank, MPI_Fint *ndims, MPI_Fint *array_of_gsizes, MPI_Fint *array_of_distribs, MPI_Fint *array_of_dargs, MPI_Fint *array_of_psizes, MPI_Fint *order, MPI_Fint *oldtype, MPI_Fint *newtype, MPI_Fint *__ierr ) { int i; int *l_array_of_gsizes; int local_l_array_of_gsizes[MPIR_USE_LOCAL_ARRAY]; int *l_array_of_distribs; int local_l_array_of_distribs[MPIR_USE_LOCAL_ARRAY]; int *l_array_of_dargs; int local_l_array_of_dargs[MPIR_USE_LOCAL_ARRAY]; int *l_array_of_psizes; int local_l_array_of_psizes[MPIR_USE_LOCAL_ARRAY]; MPI_Datatype oldtype_c, newtype_c; oldtype_c = MPI_Type_f2c(*oldtype); if ((int)*ndims > 0) { if ((int)*ndims > MPIR_USE_LOCAL_ARRAY) { MPIR_FALLOC(l_array_of_gsizes,(int *) MALLOC( *ndims * sizeof(int) ), MPIR_COMM_WORLD, MPI_ERR_EXHAUSTED, "MPI_TYPE_CREATE_DARRAY" ); MPIR_FALLOC(l_array_of_distribs,(int *) MALLOC( *ndims * sizeof(int) ), MPIR_COMM_WORLD, MPI_ERR_EXHAUSTED, "MPI_TYPE_CREATE_DARRAY" ); MPIR_FALLOC(l_array_of_dargs,(int *) MALLOC( *ndims * sizeof(int) ), MPIR_COMM_WORLD, MPI_ERR_EXHAUSTED, "MPI_TYPE_CREATE_DARRAY" ); MPIR_FALLOC(l_array_of_psizes,(int *) MALLOC( *ndims * sizeof(int) ), MPIR_COMM_WORLD, MPI_ERR_EXHAUSTED, "MPI_TYPE_CREATE_DARRAY" ); } else { l_array_of_gsizes = local_l_array_of_gsizes; l_array_of_distribs = local_l_array_of_distribs; l_array_of_dargs = local_l_array_of_dargs; l_array_of_psizes = local_l_array_of_psizes; } for (i=0; i<(int)*ndims; i++) { l_array_of_gsizes[i] = (int)array_of_gsizes[i]; l_array_of_distribs[i] = (int)array_of_distribs[i]; l_array_of_dargs[i] = (int)array_of_dargs[i]; l_array_of_psizes[i] = (int)array_of_psizes[i]; } } *__ierr = MPI_Type_create_darray((int)*size, (int)*rank, (int)*ndims, l_array_of_gsizes, l_array_of_distribs, l_array_of_dargs, l_array_of_psizes, (int)*order, oldtype_c, &newtype_c); if ((int)*ndims > MPIR_USE_LOCAL_ARRAY) { FREE( l_array_of_gsizes ); FREE( l_array_of_distribs ); FREE( l_array_of_dargs ); FREE( l_array_of_psizes ); } *newtype = MPI_Type_c2f(newtype_c); }
//when called with stage = 0, initializes rdump_buffer //when called with stage = 1, initializes dump_buffer and gdump_buffer void initialize_parallel_write(int stage) { #if MPI && DO_PARALLEL_WRITE size_t nvars_dump, nvars_gdump, nvars_gdump2, nvars_rdump, nvars_fdump; size_t max_buffer_size_bytes, dump_buffer_size_bytes, gdump_buffer_size_bytes, gdump2_buffer_size_bytes, rdump_buffer_size_bytes, fdump_buffer_size_bytes; int array_of_distribs[NDIM], array_of_dargs[NDIM]; int dim; int is_dry_run = 1; //figure out the amount of memory needed to hold each dump type if (stage) { //check if various dumps fit into the above-allocated buffer nvars_dump = dump(0, is_dry_run); nvars_gdump = gdump(is_dry_run); nvars_gdump2 = gdump2(is_dry_run); nvars_fdump = NIMG; dump_buffer_size = nvars_dump*N1*N2*N3; gdump_buffer_size = nvars_gdump*N1*N2*N3; gdump2_buffer_size = nvars_gdump2*N1*N2*N3; fdump_buffer_size = nvars_fdump*N1*N2*N3; dump_buffer_size_bytes = dump_buffer_size*sizeof(dumptype); gdump_buffer_size_bytes = gdump_buffer_size*sizeof(gdumptype); gdump2_buffer_size_bytes = gdump2_buffer_size*sizeof(gdump2type); fdump_buffer_size_bytes = fdump_buffer_size*sizeof(fdumptype); if (i_am_the_master) { printf("dump size = %.2lg GB\n", dump_buffer_size_bytes*mpi_dims[1]*mpi_dims[2]*mpi_dims[3]/(1024.*1024.*1024.)); printf("gdump size = %.2lg GB\n", gdump_buffer_size_bytes*mpi_dims[1]*mpi_dims[2]*mpi_dims[3]/(1024.*1024.*1024.)); printf("gdump2 size = %.2lg GB\n", gdump2_buffer_size_bytes*mpi_dims[1]*mpi_dims[2]*mpi_dims[3]/(1024.*1024.*1024.)); printf("fdump size = %.2lg GB\n", fdump_buffer_size_bytes*mpi_dims[1]*mpi_dims[2]*mpi_dims[3]/(1024.*1024.*1024.)); } max_buffer_size_bytes = MY_MAX(dump_buffer_size_bytes,gdump_buffer_size_bytes); max_buffer_size_bytes = MY_MAX(max_buffer_size_bytes,gdump2_buffer_size_bytes); max_buffer_size_bytes = MY_MAX(max_buffer_size_bytes,fdump_buffer_size_bytes); } else { nvars_rdump = NPR; rdump_buffer_size = nvars_rdump*N1*N2*N3; rdump_buffer_size_bytes = rdump_buffer_size*sizeof(rdumptype); max_buffer_size_bytes = rdump_buffer_size_bytes; } //if already allocated, free memory if(mpi_file_buffer) { free(mpi_file_buffer); mpi_file_buffer = NULL; } //and then allocate anew to make sure the largest of the dumps fits byte-wise mpi_file_buffer = (void*) malloc(max_buffer_size_bytes); if (!mpi_file_buffer) { fprintf(stderr,"Rank %d could not allocate %ld bytes for holding mpi_file_buffer", mpi_rank, max_buffer_size_bytes); MPI_Abort(MPI_COMM_WORLD, errno); } //all arrays now will share the same memory //this possible because different types of dumps are written out in sequence dump_buffer = (dumptype*)mpi_file_buffer; gdump_buffer = (gdumptype*)mpi_file_buffer; gdump2_buffer = (gdump2type*)mpi_file_buffer; rdump_buffer = (rdumptype*)mpi_file_buffer; fdump_buffer = (fdumptype*)failimage; //can write directly since already contiguous array (because no ghost cells) //create MPI file types for each of dump types //initialize MPI arrays for (dim=0; dim<NDIM; dim++) { array_of_distribs[dim] = MPI_DISTRIBUTE_BLOCK; array_of_dargs[dim] = MPI_DISTRIBUTE_DFLT_DARG; } if (stage) { //create new cell and file types for GDUMP MPI_Type_contiguous(nvars_gdump, MPI_GDUMP_TYPE, &gdump_cell_type); MPI_Type_commit(&gdump_cell_type); MPI_Type_create_darray(mpi_numtasks, mpi_rank, 3, mpi_ntot+1, array_of_distribs+1, array_of_dargs+1, mpi_dims+1, MPI_ORDER_C, gdump_cell_type, &gdump_file_type); MPI_Type_commit(&gdump_file_type); //create new cell and file types for GDUMP2 
MPI_Type_contiguous(nvars_gdump2, MPI_GDUMP2_TYPE, &gdump2_cell_type); MPI_Type_commit(&gdump2_cell_type); MPI_Type_create_darray(mpi_numtasks, mpi_rank, 3, mpi_ntot+1, array_of_distribs+1, array_of_dargs+1, mpi_dims+1, MPI_ORDER_C, gdump2_cell_type, &gdump2_file_type); MPI_Type_commit(&gdump2_file_type); //create new cell and types for DUMP MPI_Type_contiguous(nvars_dump, MPI_DUMP_TYPE, &dump_cell_type); MPI_Type_commit(&dump_cell_type); MPI_Type_create_darray(mpi_numtasks, mpi_rank, 3, mpi_ntot+1, array_of_distribs+1, array_of_dargs+1, mpi_dims+1, MPI_ORDER_C, dump_cell_type, &dump_file_type); MPI_Type_commit(&dump_file_type); //create new cell and types for FDUMP MPI_Type_contiguous(nvars_fdump, MPI_FDUMP_TYPE, &fdump_cell_type); MPI_Type_commit(&fdump_cell_type); MPI_Type_create_darray(mpi_numtasks, mpi_rank, 3, mpi_ntot+1, array_of_distribs+1, array_of_dargs+1, mpi_dims+1, MPI_ORDER_C, fdump_cell_type, &fdump_file_type); MPI_Type_commit(&fdump_file_type); //if(i_am_the_master) fprintf(stderr, "dump_buffer_size = %ld bytes, nvars_dump = %ld\n", (long int)dump_buffer_size, (long int)nvars_dump); //if(i_am_the_master) fprintf(stderr, "gdump_buffer_size = %ld bytes, nvars_gdump = %ld\n", (long int)gdump_buffer_size, (long int)nvars_gdump); } else { //create new cell and types for RDUMP MPI_Type_contiguous(nvars_rdump, MPI_RDUMP_TYPE, &rdump_cell_type); MPI_Type_commit(&rdump_cell_type); MPI_Type_create_darray(mpi_numtasks, mpi_rank, 3, mpi_ntot+1, array_of_distribs+1, array_of_dargs+1, mpi_dims+1, MPI_ORDER_C, rdump_cell_type, &rdump_file_type); MPI_Type_commit(&rdump_file_type); //if(i_am_the_master) fprintf(stderr, "rdump_buffer_size = %ld bytes, nvars_rdump = %ld\n", (long int)rdump_buffer_size, (long int)nvars_rdump); } #endif }
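/* A sketch (not the original write routine) of how one of the darray file types committed above
 * would typically be used: set the view to the distributed layout and issue a single collective
 * write from the contiguous local buffer. `dump_cell_type`, `dump_file_type`, `dump_buffer` and
 * the N1*N2*N3 local cell count are the names assumed from the code above; the file name is
 * illustrative. */
void write_dump_sketch(void)
{
#if MPI && DO_PARALLEL_WRITE
    MPI_File fh;
    MPI_Status status;
    MPI_File_open(MPI_COMM_WORLD, "dump000", MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
    /* etype = one cell (nvars_dump contiguous values); filetype = darray of cells */
    MPI_File_set_view(fh, 0, dump_cell_type, dump_file_type, "native", MPI_INFO_NULL);
    /* every rank contributes its N1*N2*N3 local cells in one collective call */
    MPI_File_write_all(fh, dump_buffer, N1*N2*N3, dump_cell_type, &status);
    MPI_File_close(&fh);
#endif
}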