Example #1
void mpi_type_create_darray_(int *size, int *rank, int *ndims,
                             int *array_of_gsizes, int *array_of_distribs,
                             int *array_of_dargs, int *array_of_psizes,
                             int *order, MPI_Datatype *oldtype,
                             MPI_Datatype *newtype, int *__ierr )
{
    *__ierr = MPI_Type_create_darray(*size, *rank, *ndims, array_of_gsizes,
                                     array_of_distribs, array_of_dargs,
                                     array_of_psizes, *order, *oldtype, newtype);
}
Example #2
int loadmatrix_cols(MPI_File *fh, float *rbuf, int rank, int numtasks, int m, int n)
{
	float data[1024];	/* scratch block; assumes m*cols <= 1024 */
	MPI_Datatype darray;
	MPI_Status status;
	int gsizes[2] = {m, n};
	int distribs[2] = {MPI_DISTRIBUTE_BLOCK, MPI_DISTRIBUTE_BLOCK};
	int dargs[2] = {MPI_DISTRIBUTE_DFLT_DARG, MPI_DISTRIBUTE_DFLT_DARG};
	int psizes[2] = {1, 4};	/* hard-coded 1 x 4 grid: requires numtasks == 4 */
	int i, j;
	int count = 0;
	int cols = 0;
	
	cols = n/numtasks;
	if(rank < n%numtasks)
	{
		cols++;
	}
	
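	/* Describe this rank's slab of the global m x n array: a BLOCK/BLOCK
	   distribution over the 1 x 4 grid gives each task all m rows of its
	   share of the columns; the type then serves as the file view below. */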
	MPI_Type_create_darray(numtasks, rank, 2, gsizes, distribs, dargs, psizes, MPI_ORDER_C, MPI_FLOAT, &darray);
	MPI_Type_commit(&darray);
	
	MPI_File_set_view(*fh, 0, MPI_FLOAT, darray, "native", MPI_INFO_NULL);
	
	MPI_File_read_all(*fh, data, m*cols, MPI_FLOAT, &status);
	
	MPI_Get_count(&status, MPI_FLOAT, &count);
	//cols = count/m;
	
	/* reorder the row-major slab read from the file into column-major rbuf */
	for(i = 0; i < cols; i++)
	{
		for(j = 0; j < m; j++)
		{
			rbuf[i*m+j] = data[j*cols+i];
		}
	}
	
	/*
	if(rank == 1)
	{
		for(i = 0; i < cols; i++)
		{
			printf("Proc %d row %d: ", rank, i);
			for(j = 0; j < m; j++)
			{
				printf("%f, ", rbuf[j*cols+i]);
			}
			printf("\n");
		}
	}
	*/
	
	MPI_Type_free(&darray);

	return cols;
}
Example #3
FORTRAN_API void FORT_CALL mpi_type_create_darray_(MPI_Fint * size, MPI_Fint * rank,
                                                   MPI_Fint * ndims,
                                                   MPI_Fint * array_of_gsizes,
                                                   MPI_Fint * array_of_distribs,
                                                   MPI_Fint * array_of_dargs,
                                                   MPI_Fint * array_of_psizes,
                                                   MPI_Fint * order, MPI_Fint * oldtype,
                                                   MPI_Fint * newtype, MPI_Fint * ierr)
{
    *ierr =
        MPI_Type_create_darray(*size, *rank, *ndims, array_of_gsizes, array_of_distribs,
                               array_of_dargs, array_of_psizes, *order, *oldtype, newtype);
}
Example #4
void mpi_type_create_darray_(int *size,int *rank,int *ndims,
                           int *array_of_gsizes,int *array_of_distribs,
                           int *array_of_dargs,int *array_of_psizes,
                           int *order, MPI_Fint *oldtype,
                           MPI_Fint *newtype, int *__ierr )
{
    MPI_Datatype oldtype_c, newtype_c;

    oldtype_c = MPI_Type_f2c(*oldtype);

    *__ierr = MPI_Type_create_darray(*size, *rank, *ndims, array_of_gsizes,
                                     array_of_distribs, array_of_dargs,
                                     array_of_psizes, *order, oldtype_c, &newtype_c);

    *newtype = MPI_Type_c2f(newtype_c);
}
Example #5
int savematrix_rows(MPI_File *fh, float *data, int numrows, int rank, int numtasks, int m, int n)
{
	MPI_Datatype darray;
	MPI_Status status;
	int gsizes[2] = {m, n};
	int distribs[2] = {MPI_DISTRIBUTE_BLOCK, MPI_DISTRIBUTE_BLOCK};
	int dargs[2] = {MPI_DISTRIBUTE_DFLT_DARG, MPI_DISTRIBUTE_DFLT_DARG};
	int psizes[2] = {4, 1};	/* hard-coded 4 x 1 grid: requires numtasks == 4 */
	
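	/* Counterpart of loadmatrix_cols: the 4 x 1 grid block-distributes the
	   m rows, so each task writes its numrows*n elements through the view. */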
	MPI_Type_create_darray(numtasks, rank, 2, gsizes, distribs, dargs, psizes, MPI_ORDER_C, MPI_FLOAT, &darray);
	MPI_Type_commit(&darray);
	
	MPI_File_set_view(*fh, 0, MPI_FLOAT, darray, "native", MPI_INFO_NULL);
	
	MPI_File_write_all(*fh, data, numrows*n, MPI_FLOAT, &status);
	
	MPI_Type_free(&darray);
	
	return 0;
}
Example #6
void ompi_type_create_darray_f(MPI_Fint *size, MPI_Fint *rank,
			      MPI_Fint *ndims, MPI_Fint *gsize_array, 
			      MPI_Fint *distrib_array, MPI_Fint *darg_array,
			      MPI_Fint *psize_array, MPI_Fint *order, 
			      MPI_Fint *oldtype, MPI_Fint *newtype,
			      MPI_Fint *ierr)
{
    int c_ierr;
    MPI_Datatype c_old = MPI_Type_f2c(*oldtype);
    MPI_Datatype c_new;
    OMPI_ARRAY_NAME_DECL(gsize_array);
    OMPI_ARRAY_NAME_DECL(distrib_array);
    OMPI_ARRAY_NAME_DECL(darg_array);
    OMPI_ARRAY_NAME_DECL(psize_array);

    OMPI_ARRAY_FINT_2_INT(gsize_array, *ndims);
    OMPI_ARRAY_FINT_2_INT(distrib_array, *ndims);
    OMPI_ARRAY_FINT_2_INT(darg_array, *ndims);
    OMPI_ARRAY_FINT_2_INT(psize_array, *ndims);

    c_ierr = MPI_Type_create_darray(OMPI_FINT_2_INT(*size),
                                    OMPI_FINT_2_INT(*rank),
                                    OMPI_FINT_2_INT(*ndims),
                                    OMPI_ARRAY_NAME_CONVERT(gsize_array), 
                                    OMPI_ARRAY_NAME_CONVERT(distrib_array),
                                    OMPI_ARRAY_NAME_CONVERT(darg_array),
                                    OMPI_ARRAY_NAME_CONVERT(psize_array),
                                    OMPI_FINT_2_INT(*order), c_old, &c_new);
    if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr);

    OMPI_ARRAY_FINT_2_INT_CLEANUP(gsize_array);
    OMPI_ARRAY_FINT_2_INT_CLEANUP(distrib_array);
    OMPI_ARRAY_FINT_2_INT_CLEANUP(darg_array);
    OMPI_ARRAY_FINT_2_INT_CLEANUP(psize_array);

    if (MPI_SUCCESS == c_ierr) {
      *newtype = MPI_Type_c2f(c_new);
    }
}
Example #7
int main(int argc, char *argv[]) 
{ 
  int i; 

  int rank, size;

  MPI_Datatype darray;

  int distrib[1] = { MPI_DISTRIBUTE_CYCLIC };
  int bsize[1] = { 1 };
  int gsize[1] = { 10 };
  int psize[1] = { 2 };

  int tsize;
  MPI_Aint lb, extent;
 
  MPI_Init(&argc, &argv); 

  MPI_Comm_size(MPI_COMM_WORLD, &size);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

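  /* 1-D cyclic(1) distribution of 10 doubles over a 2-process grid (psize[0]
     must match the communicator size); the type is only inspected for its
     size and extent, never committed or used for I/O. */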
  MPI_Type_create_darray(size, rank, 1, gsize, distrib,
			 bsize, psize, MPI_ORDER_C, MPI_DOUBLE, &darray);

  MPI_Type_size(darray, &tsize);
  MPI_Type_get_extent(darray, &lb, &extent);

  for(i = 0; i < size; i++) {
    MPI_Barrier(MPI_COMM_WORLD);
    if(rank == i) {
      printf("Rank %i, size=%i, extent=%i, lb=%i\n", rank, tsize, (int)extent, (int)lb);
    }
  }

  MPI_Type_free(&darray);

  MPI_Finalize();

  return 0;

} 
Example #8
int
main(int argc, char* argv[])
{
  int  i, rank, npes, bug=0;
  int buf[ng];
  MPI_File     thefile;
  MPI_Status   status;
  MPI_Datatype filetype;
  MPI_Comm     new_comm;
  MPI_Offset   offset=0;
  MPI_Info     info=MPI_INFO_NULL;
  int gsize[D],distrib[D],dargs[D],psize[D];
  int dims[D],periods[D],reorder;
  double t1,t2,mbs;
  double to1,to2,tc1,tc2;
  double et,eto,etc;
  double max_mbs,min_mbs,avg_mbs;
  double max_et,min_et,avg_et;
  double max_eto,min_eto,avg_eto;
  double max_etc,min_etc,avg_etc;
  char process_name[MPI_MAX_PROCESSOR_NAME + 1];
  char rr_blank[] = {"       "};
  char rr_empty[] = {"???????"};
  int  count;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &npes);
  if ( rank == 0 )
    {
     if ( argc < 2 )
       {
        printf(" ERROR: no filename given\n");
        bug++;
       }
     if ( npes == np )
       {
        printf(" file name: %s\n",argv[1]);
        printf(" total number of PE's: %3d\n",np);
        printf(" number of PE's in x direction: %4d\n",npx);
        printf(" number of PE's in y direction: %4d\n",npy);
        printf(" number of PE's in z direction: %4d\n",npz);
        printf(" global grid size: %dx%dx%d 4 byte integers (total %lld)\n",X,Y,Z,(unsigned long)X*Y*Z);
        printf("  local grid size: %dx%dx%d 4 byte integers (total %d)\n",nx,ny,nz,ng);
       }
     else
       {
        printf(" ERROR: total number of PE's must be %d\n",np);
        printf("        actual number of PE's was %d\n",npes);
        bug++;
       }
     if ( bug )
       {
        MPI_Abort(MPI_COMM_WORLD,-1);
       }
    }
 if ( MPI_Get_processor_name(process_name, &count) != MPI_SUCCESS)
   {
    strcpy(process_name, rr_empty);
   }
 else
   {
    if (count < MAX_RR_NAME) strncat(&process_name[count],rr_blank,MAX_RR_NAME-count);
    process_name[MAX_RR_NAME] = '\0';
   }

  MPI_Barrier(MPI_COMM_WORLD);

  MPI_Info_create(&info);

/* allow multiple writers to write to the file concurrently */

/*MPI_Info_set(info,"panfs_concurrent_write","1");*/

/* use data aggregation */

/*MPI_Info_set(info,"romio_cb_write","enable"); */
/*MPI_Info_set(info,"romio_cb_write","disable");*/
/*MPI_Info_set(info,"romio_cb_read","enable"); */
/*MPI_Info_set(info,"romio_cb_read","disable");*/

/* use one aggregator/writer per node */

/*MPI_Info_set(info,"cb_config_list","*:1");*/

/* aggregators/writers per allocation: use this or the above (both work) */

/*i = ((npes-1)/8) + 1;
  sprintf(awpa,"%d",i);
  MPI_Info_set (info,"cb_nodes",awpa);*/

    
  for ( i=0; i<ng; i++ ) buf[i] = rank*10000 + (i+1)%1024;

  for ( i=0; i<D; i++ )
    {
     periods[i] = 1;  /* true */
    }

  reorder = 1;        /* true */

  dims[0] = npx;
  dims[1] = npy;
  dims[2] = npz;
     
  MPI_Cart_create(MPI_COMM_WORLD, D, dims, periods, reorder, &new_comm);

  for ( i=0; i<D; i++ )
    {
     distrib[i] = MPI_DISTRIBUTE_BLOCK;
     dargs[i]   = MPI_DISTRIBUTE_DFLT_DARG;
/*   psize[i]   = 0; */
    }

  gsize[0] = X;
  gsize[1] = Y;
  gsize[2] = Z;

  psize[0] = npx;
  psize[1] = npy;
  psize[2] = npz;

/*
  MPI_Dims_create(npes, D, psize);  

  printf("psize %d %d %d\n",psize[0],psize[1],psize[2]);
*/

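  /* gsize is given as (X, Y, Z) with the x index varying fastest in memory,
     so MPI_ORDER_FORTRAN matches the data layout; the ORDER_C call kept
     below is flagged "don't do this" because it would not. */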
  MPI_Type_create_darray(npes, rank, D, gsize, distrib, dargs, psize, MPI_ORDER_FORTRAN, MPI_INT, &filetype);
/*MPI_Type_create_darray(npes, rank, D, gsize, distrib, dargs, psize, MPI_ORDER_C, MPI_INT, &filetype);              don't do this */

  MPI_Type_commit(&filetype);

  to1 = MPI_Wtime();
  MPI_File_open(new_comm, argv[1], MPI_MODE_WRONLY | MPI_MODE_CREATE, info, &thefile);
  to2 = MPI_Wtime();

  MPI_File_set_size(thefile, offset);

  MPI_File_set_view(thefile, offset, MPI_INT, filetype, "native", MPI_INFO_NULL);

  t1 = MPI_Wtime();
  for ( i=0; i<LOOP; i++)
    {
     MPI_File_write_all(thefile, buf, ng, MPI_INT, &status);
    }
  t2 = MPI_Wtime();

  tc1 = MPI_Wtime();
  MPI_File_close(&thefile);
  tc2 = MPI_Wtime();

  et  = (t2  - t1)/LOOP;
  eto = (to2 - to1)/LOOP;
  etc = (tc2 - tc1)/LOOP;

  mbs = (((double)(LOOP*X*Y*Z)*sizeof(int)))/(1000000.0*(t2-t1));

/*printf(" %s[%3d]    ET  %8.2f  %8.2f  %8.2f         %8.1f mbs\n", process_name, rank, t1, t2, t2-t1, mbs);*/

  MPI_Barrier(MPI_COMM_WORLD);

  MPI_Reduce(&mbs, &avg_mbs, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
  MPI_Reduce(&mbs, &min_mbs, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
  MPI_Reduce(&mbs, &max_mbs, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

  MPI_Reduce(&et, &avg_et, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
  MPI_Reduce(&et, &min_et, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
  MPI_Reduce(&et, &max_et, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

  MPI_Reduce(&eto, &avg_eto, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
  MPI_Reduce(&eto, &min_eto, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
  MPI_Reduce(&eto, &max_eto, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

  MPI_Reduce(&etc, &avg_etc, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
  MPI_Reduce(&etc, &min_etc, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
  MPI_Reduce(&etc, &max_etc, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

  fflush(stdout);

  if ( rank == 0 )
    {
     mbs = avg_mbs/npes;
     printf("\n     average write rate: %9.1f mbs\n", mbs);
     printf("     minimum write rate: %9.1f mbs\n", min_mbs);
     printf("     maximum write rate: %9.1f mbs\n\n", max_mbs);
     avg_eto = avg_eto/npes;
     avg_et  = avg_et/npes;
     avg_etc = avg_etc/npes;
     printf("     open time:  %9.3f min %9.3f avg %9.3f max\n",min_eto,avg_eto,max_eto);  
     printf("     write time: %9.3f min %9.3f avg %9.3f max\n",min_et,avg_et,max_et);  
     printf("     close time: %9.3f min %9.3f avg %9.3f max\n\n",min_etc,avg_etc,max_etc);  
     fflush(stdout);
    }

  MPI_Finalize();
  
  return 0;
}
Example #9
int main(int argc, char *argv[]) 
{ 
  int i, j, nerrors=0, total_errors=0; 

  int rank, size;
  int bpos;

  MPI_Datatype darray;
  MPI_Status status;
  MPI_File mpi_fh;

  /* Define array distribution
      A 2x2 block size works with ROMIO, a 3x3 block size breaks it. */
  int distrib[2] = { MPI_DISTRIBUTE_CYCLIC, MPI_DISTRIBUTE_CYCLIC };
  int bsize[2] = { NBLOCK, NBLOCK };
  int gsize[2] = { NSIDE, NSIDE };
  int psize[2] = { NPROC, NPROC };

  double data[NSIDE*NSIDE];
  double *ldata, *pdata;

  int tsize, nelem;

  MPI_File dfile;
 
  MPI_Init(&argc, &argv);

  MPI_Comm_size(MPI_COMM_WORLD, &size);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  /* Set up type */
  CHECK(MPI_Type_create_darray(size, rank, 2, gsize, distrib,
			 bsize, psize, MPI_ORDER_FORTRAN, MPI_DOUBLE, &darray));
  CHECK(MPI_Type_commit(&darray));
  CHECK(MPI_Type_size(darray, &tsize));
  nelem = tsize / sizeof(double);

  for(i = 0; i < (NSIDE*NSIDE); i++) data[i] = i;

  if (rank == 0) {
    CHECK(MPI_File_open(MPI_COMM_SELF, argv[1],
		MPI_MODE_CREATE|MPI_MODE_WRONLY, MPI_INFO_NULL, &dfile));
    CHECK(MPI_File_write(dfile, data, NSIDE*NSIDE, MPI_DOUBLE, &status));
    CHECK(MPI_File_close(&dfile));
  }
  MPI_Barrier(MPI_COMM_WORLD);

  /* Allocate buffer */
  ldata = (double *)malloc(tsize);
  pdata = (double *)malloc(tsize);

  /* Use Pack to pull out array */
  bpos = 0;
  CHECK(MPI_Pack(data, 1, darray, pdata, tsize, &bpos, MPI_COMM_WORLD));

  MPI_Barrier(MPI_COMM_WORLD);

  /* Read in array from file.  */
  CHECK(MPI_File_open(MPI_COMM_WORLD, argv[1], MPI_MODE_RDONLY, MPI_INFO_NULL, &mpi_fh));
  CHECK(MPI_File_set_view(mpi_fh, 0, MPI_DOUBLE, darray, "native", MPI_INFO_NULL));
  CHECK(MPI_File_read_all(mpi_fh, ldata, nelem, MPI_DOUBLE, &status));
  CHECK(MPI_File_close(&mpi_fh));

  for(i = 0; i < size; i++) {
#ifdef VERBOSE
    MPI_Barrier(MPI_COMM_WORLD);
    if(rank == i) {
      printf("=== Rank %i === (%i elements) \nPacked: ", rank, nelem);
      for(j = 0; j < nelem; j++) {
        printf("%4.1f ", pdata[j]);
        fflush(stdout);
      }
      printf("\nRead:   ");
      for(j = 0; j < nelem; j++) {
        printf("%4.1f ", ldata[j]);
        fflush(stdout);
      }
      printf("\n\n");
      fflush(stdout);
    }
#endif
    if(rank == i) {
      for (j = 0; j < nelem; j++) {
        if (pdata[j] != ldata[j]) {
          fprintf(stderr, "rank %d at index %d: packbuf %4.1f filebuf %4.1f\n",
                  rank, j, pdata[j], ldata[j]);
          nerrors++;
        }
      }
    }
  }
  MPI_Allreduce(&nerrors, &total_errors, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
  if (rank == 0 && total_errors == 0)
      printf(" No Errors\n");

  free(ldata);
  free(pdata);
  MPI_Type_free(&darray);
  MPI_Finalize();

  exit(total_errors);

} 
Example #10
FORT_DLL_SPEC void FORT_CALL mpi_type_create_darray_( MPI_Fint *v1, MPI_Fint *v2,
        MPI_Fint *v3, MPI_Fint v4[], MPI_Fint v5[], MPI_Fint v6[], MPI_Fint v7[],
        MPI_Fint *v8, MPI_Fint *v9, MPI_Fint *v10, MPI_Fint *ierr )
{
    *ierr = MPI_Type_create_darray( *v1, *v2, *v3, v4, v5, v6, v7, *v8,
                                    (MPI_Datatype)(*v9), (MPI_Datatype *)(v10) );
}
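Note: this MPICH-generated stub simply reinterprets the Fortran handles, which is valid only where MPI_Fint and MPI_Datatype are layout-compatible integers; the wrappers in Examples #4 and #6 perform the portable MPI_Type_f2c/MPI_Type_c2f conversion instead.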
Example #11
File: main.cpp Project: arnedc/sparsedense
int main(int argc, char **argv) {
    int info, i, j, pcol, Adim;
    double *D;
    int *DESCD;
    CSRdouble BT_i, B_j, Xsparse, Zsparse, Btsparse;

    /*BT_i.allocate(0,0,0);
    B_j.allocate(0,0,0);
    Xsparse.allocate(0,0,0);
    Zsparse.allocate(0,0,0);
    Btsparse.allocate(0,0,0);*/

    //Initialise MPI and some MPI-variables
    info = MPI_Init ( &argc, &argv );
    if ( info != 0 ) {
        printf ( "Error in MPI initialisation: %d\n",info );
        return info;
    }

    position= ( int* ) calloc ( 2,sizeof ( int ) );
    if ( position==NULL ) {
        printf ( "unable to allocate memory for processor position coordinate\n" );
        return EXIT_FAILURE;
    }

    dims= ( int* ) calloc ( 2,sizeof ( int ) );
    if ( dims==NULL ) {
        printf ( "unable to allocate memory for grid dimensions coordinate\n" );
        return EXIT_FAILURE;
    }

    //BLACS is the interface used by PBLAS and ScaLAPACK on top of MPI

    blacs_pinfo_ ( &iam,&size ); 				//determine the number of processes involved
    info=MPI_Dims_create ( size, 2, dims );			//determine the best 2D cartesian grid with the number of processes
    if ( info != 0 ) {
        printf ( "Error in MPI creation of dimensions: %d\n",info );
        return info;
    }

    //Until now the code can only work with square process grids
    //So we try to get the biggest square grid possible with the number of processes involved
    if (*dims != *(dims+1)) {
        while (*dims * *dims > size)
            *dims -=1;
        *(dims+1)= *dims;
        if (iam==0)
            printf("WARNING: %d processor(s) unused due to reformatting to a square process grid\n", size - (*dims * *dims));
        size = *dims * *dims;
        //cout << "New size of process grid: " << size << endl;
    }

    blacs_get_ ( &i_negone,&i_zero,&ICTXT2D );

    //Initialisation of the BLACS process grid, which is referenced as ICTXT2D
    blacs_gridinit_ ( &ICTXT2D,"R",dims, dims+1 );

    if (iam < size) {

        //The rank (iam) of the process is mapped to a 2D grid: position= (process row, process column)
        blacs_pcoord_ ( &ICTXT2D,&iam,position, position+1 );
        if ( *position ==-1 ) {
            printf ( "Error in proces grid\n" );
            return -1;
        }

        //Filenames, dimensions of all matrices and other important variables are read in as global variables (see src/readinput.cpp)
        info=read_input ( *++argv );
        if ( info!=0 ) {
            printf ( "Something went wrong when reading input file for processor %d\n",iam );
            return -1;
        }

        //blacs_barrier is used to stop any process from going beyond this point before all processes have made it up to this point.
        blacs_barrier_ ( &ICTXT2D,"ALL" );
        if ( * ( position+1 ) ==0 && *position==0 )
            printf ( "Reading of input-file succesful\n" );

        if ( * ( position+1 ) ==0 && *position==0 ) {
            printf("\nA linear mixed model with %d observations, %d genotypes, %d random effects and %d fixed effects\n", n,k,m,l);
            printf("was analyzed using %d (%d x %d) processors\n",size,*dims,*(dims+1));
        }

        //Dimension of A (sparse matrix) is the number of fixed effects(m) + the sparse random effects (l)
        Adim=m+l;

        //Dimension of D (dense matrix) is the number of dense effects (k)
        Ddim=k;

        pcol= * ( position+1 );

        //Define number of blocks needed to store a complete column/row of D
        Dblocks= Ddim%blocksize==0 ? Ddim/blocksize : Ddim/blocksize +1;

        //Define the number of rowblocks needed by the current process to store its part of the dense matrix D
        Drows= ( Dblocks - *position ) % *dims == 0 ? ( Dblocks- *position ) / *dims : ( Dblocks- *position ) / *dims +1;
        Drows= Drows<1? 1 : Drows;

        //Define the number of columnblocks needed by the current process to store its part of the dense matrix D
        Dcols= ( Dblocks - pcol ) % * ( dims+1 ) == 0 ? ( Dblocks- pcol ) / * ( dims+1 ) : ( Dblocks- pcol ) / * ( dims+1 ) +1;
        Dcols=Dcols<1? 1 : Dcols;

        //Define the local leading dimension of D (keeping in mind that matrices are always stored column-wise)
        lld_D=Drows*blocksize;

        //Initialise the descriptor of the dense distributed matrix
        DESCD= ( int* ) malloc ( DLEN_ * sizeof ( int ) );
        if ( DESCD==NULL ) {
            printf ( "unable to allocate memory for descriptor for C\n" );
            return -1;
        }

        //D with dimensions (Ddim,Ddim) is distributed over all processes in ICTXT2D, with the first element in process (0,0)
        //D is distributed into blocks of size (blocksize,blocksize), having a local leading dimension lld_D in this specific process
        descinit_ ( DESCD, &Ddim, &Ddim, &blocksize, &blocksize, &i_zero, &i_zero, &ICTXT2D, &lld_D, &info );
        if ( info!=0 ) {
            printf ( "Descriptor of matrix C returns info: %d\n",info );
            return info;
        }

        //Allocate the space necessary to store the part of D that is held into memory of this process.
        D = ( double* ) calloc ( Drows * blocksize * Dcols * blocksize,sizeof ( double ) );
        if ( D==NULL ) {
            printf ( "unable to allocate memory for Matrix D  (required: %ld bytes)\n", Drows * blocksize * Dcols * blocksize * sizeof ( double ) );
            return EXIT_FAILURE;
        }

        blacs_barrier_ ( &ICTXT2D,"ALL" );
        if (iam==0)
            printf ( "Start set up of B & D\n" );

        blacs_barrier_ ( &ICTXT2D,"ALL" );
        //set_up_BD is declared in readdist.cpp and constructs the parts of matrices B & D in each processor
        //which are necessary to create the distributed Schur complement of D
        info = set_up_BD ( DESCD, D, BT_i, B_j, Btsparse );

        //printdense(Drows*blocksize, Dcols * blocksize,D,"matrix_D.txt");

        blacs_barrier_ ( &ICTXT2D,"ALL" );
        if (iam==0)
            printf ( "Matrices B & D set up\n" );

        if(printD_bool) {

            int array_of_gsizes[2], array_of_distribs[2], array_of_dargs[2], array_of_psize[2] ;
            int buffersize;
            MPI_Datatype file_type;
            MPI_File fh;
            MPI_Status status;
            array_of_gsizes[0]=Dblocks * blocksize;
            array_of_gsizes[1]=Dblocks * blocksize;
            array_of_distribs[0]=MPI_DISTRIBUTE_CYCLIC;
            array_of_distribs[1]=MPI_DISTRIBUTE_CYCLIC;
            array_of_dargs[0]=blocksize;
            array_of_dargs[1]=blocksize;
            array_of_psize[0]=*dims;
            array_of_psize[1]=*(dims + 1);

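            /* A cyclic(blocksize) distribution over the dims[0] x dims[1]
               grid in MPI_ORDER_FORTRAN matches ScaLAPACK's block-cyclic,
               column-major layout of D, so the collective write below
               assembles the file in global order. */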
            MPI_Type_create_darray(size,iam,2,array_of_gsizes, array_of_distribs,
                                   array_of_dargs, array_of_psize, MPI_ORDER_FORTRAN,
                                   MPI_DOUBLE, &file_type);
            MPI_Type_commit(&file_type);
            info = MPI_File_open(MPI_COMM_WORLD, filenameD,
                                 MPI_MODE_CREATE | MPI_MODE_WRONLY,
                                 MPI_INFO_NULL, &fh);
            /*if ( ( Drows-1 ) % *(dims+1) == *position && ( Dcols-1 ) % *(dims) == pcol && Ddim%blocksize !=0 )
                buffersize=((Drows-1) * blocksize + Ddim % blocksize) * ((Dcols-1) * blocksize + Ddim % blocksize);
            else if ( ( Drows-1 ) % *(dims+1) == *position && Ddim%blocksize !=0 )
                buffersize=((Drows-1) * blocksize + Ddim % blocksize) * Dcols * blocksize;
            else if ( ( Dcols-1 ) % *(dims) == *position && Ddim%blocksize !=0 )
                buffersize=((Dcols-1) * blocksize + Ddim % blocksize) * Drows * blocksize;
            else*/
            buffersize= Dcols * Drows * blocksize * blocksize;

            MPI_File_set_view(fh, 0, MPI_DOUBLE, file_type, "native", MPI_INFO_NULL);
            info =MPI_File_write_all(fh, D,buffersize, MPI_DOUBLE,
                                     &status);
	    MPI_File_close(&fh);
            if(iam==0) {
                printf("Matrix D (dimension %d) is printed in file %s\n", Dblocks*blocksize,filenameD);
            }
            if(filenameD != NULL)
                free(filenameD);
            filenameD=NULL;
            //delete[] array_of_gsizes, delete[] array_of_distribs, delete[] array_of_dargs, delete[] array_of_psize;
        }



        //Now every matrix has to set up the sparse matrix A, consisting of X'X, X'Z, Z'X and Z'Z + lambda*I
        Xsparse.loadFromFile ( filenameX );
        Zsparse.loadFromFile ( filenameZ );

        if(filenameX != NULL)
            free(filenameX);
        filenameX=NULL;
        if(filenameZ != NULL)
            free(filenameZ);
        filenameZ=NULL;

        smat_t *X_smat, *Z_smat;

        X_smat= (smat_t *) calloc(1,sizeof(smat_t));
        Z_smat= (smat_t *) calloc(1,sizeof(smat_t));

        X_smat = smat_new_from ( Xsparse.nrows,Xsparse.ncols,Xsparse.pRows,Xsparse.pCols,Xsparse.pData,0,0 );
        Z_smat = smat_new_from ( Zsparse.nrows,Zsparse.ncols,Zsparse.pRows,Zsparse.pCols,Zsparse.pData,0,0 );

        smat_t *Xt_smat, *Zt_smat;
        Xt_smat= (smat_t *) calloc(1,sizeof(smat_t));
        Zt_smat= (smat_t *) calloc(1,sizeof(smat_t));
        Xt_smat = smat_copy_trans ( X_smat );
        Zt_smat = smat_copy_trans ( Z_smat );

        CSRdouble Asparse;
        smat_t *XtX_smat, *XtZ_smat, *ZtZ_smat, *lambda_smat, *ZtZlambda_smat;

        XtX_smat= (smat_t *) calloc(1,sizeof(smat_t));
        XtZ_smat= (smat_t *) calloc(1,sizeof(smat_t));
        ZtZ_smat= (smat_t *) calloc(1,sizeof(smat_t));


        XtX_smat = smat_matmul ( Xt_smat, X_smat );
        XtZ_smat = smat_matmul ( Xt_smat, Z_smat );
        ZtZ_smat = smat_matmul ( Zt_smat,Z_smat );

        Xsparse.clear();
        Zsparse.clear();
        smat_free(Xt_smat);
        smat_free(Zt_smat);
        /*smat_free(X_smat);
        smat_free(Z_smat);*/

        CSRdouble Imat;

        makeIdentity ( l, Imat );

        lambda_smat= (smat_t *) calloc(1,sizeof(smat_t));

        lambda_smat = smat_new_from ( Imat.nrows,Imat.ncols,Imat.pRows,Imat.pCols,Imat.pData,0,0 );

        smat_scale_diag ( lambda_smat, -lambda );

        ZtZlambda_smat= (smat_t *) calloc(1,sizeof(smat_t));

        ZtZlambda_smat = smat_add ( lambda_smat, ZtZ_smat );

        smat_free(ZtZ_smat);
        //smat_free(lambda_smat);


        smat_to_symmetric_structure ( XtX_smat );
        smat_to_symmetric_structure ( ZtZlambda_smat );

        CSRdouble XtX_sparse, XtZ_sparse, ZtZ_sparse;

        XtX_sparse.make2 ( XtX_smat->m,XtX_smat->n,XtX_smat->nnz,XtX_smat->ia,XtX_smat->ja,XtX_smat->a );
        XtZ_sparse.make2 ( XtZ_smat->m,XtZ_smat->n,XtZ_smat->nnz,XtZ_smat->ia,XtZ_smat->ja,XtZ_smat->a );
        ZtZ_sparse.make2 ( ZtZlambda_smat->m,ZtZlambda_smat->n,ZtZlambda_smat->nnz,ZtZlambda_smat->ia,ZtZlambda_smat->ja,ZtZlambda_smat->a );

        /*smat_free(XtX_smat);
        smat_free(XtZ_smat);
        smat_free(ZtZlambda_smat);*/
        Imat.clear();

        if (iam==0) {
            cout << "***                                           [  t     t  ] *** " << endl;
            cout << "***                                           [ X X   X Z ] *** " << endl;
            cout << "***                                           [           ] *** " << endl;
            cout << "*** G e n e r a t i n g    m a t r i x    A = [           ] *** " << endl;
            cout << "***                                           [  t     t  ] *** " << endl;
            cout << "***                                           [ Z X   Z Z ] *** " << endl;
        }

        //Sparse matrix A only contains the upper triangular part of A
        create2x2SymBlockMatrix ( XtX_sparse, XtZ_sparse, ZtZ_sparse, Asparse );
        //Asparse.writeToFile("A_sparse.csr");

        smat_free(XtX_smat);
        smat_free(XtZ_smat);
        smat_free(ZtZlambda_smat);
        XtX_sparse.clear();
        XtZ_sparse.clear();
        ZtZ_sparse.clear();

        blacs_barrier_ ( &ICTXT2D,"ALL" );

        if(printsparseC_bool) {
            CSRdouble Dmat, Dblock, Csparse;
            Dblock.nrows=Dblocks * blocksize;
            Dblock.ncols=Dblocks * blocksize;
            Dblock.allocate(Dblocks * blocksize, Dblocks * blocksize, 0);
            Dmat.allocate(0,0,0);
            for (i=0; i<Drows; ++i) {
                for(j=0; j<Dcols; ++j) {
                    dense2CSR_sub(D + i * blocksize + j * lld_D * blocksize,blocksize,blocksize,lld_D,Dblock,( * ( dims) * i + *position ) *blocksize,
                                  ( * ( dims+1 ) * j + pcol ) *blocksize);
                    if ( Dblock.nonzeros>0 ) {
                        if ( Dmat.nonzeros==0 ) {
                            Dmat.make2 ( Dblock.nrows,Dblock.ncols,Dblock.nonzeros,Dblock.pRows,Dblock.pCols,Dblock.pData );
                        }
                        else {
                            Dmat.addBCSR ( Dblock );
                        }
                    }

                    Dblock.clear();
                }
            }
            blacs_barrier_(&ICTXT2D,"A");
            if ( iam!=0 ) {
                //Each process other than root sends its Dmat to the root process.
                MPI_Send ( & ( Dmat.nonzeros ),1, MPI_INT,0,iam,MPI_COMM_WORLD );
                MPI_Send ( & ( Dmat.pRows[0] ),Dmat.nrows + 1, MPI_INT,0,iam+size,MPI_COMM_WORLD );
                MPI_Send ( & ( Dmat.pCols[0] ),Dmat.nonzeros, MPI_INT,0,iam+2*size,MPI_COMM_WORLD );
                MPI_Send ( & ( Dmat.pData[0] ),Dmat.nonzeros, MPI_DOUBLE,0,iam+3*size,MPI_COMM_WORLD );
                Dmat.clear();
            }
            else {
                for ( i=1; i<size; ++i ) {
                    // The root process receives parts of Dmat sequentially from all processes and directly adds them together.
                    int nonzeroes, count;
                    MPI_Recv ( &nonzeroes,1,MPI_INT,i,i,MPI_COMM_WORLD,&status );
                    /*MPI_Get_count(&status, MPI_INT, &count);
                    printf("Process 0 received %d elements of process %d\n",count,i);*/
                    if(nonzeroes>0) {
                        printf("Nonzeroes : %d\n ",nonzeroes);
                        Dblock.allocate ( Dblocks * blocksize,Dblocks * blocksize,nonzeroes );
                        MPI_Recv ( & ( Dblock.pRows[0] ), Dblocks * blocksize + 1, MPI_INT,i,i+size,MPI_COMM_WORLD,&status );
                        /*MPI_Get_count(&status, MPI_INT, &count);
                        printf("Process 0 received %d elements of process %d\n",count,i);*/
                        MPI_Recv ( & ( Dblock.pCols[0] ),nonzeroes, MPI_INT,i,i+2*size,MPI_COMM_WORLD,&status );
                        /*MPI_Get_count(&status, MPI_INT, &count);
                        printf("Process 0 received %d elements of process %d\n",count,i);*/
                        MPI_Recv ( & ( Dblock.pData[0] ),nonzeroes, MPI_DOUBLE,i,i+3*size,MPI_COMM_WORLD,&status );
                        /*MPI_Get_count(&status, MPI_DOUBLE, &count);
                        printf("Process 0 received %d elements of process %d\n",count,i);*/
                        Dmat.addBCSR ( Dblock );
                    }
                }
                //Dmat.writeToFile("D_sparse.csr");
                Dmat.reduceSymmetric();
                Btsparse.transposeIt(1);
                create2x2SymBlockMatrix(Asparse,Btsparse, Dmat, Csparse);
                Btsparse.clear();
                Dmat.clear();
                Csparse.writeToFile(filenameC);
                Csparse.clear();
                if(filenameC != NULL)
                    free(filenameC);
                filenameC=NULL;
            }
        }
        Btsparse.clear();
        blacs_barrier_(&ICTXT2D,"A");

        //AB_sol will contain the solution of A*X=B, distributed across the process rows. Processes in the same process row possess the same part of AB_sol
        double * AB_sol;
        int * DESCAB_sol;
        DESCAB_sol= ( int* ) malloc ( DLEN_ * sizeof ( int ) );
        if ( DESCAB_sol==NULL ) {
            printf ( "unable to allocate memory for descriptor for AB_sol\n" );
            return -1;
        }
        //AB_sol (Adim, Ddim) is distributed across all processes in ICTXT2D starting from process (0,0) into blocks of size (Adim, blocksize)
        descinit_ ( DESCAB_sol, &Adim, &Ddim, &Adim, &blocksize, &i_zero, &i_zero, &ICTXT2D, &Adim, &info );
        if ( info!=0 ) {
            printf ( "Descriptor of matrix C returns info: %d\n",info );
            return info;
        }

        AB_sol=(double *) calloc(Adim * Dcols*blocksize,sizeof(double));

        // Each process calculates the Schur complement of the part of D at its disposal. (see src/schur.cpp)
        // The solution of A * Y = B_j is stored in AB_sol (= A^-1 * B_j)
        blacs_barrier_(&ICTXT2D,"A");
        make_Sij_parallel_denseB ( Asparse, BT_i, B_j, D, lld_D, AB_sol );
        BT_i.clear();
        B_j.clear();

        //From here on the Schur complement S of D is stored in D

        blacs_barrier_ ( &ICTXT2D,"ALL" );

        //The Schur complement is factorised (by ScaLAPACK)
        pdpotrf_ ( "U",&k,D,&i_one,&i_one,DESCD,&info );
        if ( info != 0 ) {
            printf ( "Cholesky decomposition of D was unsuccessful, error returned: %d\n",info );
            return -1;
        }

        //From here on the factorization of the Schur complement S is stored in D

        blacs_barrier_ ( &ICTXT2D,"ALL" );

        //The Schur complement is inverted (by ScaLAPACK)
        pdpotri_ ( "U",&k,D,&i_one,&i_one,DESCD,&info );
        if ( info != 0 ) {
            printf ( "Inverse of D was unsuccessful, error returned: %d\n",info );
            return -1;
        }

        //From here on the inverse of the Schur complement S is stored in D

        blacs_barrier_(&ICTXT2D,"A");

        double* InvD_T_Block = ( double* ) calloc ( Dblocks * blocksize + Adim ,sizeof ( double ) );

        //Diagonal elements of the (1,1) block of C^-1 are still distributed and here they are gathered in InvD_T_Block in the root process.
        if(*position == pcol) {
            for (i=0; i<Ddim; ++i) {
                if (pcol == (i/blocksize) % *dims) {
                    int Dpos = i%blocksize + ((i/blocksize) / *dims) * blocksize ;
                    *(InvD_T_Block + Adim +i) = *( D + Dpos + lld_D * Dpos);
                }
            }
            for ( i=0,j=0; i<Dblocks; ++i,++j ) {
                if ( j==*dims )
                    j=0;
                if ( *position==j ) {
                    dgesd2d_ ( &ICTXT2D,&blocksize,&i_one,InvD_T_Block + Adim + i * blocksize,&blocksize,&i_zero,&i_zero );
                }
                if ( *position==0 ) {
                    dgerv2d_ ( &ICTXT2D,&blocksize,&i_one,InvD_T_Block + Adim + blocksize*i,&blocksize,&j,&j );
                }
            }
        }

        blacs_barrier_(&ICTXT2D,"A");

        //Only the root process performs a selected inversion of A.
        if (iam==0) {

            int pardiso_message_level = 1;

            int pardiso_mtype=-2;

            ParDiSO pardiso ( pardiso_mtype, pardiso_message_level );
            int number_of_processors = 1;
            char* var = getenv("OMP_NUM_THREADS");
            if(var != NULL) {
                sscanf( var, "%d", &number_of_processors );
            }
            else {
                printf("Set environment OMP_NUM_THREADS to 1");
                exit(1);
            }

            pardiso.iparm[2]  = 2;
            pardiso.iparm[3]  = number_of_processors;
            pardiso.iparm[8]  = 0;
            pardiso.iparm[11] = 1;
            pardiso.iparm[13]  = 0;
            pardiso.iparm[28]  = 0;

            //This function calculates the factorisation of A once again so this might be optimized.
            pardiso.findInverseOfA ( Asparse );

            printf("Processor %d inverted matrix A\n",iam);
        }
        blacs_barrier_(&ICTXT2D,"A");

        // To minimize memory usage, and because only the diagonal elements of the inverse are needed, Y' * S is calculated row block by row block;
        // each diagonal element is calculated as the dot product of this row and the corresponding column of Y. (Y is the solution of AY=B)
        double* YSrow= ( double* ) calloc ( Dcols * blocksize,sizeof ( double ) );
        int * DESCYSROW;
        DESCYSROW= ( int* ) malloc ( DLEN_ * sizeof ( int ) );
        if ( DESCYSROW==NULL ) {
            printf ( "unable to allocate memory for descriptor for AB_sol\n" );
            return -1;
        }
        //YSrow (1,Ddim) is distributed across processes of ICTXT2D starting from process (0,0) into blocks of size (1,blocksize)
        descinit_ ( DESCYSROW, &i_one, &Ddim, &i_one,&blocksize, &i_zero, &i_zero, &ICTXT2D, &i_one, &info );
        if ( info!=0 ) {
            printf ( "Descriptor of matrix C returns info: %d\n",info );
            return info;
        }

        blacs_barrier_(&ICTXT2D,"A");

        //Calculating diagonal elements 1 by 1 of the (0,0)-block of C^-1.
        for (i=1; i<=Adim; ++i) {
            pdsymm_ ("R","U",&i_one,&Ddim,&d_one,D,&i_one,&i_one,DESCD,AB_sol,&i,&i_one,DESCAB_sol,&d_zero,YSrow,&i_one,&i_one,DESCYSROW);
            pddot_(&Ddim,InvD_T_Block+i-1,AB_sol,&i,&i_one,DESCAB_sol,&Adim,YSrow,&i_one,&i_one,DESCYSROW,&i_one);
            /*if(*position==1 && pcol==1)
            printf("Dot product in process (1,1) is: %g\n", *(InvD_T_Block+i-1));
            if(*position==0 && pcol==1)
            printf("Dot product in process (0,1) is: %g\n",*(InvD_T_Block+i-1));*/
        }
        blacs_barrier_(&ICTXT2D,"A");
        if(YSrow != NULL)
            free(YSrow);
        YSrow = NULL;
        if(DESCYSROW != NULL)
            free(DESCYSROW);
        DESCYSROW = NULL;
        if(AB_sol != NULL)
            free(AB_sol);
        AB_sol = NULL;
        if(DESCAB_sol != NULL)
            free(DESCAB_sol);
        DESCAB_sol = NULL;
        if(D != NULL)
            free(D);
        D = NULL;
        if(DESCD != NULL)
            free(DESCD);
        DESCD = NULL;

        //Only the root process adds the diagonal elements of A^-1
        if (iam ==0) {
            for(i=0; i<Adim; ++i) {
                j=Asparse.pRows[i];
                *(InvD_T_Block+i) += Asparse.pData[j];
            }
            Asparse.clear();
            printdense ( Adim+k,1,InvD_T_Block,"diag_inverse_C_parallel.txt" );
        }
        if(InvD_T_Block != NULL)
            free(InvD_T_Block);
        InvD_T_Block = NULL;
	blacs_gridexit_(&ICTXT2D);
    }
    //cout << iam << " reached end before MPI_Barrier" << endl;
    MPI_Barrier(MPI_COMM_WORLD);
    //MPI_Finalize();

    return 0;
}
Example #12
/* darray_4d_c_test1()
 *
 * Returns the number of errors encountered.
 */
int darray_4d_c_test1(void)
{
    MPI_Datatype darray;
    int array[72];
    int array_size[4] = { 6, 3, 2, 2 };
    int array_distrib[4] = { MPI_DISTRIBUTE_BLOCK,
        MPI_DISTRIBUTE_BLOCK,
        MPI_DISTRIBUTE_NONE,
        MPI_DISTRIBUTE_NONE
    };
    int array_dargs[4] = { MPI_DISTRIBUTE_DFLT_DARG,
        MPI_DISTRIBUTE_DFLT_DARG,
        MPI_DISTRIBUTE_DFLT_DARG,
        MPI_DISTRIBUTE_DFLT_DARG
    };
    int array_psizes[4] = { 6, 3, 1, 1 };

    int i, rank, err, errs = 0, sizeoftype;

    for (rank = 0; rank < 18; rank++) {
        /* set up array */
        for (i = 0; i < 72; i++) {
            array[i] = i;
        }

        /* set up type */
        err = MPI_Type_create_darray(18,        /* size */
                                     rank, 4,   /* dims */
                                     array_size,
                                     array_distrib,
                                     array_dargs, array_psizes, MPI_ORDER_C, MPI_INT, &darray);
        if (err != MPI_SUCCESS) {
            errs++;
            if (verbose) {
                fprintf(stderr,
                        "error in MPI_Type_create_darray call; aborting after %d errors\n", errs);
            }
            MTestPrintError(err);
            return errs;
        }

        MPI_Type_commit(&darray);

        /* verify the size of the type */
        MPI_Type_size(darray, &sizeoftype);
        if (sizeoftype != 4 * sizeof(int)) {
            errs++;
            if (verbose)
                fprintf(stderr, "size of type = %d; should be %d\n",
                        sizeoftype, (int) (4 * sizeof(int)));
            return errs;
        }

        /* pack and unpack the type, zeroing out all other values */
        err = pack_and_unpack((char *) array, 1, darray, 72 * sizeof(int));

        for (i = 0; i < 4 * rank; i++) {
            if (array[i] != 0) {
                errs++;
                if (verbose)
                    fprintf(stderr, "[4d array rank=%d]:array[%d] = %d; should be %d\n",
                            rank, i, array[i], 0);
            }
        }

        for (i = 4 * rank; i < 4 * rank + 4; i++) {
            if (array[i] != i) {
                errs++;
                if (verbose)
                    fprintf(stderr, "[4d array rank=%d]:array[%d] = %d; should be %d\n",
                            rank, i, array[i], i);
            }
        }
        for (i = 4 * rank + 4; i < 72; i++) {
            if (array[i] != 0) {
                errs++;
                if (verbose)
                    fprintf(stderr, "[4d array rank=%d]:array[%d] = %d; should be %d\n",
                            rank, i, array[i], 0);
            }
        }

        MPI_Type_free(&darray);
    }
    return errs;
}
Example #13
int main(int argc, char *argv[])
{
    int errs = 0;
    int wrank, wsize;
    int gsizes[3], distribs[3], dargs[3], psizes[3];
    int px, py, nx, ny, rx, ry, bx, by;
    int *srcArray, *destArray;
    int i, j, ii, jj, loc;
    MPI_Datatype darraytype;

    MTest_Init(0, 0);
    MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
    MPI_Comm_size(MPI_COMM_WORLD, &wsize);

    /* Test 1: Simple, 1-D cyclic decomposition */
    if (AllocateGrid(1, 3 * wsize, &srcArray, &destArray)) {
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* Simple cyclic with 1-dim global array */
    gsizes[0] = 3 * wsize;
    distribs[0] = MPI_DISTRIBUTE_CYCLIC;
    dargs[0] = 1;
    psizes[0] = wsize;
    MPI_Type_create_darray(wsize, wrank, 1,
                           gsizes, distribs, dargs, psizes, MPI_ORDER_C, MPI_INT, &darraytype);

    /* Check the created datatype.  Because cyclic, should represent
     * a strided type */
    if (PackUnpack(darraytype, srcArray, destArray, 3)) {
        fprintf(stderr, "Error in pack/unpack check\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    /* Now, check for correct data */
    for (i = 0; i < 3; i++) {
        if (destArray[i] != wrank + i * wsize) {
            fprintf(stderr, "1D: %d: Expected %d but saw %d\n", i, wrank + i * wsize, destArray[i]);
            errs++;
        }
    }

    free(destArray);
    free(srcArray);
    MPI_Type_free(&darraytype);

    /* Test 2: Simple, 1-D cyclic decomposition, with block size=2 */
    if (AllocateGrid(1, 4 * wsize, &srcArray, &destArray)) {
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* Simple cyclic with 1-dim global array */
    gsizes[0] = 4 * wsize;
    distribs[0] = MPI_DISTRIBUTE_CYCLIC;
    dargs[0] = 2;
    psizes[0] = wsize;
    MPI_Type_create_darray(wsize, wrank, 1,
                           gsizes, distribs, dargs, psizes, MPI_ORDER_C, MPI_INT, &darraytype);

    /* Check the created datatype.  Because cyclic, should represent
     * a strided type */
    if (PackUnpack(darraytype, srcArray, destArray, 4)) {
        fprintf(stderr, "Error in pack/unpack check\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    loc = 0;
    /* for each cyclic element */
    for (i = 0; i < 2; i++) {
        /* For each element in block */
        for (j = 0; j < 2; j++) {
            if (destArray[loc] != 2 * wrank + i * 2 * wsize + j) {
                fprintf(stderr, "1D(2): %d: Expected %d but saw %d\n",
                        i, 2 * wrank + i * 2 * wsize + j, destArray[loc]);
                errs++;
            }
            loc++;
        }
    }

    free(destArray);
    free(srcArray);
    MPI_Type_free(&darraytype);

    /* 2D: Create some 2-D decompositions */
    px = wsize / 2;
    py = 2;
    rx = wrank % px;
    ry = wrank / px;

    if (px * py != wsize) {
        fprintf(stderr, "An even number of processes is required\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* Cyclic/Cyclic */
    if (AllocateGrid(5 * px, 7 * py, &srcArray, &destArray)) {
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* Simple cyclic/cyclic. Note in C order, the [1] index varies most
     * rapidly */
    gsizes[0] = ny = 7 * py;
    gsizes[1] = nx = 5 * px;
    distribs[0] = MPI_DISTRIBUTE_CYCLIC;
    distribs[1] = MPI_DISTRIBUTE_CYCLIC;
    dargs[0] = 1;
    dargs[1] = 1;
    psizes[0] = py;
    psizes[1] = px;
    MPI_Type_create_darray(wsize, wrank, 2,
                           gsizes, distribs, dargs, psizes, MPI_ORDER_C, MPI_INT, &darraytype);

    /* Check the created datatype.  Because cyclic, should represent
     * a strided type */
    if (PackUnpack(darraytype, srcArray, destArray, 5 * 7)) {
        fprintf(stderr, "Error in pack/unpack check\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    loc = 0;
    for (j = 0; j < 7; j++) {
        for (i = 0; i < 5; i++) {
            int expected = rx + ry * nx + i * px + j * nx * py;
            if (destArray[loc] != expected) {
                errs++;
                fprintf(stderr, "2D(cc): [%d,%d] = %d, expected %d\n",
                        i, j, destArray[loc], expected);
            }
            loc++;
        }
    }

    free(srcArray);
    free(destArray);
    MPI_Type_free(&darraytype);

    /* Cyclic(2)/Cyclic(3) */
    if (AllocateGrid(6 * px, 4 * py, &srcArray, &destArray)) {
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* Block cyclic/cyclic. Note in C order, the [1] index varies most
     * rapidly */
    gsizes[0] = ny = 4 * py;
    gsizes[1] = nx = 6 * px;
    distribs[0] = MPI_DISTRIBUTE_CYCLIC;
    distribs[1] = MPI_DISTRIBUTE_CYCLIC;
    dargs[0] = by = 2;
    dargs[1] = bx = 3;
    psizes[0] = py;
    psizes[1] = px;
    MPI_Type_create_darray(wsize, wrank, 2,
                           gsizes, distribs, dargs, psizes, MPI_ORDER_C, MPI_INT, &darraytype);

    /* Check the created datatype.  Because cyclic, should represent
     * a strided type */
    if (PackUnpack(darraytype, srcArray, destArray, 4 * 6)) {
        fprintf(stderr, "Error in pack/unpack check\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    loc = 0;
    for (j = 0; j < 4 / by; j++) {
        for (jj = 0; jj < by; jj++) {
            for (i = 0; i < 6 / bx; i++) {
                for (ii = 0; ii < bx; ii++) {
                    int expected = rx * bx + ry * by * nx + i * bx * px + ii +
                        (j * by * py + jj) * nx;
                    if (destArray[loc] != expected) {
                        errs++;
                        fprintf(stderr, "2D(c(2)c(3)): [%d,%d] = %d, expected %d\n",
                                i * bx + ii, j * by + jj, destArray[loc], expected);
                    }
                    loc++;
                }
            }
        }
    }

    free(srcArray);
    free(destArray);
    MPI_Type_free(&darraytype);

    MTest_Finalize(errs);

    return MTestReturnValue(errs);
}
Example #14
int main (int argc, char **argv)
{
    struct arguments arguments;

    /* Parse our arguments; every option seen by parse_opt will
       be reflected in arguments. */
    argp_parse (&argp, argc, argv, 0, 0, &arguments); 

    int run_type;
    run_type = 0; //default is serial
    if (sscanf (arguments.args[0], "%i", &run_type)!=1) {}

    int iterations;
    iterations = 0; //default is 0
    if (sscanf (arguments.args[1], "%i", &iterations)!=1) {}

    int count_when;
    count_when = 1000;
    if (sscanf (arguments.args[2], "%i", &count_when)!=1) {}

    char print_list[200]; //used for input list
    if (sscanf (arguments.args[3], "%s", print_list)!=1) {}

    // printf("Print list = %s\n", print_list);

    //Extract animation list from arguments
    char char_array[20][12] = {{0}};   //separated input list
    int animation_list[20][2] = {{0}}; //integer input list start,range
    char *tok = strtok(print_list, ",");

    //counters
    int i,j,k,x,y,ii,jj;
    ii = 0;
    jj = 0;

    //Loop over tokens parsing out commas
    int tok_len = 0;
    while (tok != NULL)
    {
        //first loop parses out commas
        tok_len = strlen(tok);
        for (jj=0;jj<tok_len;jj++)
        {
            char_array[ii][jj] = tok[jj];
        }

        // printf("Tok = %s\n", char_array[ii]);
        tok = strtok(NULL, ",");
        ii++;
    }

    //looking for a range input, convert to ints
    int stop;
    for (ii=0;ii<20;ii++)
    {
        //convert first number to int
        tok = strtok(char_array[ii], "-");
        if (tok != NULL)
        {
            animation_list[ii][0] = atoi(tok);
            tok = strtok(NULL, ",");
        }
        
        //look for second number, add to range
        if (tok != NULL)
        {
            stop = atoi(tok);
            animation_list[ii][1] = stop - animation_list[ii][0];
        }

        // if (rank == 0)
        // {
        //     printf("Animation_list = %i, %i\n", 
        //         animation_list[ii][0], animation_list[ii][1]);

        // }
    }
    
    //should an animation be generated
    //prints a bunch of .pgm files, have to hand
    //make the gif...
    int animation;
    animation = arguments.animation;

    //verbose?
    int verbose;
    verbose = arguments.verbose;
    // printf("VERBOSE = %i",verbose);
    if (verbose>0 && verbose<=10)
    {
        verbose = 1;
    }

    

    // Initialize the MPI environment
    MPI_Init(NULL, NULL);

    // Get the number of processes
    int world_size;
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);

    // Get the rank of the process
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    // Get the name of the processor
    char processor_name[MPI_MAX_PROCESSOR_NAME];
    int name_len;
    MPI_Get_processor_name(processor_name, &name_len);

    //Print run information, exit on bad command line input
    if (rank == 0 && verbose == 1)
    {
        printf("Verbose=%i, RunType=%i, Iterations=%i, CountWhen=%i, Animation=%i\n",
            verbose,run_type,iterations,count_when, animation);
    }
    if (world_size>1 && run_type ==0)
    {
        printf("Runtype and processors count not consistant\n");
        MPI_Finalize();
        exit(0);
    }
    if (world_size==1 && run_type>0)
    {
        printf("Runtype and processors count not consistant\n");
        MPI_Finalize();
        exit(0);
    }
    if (count_when <= 0)
    {
        if (rank == 0)
        {
            printf("Invalid count interval, positive integers only\n");
        }
        MPI_Finalize();
        exit(0);
    }

     //serial
    if (world_size == 1 && run_type == 0)
    {

        ncols=1;
        nrows=1;
    }
    //Blocked
    else if (world_size>1 && run_type == 1)
    {
        ncols = 1;
        nrows = world_size;
        my_col = 0;
        my_row = rank;
    }
    //Checker
    else if (world_size>1 && run_type == 2)
    {
        ncols = (int)sqrt(world_size);
        nrows = (int)sqrt(world_size);

        my_row = rank/nrows;
        my_col = rank-my_row*nrows;

        if (ncols*nrows!=world_size)
        {
            if (rank == 0)
            {
                printf("Number of processors must be square, Exiting\n");
            }
            MPI_Finalize();
            exit(0);
        }
    }

    // if (verbose == 1)
    // {
    //     printf("WR,row,col=%i,%i,%i\n",rank,my_row,my_col);
    // }

    
    //////////////////////READ IN INITIAL PGM////////////////////////////////
    if(!readpgm("life.pgm"))
    {
        // printf("WR=%d,HERE2\n",rank);
        if( rank==0 )
        {
            pprintf( "An error occured while reading the pgm file\n" );
        }
        MPI_Finalize();
        return 1;
    }

    // Count the life forms. Note that we count from [1,1] - [height+1,width+1];
    // we need to ignore the ghost row!
    i = 0;
    for(y=1; y<local_height+1; y++ )
    {
        for(x=1; x<local_width+1; x++ )
        {
            if( field_a[ y * field_width + x ] )
            {
                i++;
            }
        }
    }
    // pprintf( "%i local buggies\n", i );

    int total;
    MPI_Allreduce( &i, &total, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
    if( rank==0  && verbose == 1 )
    {
        pprintf( "%i total buggies\n", total );
    }
    

    
    // printf("WR=%d, Row=%d, Col=%d\n",rank,my_row,my_col);

    //Row and column size per processor
    int rsize, csize; 
    rsize = local_width;
    csize = local_height;


    if (rank == 0 && verbose == 1)
    {
        printf("rsize,csize,NP = %d, %d, %d\n",rsize,csize,world_size);
    }
    
    //Create new derived datatype for writing to files
    MPI_Datatype submatrix;

    int array_of_gsizes[2];
    int array_of_distribs[2];
    int array_of_dargs[2];
    int array_of_psize[2];

    if (run_type == 1)
    {
        if (rank == 0)
        {
            printf("g0,g1 = %i,%i\n", local_height*ncols, local_width);
            printf("p0,p1 = %i,%i\n", nrows, ncols);
        }
        array_of_gsizes[0] = local_height*ncols;
        array_of_gsizes[1] = local_width;
        array_of_distribs[0] = MPI_DISTRIBUTE_BLOCK;
        array_of_distribs[1] = MPI_DISTRIBUTE_BLOCK;
        array_of_dargs[0] = MPI_DISTRIBUTE_DFLT_DARG;
        array_of_dargs[1] = MPI_DISTRIBUTE_DFLT_DARG;
        array_of_psize[0] = nrows;
        array_of_psize[1] = ncols;
        // int order = MPI_ORDER_C;

        //size,rank,ndims,array_gsizes,array_distribs,array_args,array_psizes
        //order,oldtype,*newtype
        MPI_Type_create_darray(world_size, rank, 2, array_of_gsizes, array_of_distribs,
                array_of_dargs, array_of_psize, MPI_ORDER_C, MPI_UNSIGNED_CHAR, &submatrix);
        MPI_Type_commit(&submatrix);
    }
    else if (run_type == 2)
    {
        if (rank == 0)
        {
            printf("g0,g1 = %i,%i\n", local_height*ncols, local_width*nrows);
            printf("p0,p1 = %i,%i\n", nrows, ncols);
        }
        array_of_gsizes[0] = local_height*ncols;
        array_of_gsizes[1] = local_width*nrows;
        array_of_distribs[0] = MPI_DISTRIBUTE_BLOCK;
        array_of_distribs[1] = MPI_DISTRIBUTE_BLOCK;
        array_of_dargs[0] = MPI_DISTRIBUTE_DFLT_DARG;
        array_of_dargs[1] = MPI_DISTRIBUTE_DFLT_DARG;
        array_of_psize[0] = nrows;
        array_of_psize[1] = ncols;
        // int order = MPI_ORDER_C;

        //size,rank,ndims,array_gsizes,array_distribs,array_args,array_psizes
        //order,oldtype,*newtype
        MPI_Type_create_darray(world_size, rank, 2, array_of_gsizes, array_of_distribs,
                array_of_dargs, array_of_psize, MPI_ORDER_C, MPI_UNSIGNED_CHAR, &submatrix);
        MPI_Type_commit(&submatrix);
    }



    MPI_Barrier(MPI_COMM_WORLD);

    //////////////////ALLOCATE ARRAYS, CREATE DATATYPES/////////////////////

    //Create new column derived datatype
    MPI_Datatype column;
    //count, blocklength, stride, oldtype, *newtype (MPI_Type_hvector was removed in MPI-3)
    MPI_Type_create_hvector(csize, 1, sizeof(unsigned char), MPI_UNSIGNED_CHAR, &column);
    MPI_Type_commit(&column);

    //Create new row derived datatype
    MPI_Datatype row;
    //count, blocklength, stride, oldtype, *newtype (MPI_Type_hvector was removed in MPI-3)
    MPI_Type_create_hvector(rsize, 1, sizeof(unsigned char), MPI_UNSIGNED_CHAR, &row);
    MPI_Type_commit(&row);

    //allocate arrays and corner storage
    unsigned char *section;
    unsigned char *neighbors;
    //to use
    unsigned char *top;
    unsigned char *bot;
    unsigned char *left;
    unsigned char *right;
    //to send
    unsigned char *ttop;
    unsigned char *tbot;
    unsigned char *tleft;
    unsigned char *tright;
    //MALLOC!!
    section = (unsigned char*)malloc(rsize*csize*sizeof(unsigned char));
    neighbors = (unsigned char*)malloc(rsize*csize*sizeof(unsigned char));
    top = (unsigned char*)malloc(rsize*sizeof(unsigned char));
    bot = (unsigned char*)malloc(rsize*sizeof(unsigned char));
    left = (unsigned char*)malloc(csize*sizeof(unsigned char));
    right = (unsigned char*)malloc(csize*sizeof(unsigned char));
    ttop = (unsigned char*)malloc(rsize*sizeof(unsigned char));
    tbot = (unsigned char*)malloc(rsize*sizeof(unsigned char));
    tleft = (unsigned char*)malloc(csize*sizeof(unsigned char));
    tright = (unsigned char*)malloc(csize*sizeof(unsigned char));

    //corners
    unsigned char topleft,topright,botleft,botright; //used in calculations
    unsigned char ttopleft,ttopright,tbotleft,tbotright; 
    topleft = 255;
    topright = 255;
    botleft = 255;
    botright = 255;

    //used for animation, each process will put its own result in and then
    //each will send to process 1 which will add them up
    unsigned char* full_matrix;
    unsigned char* full_matrix_buffer;
    if (animation == 1)
    {
        int msize1 = rsize*ncols*csize*nrows;
        full_matrix = (unsigned char*)malloc(msize1*sizeof(unsigned char));
        full_matrix_buffer = (unsigned char*)malloc(msize1*sizeof(unsigned char));
        for (i=0; i<msize1; i++)
        {
            full_matrix[i] = 0;
            full_matrix_buffer[i] = 0;
        }
    }

    
    // printf("Rsize,Lsize,Fsize=%i %i %i,Csize,Lsize,Fsize=%i %i %i\n",rsize,local_width,field_width,csize,local_height,field_height);

    //Serial initialize vars
    int count = 0;
    if (world_size == 1 && run_type == 0)
    {
        for (i=0;i<csize;i++)
        {
            for (j=0;j<rsize;j++)
            {
                section[i*rsize + j] = 255;
                
                if (field_a[(i+1)*(2+rsize) + j + 1])
                {
                    section[i*rsize + j] = 0;
                    count += 1;
                }
                else
                {
                    section[i*rsize + j] = 255;
                }

                top[j] = 255;
                bot[j] = 255;
                ttop[j] = 255;
                tbot[j] = 255;
            }
            right[i] = 255;
            left[i] = 255;
            tright[i] = 255;
            tleft[i] = 255;
        }
        // printf("COUNT 4 = %d\n", count);
    }

    //Blocked/Checkered initializing variables
    else if (world_size > 1 && (run_type == 1 || run_type == 2))
    {
        //initialize
        for (i=0;i<csize;i++)
        {
            for (j=0;j<rsize;j++)
            {
                section[i*rsize + j] = 255;
                
                if (field_a[(i+1)*(2+rsize) + j + 1])
                {
                    section[i*rsize + j] = 0;
                    count += 1;
                }
                else
                {
                    section[i*rsize + j] = 255;
                }

                top[j] = 255;
                bot[j] = 255;
                ttop[j] = 255;
                tbot[j] = 255;
            }
            right[i] = 255;
            left[i] = 255;
            tright[i] = 255;
            tleft[i] = 255;
        }

        // MPI_Allreduce( &count, &total, 1, MPI_UNSIGNED_CHAR, MPI_SUM, MPI_COMM_WORLD );
        // if (rank == 0)
        // {
        //     printf("COUNT 4 = %d\n", total);
        // }
        
    }


    //header/footer for mpio writes: the PGM header "P5\n512 512\n255\n"
    //(hard-coded for a 512x512 image; the dimensions must match
    //rsize*ncols x csize*nrows)
    char header1[15] = "P5\n512 512\n255\n";

    char footer = 0x0a;   //trailing newline ending the PGM pixel data

    //make a frame or not?
    int create_frame = 0;

    //halo-exchange bookkeeping
    int send_to;
    int receive_from;
    int info[5];    //info[0],info[1] = cell (i,j), filled in the update loop
    info[2] = rank;
    info[3] = rsize;
    info[4] = csize;
    unsigned char info2[4];
    info2[0] = topleft;
    info2[1] = topright;
    info2[2] = botleft;
    info2[3] = botright;

    int current_count;
    int location;

    //Gameplay
    for (k=0;k<iterations;k++)
    {
        //Count buggies
        if (k%count_when==0)
        {
            if (verbose == 1)
            {
                current_count = rsize*csize-count_buggies(rsize,csize,section);
                MPI_Allreduce( &current_count, &total, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
                if (rank == 0)
                {
                    printf("Iteration=%5d,  Count=%6d\n", k,total);
                }
                ////corner debug
                // printf("WR,tl,tr,bl,br = %d %d %d %d %d\n", rank, topleft, topright, botleft, botright);
            }
        }

        
        //Write to file serially for comparison
        //If animation is requested
        if (animation == 1 && run_type == 0)
        {
            //Put smaller matrix part into larger matrix
            for (i=0; i<csize; i++)
            {
                for (j=0; j<rsize; j++)
                {
                    location = (my_row*csize*rsize*ncols + my_col*rsize + 
                                    i*rsize*ncols + j);

                    full_matrix_buffer[location] = section[i*rsize+j];
                }
                // if (rank == 0)
                // {
                //     printf("Location = %d\n", location);
                // }
            }

            //Gather the full matrix on rank 0: every rank contributes zeros
            //outside its own block, so MPI_SUM reassembles the image
            MPI_Reduce(full_matrix_buffer, full_matrix, rsize*ncols*csize*nrows, 
                MPI_UNSIGNED_CHAR, MPI_SUM, 0, MPI_COMM_WORLD);

            
            if (rank == 0 && run_type == 0)
            {
                write_matrix_to_pgm(k, rsize*ncols, csize*nrows, full_matrix);
            }
        }
        //mpio write pgm
        else if (animation == 1 && (run_type == 1 || run_type == 2))
        {
            //default is no frame
            create_frame = 0;
            for (ii=0;ii<20;ii++)
            {
                for (jj=0;jj<animation_list[ii][1]+1;jj++)
                {
                    // if (rank == 0)
                    // {
                    //     printf("a,ii,j,k= %i,%i,%i,%i, Frame? = %i\n",
                    //         animation_list[ii][0],ii,jj,k,(animation_list[ii][0]+jj-k)==0);
                    // }
                    if ((animation_list[ii][0] + jj - k) == 0)
                    {

                        create_frame = 1;
                        break;
                    }
                }
            }

            if (create_frame == 1)
            {
               //dynamic filename with leading zeroes for easy conversion to gif
                char buffer[128];
                snprintf(buffer, sizeof(buffer), "Animation/frame%04d.pgm", k);

                /* open the file, and set the view */
                MPI_File file;
                MPI_File_open(MPI_COMM_WORLD, buffer, 
                              MPI_MODE_CREATE|MPI_MODE_WRONLY,
                              MPI_INFO_NULL, &file);

                MPI_File_set_view(file, 0,  MPI_UNSIGNED_CHAR, MPI_UNSIGNED_CHAR, 
                                       "native", MPI_INFO_NULL);

                //write header (every rank writes the same 15 bytes at offset
                //0; redundant but harmless since the contents are identical)
                MPI_File_write(file, header1, 15, MPI_CHAR, MPI_STATUS_IGNORE);

                //write matrix
                MPI_File_set_view(file, 15,  MPI_UNSIGNED_CHAR, submatrix, 
                                       "native", MPI_INFO_NULL);

                MPI_File_write_all(file, section, rsize*csize, 
                        MPI_UNSIGNED_CHAR, MPI_STATUS_IGNORE);

                //write footer (trailing newline)
                MPI_File_set_view(file, 15+rsize*ncols*csize*nrows,
                        MPI_UNSIGNED_CHAR, MPI_UNSIGNED_CHAR,
                        "native", MPI_INFO_NULL);

                MPI_File_write(file, &footer, 1, MPI_CHAR, MPI_STATUS_IGNORE);

                //flush and release the per-frame file handle
                MPI_File_close(&file);
            }
        }


        // BLOCKED COMMUNICATION //
        if (run_type == 1)
        {
            //exchange with vertical neighbors: send top row up, receive
            //bottom halo from below; even/odd ranks alternate send/receive
            //order to avoid deadlock
            send_to = rank - 1;
            receive_from = rank + 1;

            //figure out what to send
            //top and bottom
            for (i=0;i<rsize;i++)
            {
                ttop[i] = section[i];
                tbot[i] = section[rsize*(csize-1)+i];
            }

            //left n right
            for (i=0;i<csize;i++)
            {
                tleft[i] = section[0 + rsize*i];
                tright[i] = section[rsize-1 + rsize*i];
            }

            //send top, receive bot
            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(ttop, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from >= 0)
                {
                    MPI_Recv(bot, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {

                if (receive_from<world_size && receive_from >= 0)
                {
                    MPI_Recv(bot, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(ttop, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //reverse direction: send bottom row down, receive top halo
            //from above, again alternating by rank parity
            send_to = rank + 1;
            receive_from = rank - 1;

            //send bot, receive top
            if (rank%2==0)
            {
                // printf("%d, %d, %d\n", rank, send_to, receive_from);
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(tbot, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
                
                if (receive_from<world_size && receive_from >= 0)
                {
                    MPI_Recv(top, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                // printf("%d, %d, %d\n", rank, send_to, receive_from);
                if (receive_from<world_size && receive_from >= 0)
                {
                    //*data,count,type,from,tag,comm,mpi_status
                    MPI_Recv(top, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }

                if (send_to<world_size && send_to>=0)
                {
                    //*data,count,type,to,tag,comm
                    MPI_Send(tbot, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
            }
        }
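        // Note: the alternating even/odd ordering above is what prevents
        // deadlock with blocking MPI_Send/MPI_Recv: in each phase, even
        // ranks send while odd ranks receive. The MPI_Sendrecv sketch after
        // this example shows a more compact way to get the same guarantee.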

        // CHECKERED COMMUNICATION //
        else if (run_type == 2)
        {
            //figure out what to send
            //top and bottom
            for (i=0;i<rsize;i++)
            {
                ttop[i] = section[i];
                tbot[i] = section[rsize*(csize-1)+i];
            }

            //left n right
            for (i=0;i<csize;i++)
            {
                tleft[i] = section[0 + rsize*i];
                tright[i] = section[rsize-1 + rsize*i];
            }

            //corners
            ttopleft = tleft[0];
            tbotleft = tleft[csize-1];
            ttopright = tright[0];
            tbotright = tright[csize-1];

            //Send top, receive bot
            send_to = rank - nrows;
            receive_from = rank + nrows;
            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(ttop, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0)
                {
                    MPI_Recv(bot, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {

                if (receive_from<world_size && receive_from>=0)
                {
                    MPI_Recv(bot, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(ttop, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //Send bot, receive top
            send_to = rank + nrows;
            receive_from = rank - nrows;
            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(tbot, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0)
                {
                    MPI_Recv(top, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {

                if (receive_from<world_size && receive_from>=0)
                {
                    MPI_Recv(top, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(tbot, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //Send left, receive right
            send_to = rank - 1;
            receive_from = rank + 1;

            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row)
                {
                    MPI_Send(tleft, 1, column, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row)
                {
                    MPI_Recv(right, 1, column, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row)
                {
                    MPI_Recv(right, 1, column, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row)
                {
                    MPI_Send(tleft, 1, column, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //Send right, receive left (these are column halos of length
            //csize, so the `column` type must be used, not `row`)
            send_to = rank + 1;
            receive_from = rank - 1;

            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row)
                {
                    MPI_Send(tright, 1, column, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row)
                {
                    MPI_Recv(left, 1, column, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row)
                {
                    MPI_Recv(left, 1, column, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row)
                {
                    MPI_Send(tright, 1, column, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //Send topright, receive botleft (diagonal neighbors differ by
            //the vertical stride nrows plus/minus 1, matching the
            //top/bottom exchange above)
            send_to = rank - nrows + 1;
            receive_from = rank + nrows - 1;

            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row-1)
                {
                    MPI_Send(&ttopright, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row+1)
                {
                    MPI_Recv(&botleft, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row+1)
                {
                    MPI_Recv(&botleft, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row-1)
                {
                    MPI_Send(&ttopright, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //Send topleft, receive botright (nrows stride, as above)
            send_to = rank - nrows - 1;
            receive_from = rank + nrows + 1;

            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row-1)
                {
                    MPI_Send(&ttopleft, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row+1)
                {
                    MPI_Recv(&botright, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row+1)
                {
                    MPI_Recv(&botright, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row-1)
                {
                    MPI_Send(&ttopleft, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //Send botleft, receive topright (nrows stride, as above)
            send_to = rank + nrows - 1;
            receive_from = rank - nrows + 1;

            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row+1)
                {
                    MPI_Send(&tbotleft, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row-1)
                {
                    MPI_Recv(&topright, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row-1)
                {
                    MPI_Recv(&topright, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row+1)
                {
                    MPI_Send(&tbotleft, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //Send botright, receive topleft (nrows stride, as above)
            send_to = rank + nrows + 1;
            receive_from = rank - nrows - 1;

            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row+1)
                {
                    MPI_Send(&tbotright, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row-1)
                {
                    MPI_Recv(&topleft, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row-1)
                {
                    MPI_Recv(&topleft, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row+1)
                {
                    MPI_Send(&tbotright, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
            }


            info2[0] = topleft;
            info2[1] = topright;
            info2[2] = botleft;
            info2[3] = botright;

        }
 
        // if (rank == 1){
        //     print_matrix(rsize, 1, top);
        //     print_matrix(rsize, csize, section);
        //     print_matrix(rsize, 1, bot);
        //     printf("\n");
        // }
        // printf("wr=%d,iteration=%d,maxval=%d, 11\n", rank, k,(csize-1)*rsize-1+rsize);
        


        /////////// CELL UPDATES /////////////////
        //count neighbor
        for (i=0;i<csize;i++)
        {
            for (j=0; j<rsize; j++)
            {
                info[0] = i;
                info[1] = j;
                neighbors[i*rsize+j] = count_neighbors(info, info2, section, 
                                    top, bot, left, right);
                // printf("%i",neighbors[i*rsize+j]);
            }
            // printf("\n");
        }

        //update cells
        current_count = 0;
        for (i=0;i<csize;i++)
        {
            for (j=0; j<rsize; j++)
            {
                //cell currently alive
                if (section[i*rsize+j] == 0)
                {
                    //2 or 3 neighbors lives, else die
                    if (neighbors[i*rsize+j] < 2 || 
                        neighbors[i*rsize+j] > 3)
                    {
                        section[i*rsize+j] = 255;
                    }
                }
                else
                {
                    //Exactly 3 neighbors spawns new life
                    if (neighbors[i*rsize+j] == 3)
                    {
                        section[i*rsize+j] = 0;
                    }
                }
            }
        }
    }

    MPI_Barrier(MPI_COMM_WORLD);
    usleep(500000);   //0.5 s pause; sleep() only accepts whole seconds
    //free malloc'd buffers and derived datatypes
    if( field_a != NULL ) free( field_a );
    if( field_b != NULL ) free( field_b );
    free(section);
    free(neighbors);
    free(top);
    free(bot);
    free(left);
    free(right);
    free(ttop);
    free(tbot);
    free(tleft);
    free(tright);
    if (animation == 1)
    {
        free(full_matrix);
        free(full_matrix_buffer);
    }
    MPI_Type_free(&column);
    MPI_Type_free(&row);
    MPI_Finalize();
    exit (0);
}    
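The blocked exchange in the example above orders blocking sends and receives by rank parity to avoid deadlock. A minimal alternative sketch (not part of the original program; the buffer and datatype names follow the snippet) does the same vertical exchange with MPI_Sendrecv, which orders the transfers internally and accepts MPI_PROC_NULL for edge ranks, so both the boundary checks and the even/odd ordering disappear:

#include <mpi.h>

/* Halo exchange for the top/bottom rows using MPI_Sendrecv. Edge ranks pass
   MPI_PROC_NULL, which turns the matching send or receive into a no-op. */
void exchange_rows(unsigned char *ttop, unsigned char *tbot,
                   unsigned char *top, unsigned char *bot,
                   MPI_Datatype row, int rank, int world_size)
{
    int up   = (rank - 1 >= 0)         ? rank - 1 : MPI_PROC_NULL;
    int down = (rank + 1 < world_size) ? rank + 1 : MPI_PROC_NULL;

    /* send my top row up, receive the bottom halo from below */
    MPI_Sendrecv(ttop, 1, row, up,   0,
                 bot,  1, row, down, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    /* send my bottom row down, receive the top halo from above */
    MPI_Sendrecv(tbot, 1, row, down, 0,
                 top,  1, row, up,   0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
}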
Example #15
int main(int argc, char **argv)
{
    MPI_Datatype newtype;
    int i, ndims, array_of_gsizes[3], array_of_distribs[3];
    int order, nprocs, j, len;
    int array_of_dargs[3], array_of_psizes[3];
    int *readbuf, *writebuf, mynod, *tmpbuf, array_size;
    MPI_Count bufcount;
    char *filename;
    int errs = 0, toterrs;
    MPI_File fh;
    MPI_Status status;
    MPI_Request request;
    MPI_Info info = MPI_INFO_NULL;
    int errcode;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &mynod);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    /* process 0 broadcasts the file name to other processes */
    if (!mynod) {
        filename = "testfile";
        len = strlen(filename);
        MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
        MPI_Bcast(filename, len + 1, MPI_CHAR, 0, MPI_COMM_WORLD);
    }
    else {
        MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
        filename = (char *)malloc(len + 1);
        MPI_Bcast(filename, len + 1, MPI_CHAR, 0, MPI_COMM_WORLD);
    }


    /* create the distributed array filetype */
    ndims = 3;
    order = MPI_ORDER_C;

    array_of_gsizes[0] = 32;
    array_of_gsizes[1] = 32;
    array_of_gsizes[2] = 32;

    array_of_distribs[0] = MPI_DISTRIBUTE_BLOCK;
    array_of_distribs[1] = MPI_DISTRIBUTE_BLOCK;
    array_of_distribs[2] = MPI_DISTRIBUTE_BLOCK;

    array_of_dargs[0] = MPI_DISTRIBUTE_DFLT_DARG;
    array_of_dargs[1] = MPI_DISTRIBUTE_DFLT_DARG;
    array_of_dargs[2] = MPI_DISTRIBUTE_DFLT_DARG;

    for (i = 0; i < ndims; i++) array_of_psizes[i] = 0;
    MPI_Dims_create(nprocs, ndims, array_of_psizes);

    MPI_Type_create_darray(nprocs, mynod, ndims, array_of_gsizes,
                           array_of_distribs, array_of_dargs,
                           array_of_psizes, order, MPI_INT, &newtype);
    MPI_Type_commit(&newtype);

    /* initialize writebuf */

    MPI_Type_size_x(newtype, &bufcount);
    bufcount = bufcount / sizeof(int);
    writebuf = (int *)malloc(bufcount * sizeof(int));
    for (i = 0; i < bufcount; i++) writebuf[i] = 1;

    /* scatter-to-self trick: receive a buffer of 1s through the darray type;
     * afterwards tmpbuf is nonzero exactly at the global indices owned by
     * this process, which the loop below turns into expected file values */
    array_size = array_of_gsizes[0] * array_of_gsizes[1] * array_of_gsizes[2];
    tmpbuf = (int *) calloc(array_size, sizeof(int));
    MPI_Irecv(tmpbuf, 1, newtype, mynod, 10, MPI_COMM_WORLD, &request);
    MPI_Send(writebuf, bufcount, MPI_INT, mynod, 10, MPI_COMM_WORLD);
    MPI_Wait(&request, &status);

    j = 0;
    for (i = 0; i < array_size; i++)
        if (tmpbuf[i]) {
            writebuf[j] = i;
            j++;
        }
    free(tmpbuf);

    if (j != bufcount) {
        fprintf(stderr, "Error in initializing writebuf on process %d\n",
                mynod);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    /* end of initialization */

    /* write the array to the file */
    errcode = MPI_File_open(MPI_COMM_WORLD, filename,
                            MPI_MODE_CREATE | MPI_MODE_RDWR, info, &fh);
    if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_open");

    errcode = MPI_File_set_view(fh, 0, MPI_INT, newtype, "native", info);
    if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_set_view");

    errcode = MPI_File_iwrite_all(fh, writebuf, bufcount, MPI_INT, &request);
    if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_iwrite_all");
    MPI_Wait(&request, &status);

    errcode = MPI_File_close(&fh);
    if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_close");

    if (!mynod) {
        /* wkl suggests potential for false " No Errors" if both read
         * and write use the same file view */
        /* solution: rank 0 reads entire file and checks write values */
        errcode = MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_RDONLY, info,
                                &fh);
        if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_open");

        readbuf = (int *)malloc(array_size * sizeof(int));
        errcode = MPI_File_read(fh, readbuf, array_size, MPI_INT, &status);
        if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_read");

        errcode = MPI_File_close(&fh);
        if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_close");

        for (i = 0; i < array_size; i++)
            if (readbuf[i] != i) {
                errs++;
                fprintf(stderr, "Error: write integer %d but read %d\n",
                        i, readbuf[i]);
                break;
            }
        free(readbuf);
    }
    MPI_Barrier(MPI_COMM_WORLD);

    /* now read it back */
    readbuf = (int *)malloc(bufcount * sizeof(int));
    errcode = MPI_File_open(MPI_COMM_WORLD, filename,
                            MPI_MODE_CREATE | MPI_MODE_RDWR, info, &fh);
    if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_open");

    errcode = MPI_File_set_view(fh, 0, MPI_INT, newtype, "native", info);
    if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_set_view");
    errcode = MPI_File_iread_all(fh, readbuf, bufcount, MPI_INT, &request);
    if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_iread_all");
    MPI_Wait(&request, &status);
    errcode = MPI_File_close(&fh);
    if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_close");

    /* check the data read */
    for (i = 0; i < bufcount; i++) {
        if (readbuf[i] != writebuf[i]) {
            errs++;
            fprintf(stderr, "Process %d, readbuf %d, writebuf %d, i %d\n",
                    mynod, readbuf[i], writebuf[i], i);
        }
    }

    MPI_Allreduce(&errs, &toterrs, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
    if (mynod == 0) {
        if (toterrs > 0) {
            fprintf(stderr, "Found %d errors\n", toterrs);
        }
        else {
            fprintf(stdout, " No Errors\n");
        }
    }

    MPI_Type_free(&newtype);
    free(readbuf);
    free(writebuf);
    if (mynod) free(filename);

    MPI_Finalize();
    return 0;
}
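Example #15 calls a handle_error() helper that is not shown. A typical definition in this style of ROMIO/MPICH test (an assumed sketch, not code from the example itself) translates the error code into a readable message and aborts:

#include <mpi.h>
#include <stdio.h>

static void handle_error(int errcode, const char *str)
{
    char msg[MPI_MAX_ERROR_STRING];
    int resultlen;

    /* turn the MPI error code into text, report it, and abort */
    MPI_Error_string(errcode, msg, &resultlen);
    fprintf(stderr, "%s: %s\n", str, msg);
    MPI_Abort(MPI_COMM_WORLD, 1);
}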
Example #16
/* darray_2d_c_test1()
 *
 * Performs a sequence of tests building darrays with single-element
 * blocks, running through all the various positions that the element might
 * come from.
 *
 * Returns the number of errors encountered.
 */
int darray_2d_c_test1(void)
{
    MPI_Datatype darray;
    int array[9];               /* initialized below */
    int array_size[2] = { 3, 3 };
    int array_distrib[2] = { MPI_DISTRIBUTE_BLOCK, MPI_DISTRIBUTE_BLOCK };
    int array_dargs[2] = { MPI_DISTRIBUTE_DFLT_DARG, MPI_DISTRIBUTE_DFLT_DARG };
    int array_psizes[2] = { 3, 3 };

    int i, rank, err, errs = 0, sizeoftype;

    /* pretend we are each rank, one at a time */
    for (rank = 0; rank < 9; rank++) {
        /* set up buffer */
        for (i = 0; i < 9; i++) {
            array[i] = i;
        }

        /* set up type */
        err = MPI_Type_create_darray(9, /* size */
                                     rank, 2,   /* dims */
                                     array_size,
                                     array_distrib,
                                     array_dargs, array_psizes, MPI_ORDER_C, MPI_INT, &darray);
        if (err != MPI_SUCCESS) {
            errs++;
            if (verbose) {
                fprintf(stderr,
                        "error in MPI_Type_create_darray call; aborting after %d errors\n", errs);
            }
            MTestPrintError(err);
            return errs;
        }

        MPI_Type_commit(&darray);

        MPI_Type_size(darray, &sizeoftype);
        if (sizeoftype != sizeof(int)) {
            errs++;
            if (verbose)
                fprintf(stderr, "size of type = %d; should be %d\n", sizeoftype, (int) sizeof(int));
            return errs;
        }

        err = pack_and_unpack((char *) array, 1, darray, 9 * sizeof(int));

        for (i = 0; i < 9; i++) {

            if ((i == rank) && (array[i] != rank)) {
                errs++;
                if (verbose)
                    fprintf(stderr, "[2d array rank=%d]:array[%d] = %d; should be %d\n",
                            rank, i, array[i], rank);
            }
            else if ((i != rank) && (array[i] != 0)) {
                errs++;
                if (verbose)
                    fprintf(stderr, "[2d array rank=%d]:array[%d] = %d; should be %d\n",
                            rank, i, array[i], 0);
            }
        }
        MPI_Type_free(&darray);
    }

    return errs;
}
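The test relies on a pack_and_unpack() helper that is not shown. A plausible sketch of what it does, inferred from the checks that follow it (the actual MPICH test helper may differ): pack the typed data out of the buffer, zero the buffer, then unpack, so only positions covered by the datatype keep their values.

#include <mpi.h>
#include <stdlib.h>
#include <string.h>

static int pack_and_unpack(char *buf, int count, MPI_Datatype type,
                           int initial_bytes)
{
    int pack_size, position = 0;
    char *packbuf;

    /* pack `count` elements of `type` out of buf */
    MPI_Pack_size(count, type, MPI_COMM_SELF, &pack_size);
    packbuf = (char *) malloc(pack_size);
    MPI_Pack(buf, count, type, packbuf, pack_size, &position, MPI_COMM_SELF);

    /* zero the buffer, then unpack: only bytes the type covers survive */
    memset(buf, 0, initial_bytes);
    position = 0;
    MPI_Unpack(packbuf, pack_size, &position, buf, count, type, MPI_COMM_SELF);

    free(packbuf);
    return 0;
}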
Example #17
int main(int argc, char **argv)
{
    MPI_Datatype newtype;
    int i, ndims, array_of_gsizes[3], array_of_distribs[3];
    int order, nprocs, len, flag, err;
    int array_of_dargs[3], array_of_psizes[3];
    int *readbuf, *writebuf, bufcount, mynod;
    char filename[1024];
    MPI_File fh;
    MPI_Status status;
    MPI_Aint size_with_aint;
    MPI_Offset size_with_offset;

    MPI_Init(&argc,&argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &mynod);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

/* process 0 takes the file name as a command-line argument and 
   broadcasts it to other processes */
    if (!mynod) {
	i = 1;
	while ((i < argc) && strcmp("-fname", *argv)) {
	    i++;
	    argv++;
	}
	if (i >= argc) {
	    printf("\n*#  Usage: large_array -fname filename\n\n");
	    MPI_Abort(MPI_COMM_WORLD, 1);
	}
	argv++;
	len = strlen(*argv);
	strcpy(filename, *argv);
	MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
	MPI_Bcast(filename, len+1, MPI_CHAR, 0, MPI_COMM_WORLD);
	printf("This program creates a 4 Gbyte file. Don't run it if you don't have that much disk space!\n");
    }
    else {
	MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
	MPI_Bcast(filename, len+1, MPI_CHAR, 0, MPI_COMM_WORLD);
    }

/* create the distributed array filetype */
    ndims = 3;
    order = MPI_ORDER_C;

    array_of_gsizes[0] = 1024;
    array_of_gsizes[1] = 1024;
    array_of_gsizes[2] = 4*1024/sizeof(int);

    array_of_distribs[0] = MPI_DISTRIBUTE_BLOCK;
    array_of_distribs[1] = MPI_DISTRIBUTE_BLOCK;
    array_of_distribs[2] = MPI_DISTRIBUTE_BLOCK;

    array_of_dargs[0] = MPI_DISTRIBUTE_DFLT_DARG;
    array_of_dargs[1] = MPI_DISTRIBUTE_DFLT_DARG;
    array_of_dargs[2] = MPI_DISTRIBUTE_DFLT_DARG;

    for (i=0; i<ndims; i++) array_of_psizes[i] = 0;
    MPI_Dims_create(nprocs, ndims, array_of_psizes);

/* check if MPI_Aint is large enough for size of global array. 
   if not, complain. */

    size_with_aint = sizeof(int);
    for (i=0; i<ndims; i++) size_with_aint *= array_of_gsizes[i];
    size_with_offset = sizeof(int);
    for (i=0; i<ndims; i++) size_with_offset *= array_of_gsizes[i];
    if (size_with_aint != size_with_offset) {
        printf("Can't use an array of this size unless the MPI implementation defines a 64-bit MPI_Aint\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_Type_create_darray(nprocs, mynod, ndims, array_of_gsizes, 
			   array_of_distribs, array_of_dargs,
			   array_of_psizes, order, MPI_INT, &newtype);
    MPI_Type_commit(&newtype);

/* initialize writebuf */

    MPI_Type_size(newtype, &bufcount);
    bufcount = bufcount/sizeof(int);
    writebuf = (int *) malloc(bufcount * sizeof(int));
    if (!writebuf) printf("Process %d, not enough memory for writebuf\n", mynod);
    for (i=0; i<bufcount; i++) writebuf[i] = mynod*1024 + i;

    /* write the array to the file */
    MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, 
                  MPI_INFO_NULL, &fh);
    MPI_File_set_view(fh, 0, MPI_INT, newtype, "native", MPI_INFO_NULL);
    MPI_File_write_all(fh, writebuf, bufcount, MPI_INT, &status);
    MPI_File_close(&fh);

    free(writebuf);

    /* now read it back */
    readbuf = (int *) calloc(bufcount, sizeof(int));
    if (!readbuf) printf("Process %d, not enough memory for readbuf\n", mynod);

    MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, 
                  MPI_INFO_NULL, &fh);
    MPI_File_set_view(fh, 0, MPI_INT, newtype, "native", MPI_INFO_NULL);
    MPI_File_read_all(fh, readbuf, bufcount, MPI_INT, &status);
    MPI_File_close(&fh);

    /* check the data read */
    flag = 0;
    for (i=0; i<bufcount; i++) 
	if (readbuf[i] != mynod*1024 + i) {
	    printf("Process %d, readbuf=%d, writebuf=%d\n", mynod, readbuf[i], mynod*1024 + i);
            flag = 1;
	}
    if (!flag) printf("Process %d: data read back is correct\n", mynod);

    MPI_Type_free(&newtype);
    free(readbuf);

    MPI_Barrier(MPI_COMM_WORLD);
    if (!mynod) {
	err = MPI_File_delete(filename, MPI_INFO_NULL);
	if (err == MPI_SUCCESS) printf("file deleted\n");
    }

    MPI_Finalize();
    return 0;
}
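The size check in Example #17 accumulates the global-array byte count in both an MPI_Aint and an MPI_Offset and compares the results: MPI_Offset must be wide enough for file sizes, while MPI_Aint may be only 32 bits, so the two products disagree once MPI_Aint wraps. A minimal standalone sketch of the same trick (aint_can_hold_array is a hypothetical helper, not part of the test):

#include <mpi.h>

int aint_can_hold_array(const int *gsizes, int ndims, int elt_size)
{
    MPI_Aint   with_aint   = elt_size;
    MPI_Offset with_offset = elt_size;
    int i;

    for (i = 0; i < ndims; i++) {
        with_aint   *= gsizes[i];
        with_offset *= gsizes[i];
    }
    return with_aint == with_offset;    /* false once MPI_Aint overflows */
}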
Example #18
/* Definitions of Fortran Wrapper routines */
EXPORT_MPI_API void FORTRAN_API mpi_type_create_darray_(MPI_Fint *size, MPI_Fint *rank, MPI_Fint *ndims,
                             MPI_Fint *array_of_gsizes,
			     MPI_Fint *array_of_distribs,
			     MPI_Fint *array_of_dargs,
			     MPI_Fint *array_of_psizes, MPI_Fint *order, 
			     MPI_Fint *oldtype, MPI_Fint *newtype, 
			     MPI_Fint *__ierr )
{
    int i;
    int *l_array_of_gsizes;
    int local_l_array_of_gsizes[MPIR_USE_LOCAL_ARRAY];
    int *l_array_of_distribs;
    int local_l_array_of_distribs[MPIR_USE_LOCAL_ARRAY];
    int *l_array_of_dargs;
    int local_l_array_of_dargs[MPIR_USE_LOCAL_ARRAY];
    int *l_array_of_psizes;
    int local_l_array_of_psizes[MPIR_USE_LOCAL_ARRAY];
    MPI_Datatype oldtype_c, newtype_c;

    oldtype_c = MPI_Type_f2c(*oldtype);

    if ((int)*ndims > 0) {
	if ((int)*ndims > MPIR_USE_LOCAL_ARRAY) {
	    MPIR_FALLOC(l_array_of_gsizes,(int *) MALLOC( *ndims * sizeof(int) ), 
			MPIR_COMM_WORLD, MPI_ERR_EXHAUSTED,
			"MPI_TYPE_CREATE_DARRAY" );

	    MPIR_FALLOC(l_array_of_distribs,(int *) MALLOC( *ndims * sizeof(int) ), 
			MPIR_COMM_WORLD, MPI_ERR_EXHAUSTED,
			"MPI_TYPE_CREATE_DARRAY" );

	    MPIR_FALLOC(l_array_of_dargs,(int *) MALLOC( *ndims * sizeof(int) ), 
			MPIR_COMM_WORLD, MPI_ERR_EXHAUSTED,
			"MPI_TYPE_CREATE_DARRAY" );

	    MPIR_FALLOC(l_array_of_psizes,(int *) MALLOC( *ndims * sizeof(int) ), 
			MPIR_COMM_WORLD, MPI_ERR_EXHAUSTED,
			"MPI_TYPE_CREATE_DARRAY" );
	}
	else {
	    l_array_of_gsizes = local_l_array_of_gsizes;
	    l_array_of_distribs = local_l_array_of_distribs;
	    l_array_of_dargs = local_l_array_of_dargs;
	    l_array_of_psizes = local_l_array_of_psizes;
	}

	for (i=0; i<(int)*ndims; i++) {
	    l_array_of_gsizes[i] = (int)array_of_gsizes[i];
	    l_array_of_distribs[i] = (int)array_of_distribs[i];
	    l_array_of_dargs[i] = (int)array_of_dargs[i];
	    l_array_of_psizes[i] = (int)array_of_psizes[i];
	}
    }

    *__ierr = MPI_Type_create_darray((int)*size, (int)*rank, (int)*ndims,
				     l_array_of_gsizes, l_array_of_distribs,
				     l_array_of_dargs, l_array_of_psizes,
				     (int)*order, oldtype_c, &newtype_c);

    if ((int)*ndims > MPIR_USE_LOCAL_ARRAY) {
	FREE( l_array_of_gsizes );
	FREE( l_array_of_distribs );
	FREE( l_array_of_dargs );
	FREE( l_array_of_psizes );
    }

    *newtype = MPI_Type_c2f(newtype_c);
}
Example #19
File: mpi.c  Project: atchekho/harmpi
//when called with stage = 0, initializes rdump_buffer
//when called with stage = 1, initializes dump_buffer and gdump_buffer
void initialize_parallel_write(int stage)
{
#if MPI && DO_PARALLEL_WRITE
    size_t nvars_dump, nvars_gdump, nvars_gdump2, nvars_rdump, nvars_fdump;
    size_t max_buffer_size_bytes, dump_buffer_size_bytes, gdump_buffer_size_bytes, gdump2_buffer_size_bytes, rdump_buffer_size_bytes, fdump_buffer_size_bytes;
    int array_of_distribs[NDIM], array_of_dargs[NDIM];
    int dim;
    int is_dry_run = 1;


    //figure out the amount of memory needed to hold each dump type
    if (stage) {
      //check if various dumps fit into the above-allocated buffer
      nvars_dump = dump(0, is_dry_run);
      nvars_gdump = gdump(is_dry_run);
      nvars_gdump2 = gdump2(is_dry_run);
      nvars_fdump = NIMG;
      
      dump_buffer_size = nvars_dump*N1*N2*N3;
      gdump_buffer_size = nvars_gdump*N1*N2*N3;
      gdump2_buffer_size = nvars_gdump2*N1*N2*N3;
      fdump_buffer_size = nvars_fdump*N1*N2*N3;
      
      dump_buffer_size_bytes = dump_buffer_size*sizeof(dumptype);
      gdump_buffer_size_bytes = gdump_buffer_size*sizeof(gdumptype);
      gdump2_buffer_size_bytes = gdump2_buffer_size*sizeof(gdump2type);
      fdump_buffer_size_bytes = fdump_buffer_size*sizeof(fdumptype);
      if (i_am_the_master) {
        printf("dump   size = %.2lg GB\n", dump_buffer_size_bytes*mpi_dims[1]*mpi_dims[2]*mpi_dims[3]/(1024.*1024.*1024.));
        printf("gdump  size = %.2lg GB\n", gdump_buffer_size_bytes*mpi_dims[1]*mpi_dims[2]*mpi_dims[3]/(1024.*1024.*1024.));
        printf("gdump2 size = %.2lg GB\n", gdump2_buffer_size_bytes*mpi_dims[1]*mpi_dims[2]*mpi_dims[3]/(1024.*1024.*1024.));
        printf("fdump  size = %.2lg GB\n", fdump_buffer_size_bytes*mpi_dims[1]*mpi_dims[2]*mpi_dims[3]/(1024.*1024.*1024.));
      }
      max_buffer_size_bytes = MY_MAX(dump_buffer_size_bytes,gdump_buffer_size_bytes);
      max_buffer_size_bytes = MY_MAX(max_buffer_size_bytes,gdump2_buffer_size_bytes);
      max_buffer_size_bytes = MY_MAX(max_buffer_size_bytes,fdump_buffer_size_bytes);
    }
    else {
      nvars_rdump = NPR;
      rdump_buffer_size = nvars_rdump*N1*N2*N3;
      rdump_buffer_size_bytes = rdump_buffer_size*sizeof(rdumptype);
      max_buffer_size_bytes = rdump_buffer_size_bytes;
    }
  
    //if already allocated, free memory
    if(mpi_file_buffer) {
      free(mpi_file_buffer);
      mpi_file_buffer = NULL;
    }
    //and then allocate anew to make sure the largest of the dumps fits byte-wise
    mpi_file_buffer = (void*) malloc(max_buffer_size_bytes);
    if (!mpi_file_buffer) {
      fprintf(stderr,"Rank %d could not allocate %ld bytes for holding mpi_file_buffer", mpi_rank, max_buffer_size_bytes);
      MPI_Abort(MPI_COMM_WORLD, errno);
    }
    //all arrays share the same memory; this is possible because the
    //different dump types are written out in sequence, never concurrently
    dump_buffer = (dumptype*)mpi_file_buffer;
    gdump_buffer = (gdumptype*)mpi_file_buffer;
    gdump2_buffer = (gdump2type*)mpi_file_buffer;
    rdump_buffer = (rdumptype*)mpi_file_buffer;
    fdump_buffer = (fdumptype*)failimage; //can write directly since already contiguous array (because no ghost cells)

    //create MPI file types for each of dump types
  
    //initialize MPI arrays
    for (dim=0; dim<NDIM; dim++) {
        array_of_distribs[dim] = MPI_DISTRIBUTE_BLOCK;
        array_of_dargs[dim] = MPI_DISTRIBUTE_DFLT_DARG;
    }

    if (stage) {
      //create new cell and file types for GDUMP; the +1 on mpi_ntot,
      //array_of_distribs, array_of_dargs and mpi_dims skips index 0
      //(the time dimension), passing only the 3 spatial dimensions
      MPI_Type_contiguous(nvars_gdump, MPI_GDUMP_TYPE, &gdump_cell_type);
      MPI_Type_commit(&gdump_cell_type);
      MPI_Type_create_darray(mpi_numtasks, mpi_rank, 3,
                             mpi_ntot+1, array_of_distribs+1,
                             array_of_dargs+1, mpi_dims+1, MPI_ORDER_C,
                             gdump_cell_type, &gdump_file_type);
      MPI_Type_commit(&gdump_file_type);

      //create new cell and file types for GDUMP2
      MPI_Type_contiguous(nvars_gdump2, MPI_GDUMP2_TYPE, &gdump2_cell_type);
      MPI_Type_commit(&gdump2_cell_type);
      MPI_Type_create_darray(mpi_numtasks, mpi_rank, 3,
                             mpi_ntot+1, array_of_distribs+1,
                             array_of_dargs+1, mpi_dims+1, MPI_ORDER_C,
                             gdump2_cell_type, &gdump2_file_type);
      MPI_Type_commit(&gdump2_file_type);

      //create new cell and types for DUMP
      MPI_Type_contiguous(nvars_dump, MPI_DUMP_TYPE, &dump_cell_type);
      MPI_Type_commit(&dump_cell_type);
      MPI_Type_create_darray(mpi_numtasks, mpi_rank, 3,
                             mpi_ntot+1, array_of_distribs+1,
                             array_of_dargs+1, mpi_dims+1, MPI_ORDER_C,
                             dump_cell_type, &dump_file_type);
      MPI_Type_commit(&dump_file_type);

      //create new cell and types for FDUMP
      MPI_Type_contiguous(nvars_fdump, MPI_FDUMP_TYPE, &fdump_cell_type);
      MPI_Type_commit(&fdump_cell_type);
      MPI_Type_create_darray(mpi_numtasks, mpi_rank, 3,
                             mpi_ntot+1, array_of_distribs+1,
                             array_of_dargs+1, mpi_dims+1, MPI_ORDER_C,
                             fdump_cell_type, &fdump_file_type);
      MPI_Type_commit(&fdump_file_type);

      //if(i_am_the_master) fprintf(stderr, "dump_buffer_size = %ld bytes, nvars_dump = %ld\n", (long int)dump_buffer_size, (long int)nvars_dump);
      //if(i_am_the_master) fprintf(stderr, "gdump_buffer_size = %ld bytes, nvars_gdump = %ld\n", (long int)gdump_buffer_size, (long int)nvars_gdump);

    }
    else {
      //create new cell and types for RDUMP
      MPI_Type_contiguous(nvars_rdump, MPI_RDUMP_TYPE, &rdump_cell_type);
      MPI_Type_commit(&rdump_cell_type);
      MPI_Type_create_darray(mpi_numtasks, mpi_rank, 3,
                             mpi_ntot+1, array_of_distribs+1,
                             array_of_dargs+1, mpi_dims+1, MPI_ORDER_C,
                             rdump_cell_type, &rdump_file_type);
      MPI_Type_commit(&rdump_file_type);
      //if(i_am_the_master) fprintf(stderr, "rdump_buffer_size = %ld bytes, nvars_rdump = %ld\n", (long int)rdump_buffer_size, (long int)nvars_rdump);
    }
#endif
}
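A hypothetical usage sketch (the names are taken from the snippet above; harmpi's actual dump writer may differ): once initialize_parallel_write() has built dump_cell_type and dump_file_type, a collective write places every rank's buffer into its block of the global array.

void write_dump_file_sketch(const char *fname)
{
    MPI_File fh;

    MPI_File_open(MPI_COMM_WORLD, (char *) fname,
                  MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);

    /* elementary type = one cell (nvars values); file type = this rank's
       darray block of the global grid */
    MPI_File_set_view(fh, 0, dump_cell_type, dump_file_type,
                      "native", MPI_INFO_NULL);

    /* every rank writes its N1*N2*N3 local cells collectively */
    MPI_File_write_all(fh, dump_buffer, N1 * N2 * N3, dump_cell_type,
                       MPI_STATUS_IGNORE);

    MPI_File_close(&fh);
}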