Example #1
#include <stdio.h>
#include <math.h>
#include <mpi.h>
/* NDIMS, ROOT, TRUE/FALSE, ITERATIONS and the pgm/halo helper routines
   are assumed to come from the project's own header */

int main()
{

  MPI_Init(NULL, NULL);
/* MPI variables */
  MPI_Comm comm;
  MPI_Status status;
  MPI_Request request;
  int size, rank, tag;
  MPI_Comm comm2d;
  int disp, left, right, up, down, reorder;
  int dims[NDIMS], period[NDIMS], direction[NDIMS];

/* variable for the program */
  int nx, ny, nxp, nyp, nxpe, nype;
  int i, j, iter;
  int lastcheck, checkinc;
  double max, delta;
  double avg, mean;
  char picName[20] = "edgeCHANGETHIS.pgm";

/*
 * find the size of the image so the arrays can be defined
 */
  pgmsize(picName, &nx, &ny); 

  comm = MPI_COMM_WORLD;
  MPI_Comm_size(comm, &size);
  tag = 1;


/* Introduce Cartesian topology */
  for(i=0; i<NDIMS; ++i)
  {
    dims[i] = 0;
    period[i] = FALSE;    /* TRUE gives Cyclic */
    direction[i] = i;       /* shift along the same index as element of the array*/
  }
  reorder = TRUE;      /* allows the processes to become reordered to hopefully improve efficiency */
  disp = 1;            /* Shift by 1 */

  MPI_Dims_create(size,NDIMS,dims);
  MPI_Cart_create(comm,NDIMS,dims,period,reorder,&comm2d);
  MPI_Comm_rank(comm2d,&rank);
  MPI_Cart_shift(comm2d,direction[1],disp,&left,&right);
  MPI_Cart_shift(comm2d,direction[0],disp,&up,&down);

/* check that the image is large enough to be split across the processes; if not, quit */
  if(nx < dims[1] || ny < dims[0])
  {
    if(ROOT == rank)
    {
      printf("too many processors running on job, %d in x direction but only %d elements, %d in y, %d elements\n", dims[1], nx, dims[0], ny);
    }
    MPI_Finalize();
    return 1;
  }

  initialise_local_array_sizes(nx, ny, &nxp, &nyp, &nxpe, &nype, dims, rank, size);

/* now declare the arrays necessary (note they can be different sizes on different processes) */
  float localBuf[nxp][nyp];
  float localEdge[nxp+2][nyp+2], localOld[nxp+2][nyp+2], localNew[nxp+2][nyp+2];
  float globalImage[nx][ny];
  




/*
 * set the halos of all the appropriate arrays to 255
 */
  set_halos(localEdge,localOld, localNew, nxp, nyp);

  if(ROOT == rank)
  {
    printf("Reading in Picture\n");
    pgmread(picName, globalImage, nx, ny);
  }

/*set up all the datatypes that will need to be used*/
/*send contiguous halos*/
  MPI_Datatype mcols;
  MPI_Type_contiguous(nyp, MPI_FLOAT, &mcols);
  MPI_Type_commit(&mcols);

/*send non-contiguous halos*/
  MPI_Datatype mrows;
  MPI_Type_vector(nxp, 1, nyp+2, MPI_FLOAT, &mrows); /*nyp+2 since will be used on nyp+2 size arrays*/
  MPI_Type_commit(&mrows);

/*scatter data to processes with same size arrays as ROOT*/
  MPI_Datatype scatter[5];   /* indices 0, 1, 3 and 4 are used below, so the array must hold at least 5 types */
  MPI_Type_vector(nxp, nyp, ny,  MPI_FLOAT, &scatter[3]);
  MPI_Type_commit(&scatter[3]);

/*scatter data to processes with different size arrays than ROOT in dim[0]*/
  MPI_Type_vector(nxp, nype, ny,  MPI_FLOAT, &scatter[0]);
  MPI_Type_commit(&scatter[0]);

/*scatter data to processes with different size arrays than ROOT in dim[1]*/
  MPI_Type_vector(nxpe, nyp, ny,  MPI_FLOAT, &scatter[1]);
  MPI_Type_commit(&scatter[1]);

/*scatter data to processes with different size arrays than ROOT in dim[0] and dim[1]*/
  MPI_Type_vector(nxpe, nype, ny,  MPI_FLOAT, &scatter[4]);
  MPI_Type_commit(&scatter[4]);

  /* Scatter the data from process 0 to the rest */
  if(ROOT == rank)
  {
    printf("Scattering image\n");
    scatter_data(globalImage, localBuf, ny, nxp, nyp, dims, rank, comm2d, scatter);
  }
  else
  {
    MPI_Recv(localBuf, nxp*nyp, MPI_FLOAT, 0, rank, comm2d, &status);
  }

  
/*
 * set up the edge data to be used in computation
 */
  for(i=0; i<nxp; ++i)
  {
    for(j=0; j<nyp; ++j)
    {
      localEdge[i+1][j+1] = localBuf[i][j];
      localOld[i+1][j+1] = 255;
    }
  }

/*
 * computation loop
 */
  if(ROOT == rank)
  {
    printf("Performing update routine for %d iterations\n", ITERATIONS);
  }

  double t1, t2;
  t1 = MPI_Wtime();

  tag = 2;
  lastcheck = checkinc = iter = 0;
  delta = 1;

  while(iter < ITERATIONS)
  {
    send_halos(localOld, left, right, up, down, comm2d, tag, nxp, nyp, mrows, mcols);

    avg = 0;
    for(i=1; i<nxp+1; ++i)
    {
      for(j=1; j<nyp+1; ++j)
      {
        localNew[i][j] = 0.25*(localOld[i-1][j] + localOld[i+1][j] + localOld[i][j-1] + localOld[i][j+1] - localEdge[i][j]);
        avg = avg + localNew[i][j];
      }
    }

    max = 0;

    for(i=1; i<nxp+1; ++i)
    {
      for(j=1; j<nyp+1; ++j)
      {
        if(fabs(localNew[i][j] - localOld[i][j]) > max)
        {
          max = fabs(localNew[i][j] - localOld[i][j]);
        }
        localOld[i][j] = localNew[i][j];
      }
    }

/*
 * periodically compute the average pixel value and the convergence delta
 */
    if(iter == lastcheck + checkinc)
    {
      lastcheck = iter;
      MPI_Reduce(&avg, &mean, 1, MPI_DOUBLE, MPI_SUM, ROOT, comm2d);
      MPI_Allreduce(&max, &delta, 1, MPI_DOUBLE, MPI_MAX, comm2d);
      if(ROOT == rank)
      {
     //   printf("iteration %d, average pixel value is %f, current delta %f\n", iter, mean/(nx*ny), delta);
      }
      checkinc = (int)(delta*500);
      if(checkinc > 200)
      {
        checkinc = 500;
      }
    }

    ++iter; 
    if(ITERATIONS == iter)
    {
      break;
    } 
  }

  t2 = MPI_Wtime();

  if(ROOT == rank)
  {
    printf("finished after %d iterations, delta was %f\n", iter-1, delta);
    printf("seconds per iteration: %f\n", (t2-t1)/(iter-1));
  }

  for(i=0; i<nxp; ++i)
  {
    for(j=0; j<nyp; ++j)
    {
      localBuf[i][j] = localOld[i+1][j+1];
    }
  }


  tag = 3;

  if(ROOT == rank)
  {

    printf("recieving back data\n");
    receive_data(globalImage, localBuf, ny, nxp, nyp, dims, tag, rank, comm2d, scatter);
  }
  else
  {
    MPI_Issend(localBuf, nxp*nyp, MPI_FLOAT, ROOT, tag, comm2d, &request);
    MPI_Wait(&request, &status);
  }


  if(ROOT == rank)
  {
    pgmwrite("parpictureCHANGETHIS.pgm", globalImage, nx, ny);
  }

  MPI_Finalize();

  return 0;

}
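
The helper routines used above (pgmsize, pgmread, pgmwrite, initialise_local_array_sizes, set_halos, send_halos, scatter_data, receive_data) are defined elsewhere in the project. As a rough illustration of how the committed mrows/mcols datatypes could drive the halo swap, here is a minimal sketch based on MPI_Sendrecv; the name, parameter order and use of blocking calls are assumptions, not the original send_halos.

/* Hypothetical halo-exchange sketch, not the project's send_halos().
 * old is the (nxp+2) x (nyp+2) local array with a one-cell halo;
 * mcols is a contiguous row of nyp floats and mrows a column of nxp floats
 * with stride nyp+2, exactly as committed in the example above.
 * MPI_PROC_NULL neighbours at the domain edges are handled by MPI_Sendrecv. */
void exchange_halos(int nxp, int nyp, float old[nxp+2][nyp+2],
                    int left, int right, int up, int down,
                    MPI_Comm comm2d, int tag,
                    MPI_Datatype mrows, MPI_Datatype mcols)
{
  MPI_Status status;

  /* rows: send the top interior row up and receive the bottom halo from below,
   * then send the bottom interior row down and receive the top halo from above */
  MPI_Sendrecv(&old[1][1],     1, mcols, up,   tag,
               &old[nxp+1][1], 1, mcols, down, tag, comm2d, &status);
  MPI_Sendrecv(&old[nxp][1],   1, mcols, down, tag,
               &old[0][1],     1, mcols, up,   tag, comm2d, &status);

  /* columns: same pattern for the left/right neighbours using the strided type */
  MPI_Sendrecv(&old[1][1],     1, mrows, left,  tag,
               &old[1][nyp+1], 1, mrows, right, tag, comm2d, &status);
  MPI_Sendrecv(&old[1][nyp],   1, mrows, right, tag,
               &old[1][0],     1, mrows, left,  tag, comm2d, &status);
}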
Example #2
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
/* MAXN and the helper routines (parameters, initialize_inputs, print_inputs,
   scatter_data, eliminate) are assumed to come from the rest of the project */

int main(int argc, char **argv) {

//declare the required data structures

  int N =32;  /* Matrix size */

  /* Matrices and vectors */
  float *A = malloc(MAXN * MAXN * sizeof(float));
  int i,j;
   
   //code commented out below; it was used for testing.
  /*
  float temp[64] = {1,2,3,4,5,6,7,8,
		  2,3,4,1,7,4,5,6,
		  2,3,2,1,2,2,1,1,
		  4,5,4,5,5,3,4,2,
		  1,4,8,4,3,7,6,6,
		  9,7,7,3,2,8,5,4,
		  8,6,4,1,1,5,3,3,
		  8,3,2,6,4,6,9,7};

 for(i=0;i<N;i++){
	for(j=0;j<N;j++)
		{
		*(A+((N*i)+j))=temp[i*N+j];
		//printf(" %f",*(A+((8*i)+j)));
		}
	//printf("\n");	
	}

*/
float B[MAXN];// = {5,6,7,3,5,2,9,5};
float X[MAXN];// = {0,0,0,0,0,0,0,0};


  int my_rank=0;   /* My process rank           */
  int p;         /* The number of processes   */
  
  //clock time recording variables
  double      start_time,end_time=0.0; 

///////////////////MPI code starts////////////////////


  //status variable used to check status of communication operation.	 
  MPI_Status  status;

  /* Let the system do what it needs to start up MPI */
   MPI_Init(&argc, &argv);

  /* Get my process rank */
   MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

  /* Find out how many processes are being used */
   MPI_Comm_size(MPI_COMM_WORLD, &p);
     
  if(my_rank==0)
  {
    /* Process program parameters */
    N = parameters(argc, argv);

    /* Initialize A and B */
    initialize_inputs(A, B, X, N);

    /* Print input matrices */
    print_inputs(A, B, N);

    //Start clock and record the start time.
    start_time = MPI_Wtime();
  }

  //broadcast the matrix size read by process 0 to all processes.
  MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);

  //we need all processes to wait here until all others arrive,
  //to make sure that the input matrix has been initialized by
  //process 0 and the matrix size has been propagated to all processes.
  MPI_Barrier(MPI_COMM_WORLD);


  //declare the local variables
   int local_no_of_rows;  //number of rows to be processed by each process
   int local_matrix_size; //size of the matrix
   float local_norm_row[N]; //the current normalization row
   float local_matrix_A[N][N]; //the part of A matrix on which each process will work
   float local_matrix_B[N];  //the part of B matrix on which each process will work
   int rows_per_process[p];  //the number of rows distributed to each process
   float local_norm_B;      //the element on which B will be normalized
   int displ[p];	    //displacement variable
   int norm=0;              //the index of the current normalizing row
 
 //The outermost loop of the Gaussian elimination.
 for (norm = 0; norm < N - 1; norm++) {

   //scatter the data across all processes.
   //This call scatters the matrix A and broadcasts the current normalizing row
   //and the number of rows each process will work on.
   scatter_data(norm,
		my_rank,
		p,
		A,
		N,
		&local_no_of_rows,
		&local_matrix_size,
		local_norm_row,
		&(local_matrix_A[0][0]),
		&rows_per_process[0]);
   
   //calculate the send counts and displacement vector for the scatter of the B matrix.
   if(my_rank==0)
   {
	//printf(" %d", *(rows_per_process));
     *(displ)=0;
     for(j=1;j<p;j++)
		{
		 *(displ+j) = rows_per_process[j-1]+ *(displ+j-1);
		 //printf(" %d", *(rows_per_process+j));	
		}
   }
   
   //This call scatters the matrix B. Different processes may have different
   //numbers of elements to work on when the matrix size is not evenly divisible
   //by the number of processes, hence MPI_Scatterv() is used instead of MPI_Scatter().
   MPI_Scatterv(B+norm+1, rows_per_process, displ, MPI_FLOAT,local_matrix_B,local_no_of_rows, MPI_FLOAT, 
                                                              0, MPI_COMM_WORLD); 

   //broadcast the element against which matrix B will be normalized.
   local_norm_B = B[norm];
   MPI_Bcast(&local_norm_B, 1, MPI_FLOAT, 0, MPI_COMM_WORLD);
   
   //each process performs the following elimination operation on their
   //share of the matrix A and B.
   eliminate(local_matrix_size,
		local_no_of_rows, 
		&local_norm_row[0], 
		&(local_matrix_A[0][0]),
		norm,
		&(local_matrix_B[0]),
		local_norm_B);   

   //we need to calculate the counts and displacement for the Gather operation
   //of the processed matrix A, after each iteration.
   int counts_for_gather[p];
     int displacements_for_gather[p];
    if(my_rank==0)
   {
     
     *(displacements_for_gather)=0;
     counts_for_gather[0] = rows_per_process[0]*local_matrix_size;
  	
     for(j=1;j<p;j++)
		{
		counts_for_gather[j] = rows_per_process[j]*local_matrix_size;
		 *(displacements_for_gather+j) = counts_for_gather[j-1]+ *(displacements_for_gather+j-1);
		}
   }


   //here we gather the processed matrix A from all processes and store it locally
   MPI_Gatherv(local_matrix_A,
		local_no_of_rows*local_matrix_size, 
		MPI_FLOAT,
                A+(N*(norm+1)),
		counts_for_gather, 
		displacements_for_gather,
                MPI_FLOAT, 
		0, 
		MPI_COMM_WORLD);

   //similarly we gather the processed matrix B.
   MPI_Gatherv(local_matrix_B,
		local_no_of_rows, 
		MPI_FLOAT,
                B+norm+1,
		rows_per_process, 
		displ,
                MPI_FLOAT, 
		0, 
		MPI_COMM_WORLD);



 }

  //We need to wait for all processes to complete before we go ahead with
  //back substitution.
  MPI_Barrier(MPI_COMM_WORLD);

  //the back substitution is performed only by process 0.
  int row,col;
  if(my_rank==0){
  /* Back substitution */
  for (row = N - 1; row >= 0; row--) {
    X[row] = B[row];
    for (col = N-1; col > row; col--) {
      X[row] -= *(A+(N*row)+col) * X[col];
    }
    X[row] /= *(A+(N*row)+col);
  }


  //Stop clock as operation is finished.
  end_time = MPI_Wtime();  
	
  //display X if the matrix size is small.
  if (N < 100) {
    printf("\nX = [");
    for (row = 0; row < N; row++) {
      printf("%5.2f%s", X[row], (row < N-1) ? "; " : "]\n");
    }
  }

  //print the execution time for performance analysis purposes.
  printf("\n\nThe total execution time as recorded on process 0 = %f seconds\n", end_time - start_time);
  
}
  MPI_Finalize();

  return 0;
}
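
The eliminate() routine called inside the loop is defined elsewhere in the project. A minimal sketch of what the per-process elimination step could look like, assuming a flat row-major layout of the local block and that norm_row holds the full broadcast pivot row (the name and signature below are illustrative, not the project's actual function):

/* Hypothetical sketch of the per-process elimination step.
 * n is the matrix size, local_rows the number of rows held by this process,
 * norm the index of the current pivot row, norm_row the broadcast pivot row
 * and norm_B the pivot element of B.  Each local row has the pivot column
 * eliminated using the standard Gaussian elimination update. */
void eliminate_sketch(int n, int local_rows, const float *norm_row,
                      float *local_A, int norm,
                      float *local_B, float norm_B)
{
  int row, col;
  for (row = 0; row < local_rows; row++) {
    float multiplier = local_A[row * n + norm] / norm_row[norm];
    for (col = norm; col < n; col++) {
      local_A[row * n + col] -= norm_row[col] * multiplier;
    }
    local_B[row] -= norm_B * multiplier;
  }
}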