コード例 #1
0
ファイル: error.c プロジェクト: BillTheBest/RCCE
void error_norm(double rms[]) {

//---------------------------------------------------------------------
//     Computes, for each of the five solution components, the norm of
//     the difference between the computed solution u and the exact
//     solution.
//
//     rms : output; receives the five reduced and normalised values.
//
//     Each process accumulates squared differences over its own cells
//     in rms_work; the partial sums are then combined into rms with
//     RCCE_allreduce (RCCE_SUM over RCCE_COMM_WORLD).
//---------------------------------------------------------------------

      int c, i, j, k, m, ii, jj, kk, d;
      double xi, eta, zeta, u_exact[5], rms_work[5], add;

      // start from empty accumulators
      for (m = 1; m <= 5; m++) rms_work(m) = 0.0e0;

      // i/j/k are the grid indices used for the coordinates, while
      // ii/jj/kk are the matching zero-based offsets used to index u
      for (c = 1; c <= ncells; c++) {
         for (kk = 0, k = cell_low(3,c); k <= cell_high(3,c); k++, kk++) {
            zeta = (double)(k) * dnzm1;
            for (jj = 0, j = cell_low(2,c); j <= cell_high(2,c); j++, jj++) {
               eta = (double)(j) * dnym1;
               for (ii = 0, i = cell_low(1,c); i <= cell_high(1,c); i++, ii++) {
                  xi = (double)(i) * dnxm1;
                  exact_solution(xi, eta, zeta, u_exact);

                  for (m = 1; m <= 5; m++) {
                     add = u(m,ii,jj,kk,c)-u_exact(m);
                     rms_work(m) += add*add;
                  }
               }
            }
         }
      }

      // combine the per-process sums of squares
      RCCE_allreduce((char*)rms_work, (char*)rms, 5, RCCE_DOUBLE, RCCE_SUM, RCCE_COMM_WORLD);

      // divide by (grid_points(d)-2) in each of the three directions,
      // then take the square root
      for (m = 1; m <= 5; m++) {
         for (d = 1; d <= 3; d++) {
            rms(m) /= (double)(grid_points(d)-2);
         }
         rms(m) = sqrt(rms(m));
      }
}
コード例 #2
0
ファイル: error.c プロジェクト: HerbertJordan/insieme
//---------------------------------------------------------------------
// Computes, per solution component, the norm of the difference between
// the computed solution u and the exact solution.
//
// rms: output array of 5 values; overwritten.
//
// Every thread sums squared differences into its own scratch array and
// adds that array to rms once, with atomic updates, when its share of
// the k-loop is finished.
//---------------------------------------------------------------------
void error_norm(double rms[5])
{
  int m, d;

  for (m = 0; m < 5; m++) {
    rms[m] = 0.0;
  }

  #pragma omp parallel default(shared) shared(rms)
  {
    // declared inside the region, so each thread owns its copies
    int i, j, k, comp;
    double xi, eta, zeta, diff;
    double exact[5], partial[5];

    for (comp = 0; comp < 5; comp++) {
      partial[comp] = 0.0;
    }

    // nowait: a thread can go straight on to its atomic updates
    #pragma omp for nowait
    for (k = 0; k < grid_points[2]; k++) {
      zeta = (double)k * dnzm1;
      for (j = 0; j < grid_points[1]; j++) {
        eta = (double)j * dnym1;
        for (i = 0; i < grid_points[0]; i++) {
          xi = (double)i * dnxm1;
          exact_solution(xi, eta, zeta, exact);

          for (comp = 0; comp < 5; comp++) {
            diff = u[k][j][i][comp]-exact[comp];
            partial[comp] += diff*diff;
          }
        }
      }
    }

    for (comp = 0; comp < 5; comp++) {
      #pragma omp atomic
      rms[comp] += partial[comp];
    }
  } //end parallel

  // divide by (grid_points[d]-2) in each direction, then take the root
  for (m = 0; m < 5; m++) {
    for (d = 0; d < 3; d++) {
      rms[m] /= (double)(grid_points[d]-2);
    }
    rms[m] = sqrt(rms[m]);
  }
}
コード例 #3
0
ファイル: gmres_md.cpp プロジェクト: xflying777/OpenAcc
// Driver program: reads the grid-size exponents and the GMRES limits from
// stdin, builds the problem data, runs gmres(), then prints the error against
// exact_solution() and the time measured with clock().
//
// Returns 0 on success and 1 on invalid input or allocation failure.
int main()
{
	int p, q, r, N, max_restart, max_iter;
	clock_t t1, t2;
	printf("\n");
	printf(" Input N = 2^p * 3^q * 5^r - 1, (p, q, r) =  ");
	if (scanf("%d %d %d", &p, &q, &r) != 3 || p < 0 || q < 0 || r < 0)
	{
		fprintf(stderr, "\n Invalid input: expected three non-negative integers p q r.\n");
		return 1;
	}

	// Every buffer holds N*N doubles and the int expression N*N is passed to
	// error(), so N is capped at 46340 = floor(sqrt(2^31 - 1)) (assumes a
	// 32-bit int).  N == 0 (p = q = r = 0) is rejected as an empty problem.
	const double n_real = pow(2.0, p) * pow(3.0, q) * pow(5.0, r) - 1.0;
	if (n_real < 1.0 || n_real > 46340.0)
	{
		fprintf(stderr, "\n Invalid input: N = 2^p * 3^q * 5^r - 1 must lie in [1, 46340].\n");
		return 1;
	}
	// pow() may return a value slightly below the exact integer; round
	// instead of truncating.
	N = (int)(n_real + 0.5);

	printf(" Please input max restart times max_restart = ");
	if (scanf("%d",&max_restart) != 1)
	{
		fprintf(stderr, "\n Invalid input: max_restart must be an integer.\n");
		return 1;
	}
	printf(" Please input max iteration times max_iter = ");
	if (scanf("%d",&max_iter) != 1)
	{
		fprintf(stderr, "\n Invalid input: max_iter must be an integer.\n");
		return 1;
	}
	printf("\n N = %d , max_restart = %d , max_iter = %d \n \n", N, max_restart, max_iter);

	// Compute the element count in size_t so the multiplication cannot
	// overflow int before it reaches malloc.
	const size_t count = (size_t)N * (size_t)N;
	double tol;
	double *A = (double *) malloc(count*sizeof(double));
	double *D = (double *) malloc(count*sizeof(double));
	double *x = (double *) malloc(count*sizeof(double));
	double *b = (double *) malloc(count*sizeof(double));
	double *u = (double *) malloc(count*sizeof(double));
	if (A == NULL || D == NULL || x == NULL || b == NULL || u == NULL)
	{
		fprintf(stderr, " Out of memory allocating five %d x %d arrays.\n", N, N);
		// free(NULL) is a no-op, so releasing all five is safe.
		free(A);
		free(D);
		free(x);
		free(b);
		free(u);
		return 1;
	}

	initial_x(x, N);
	initial_A(A, N);
	initial_D(D, N);
	source(b, N);

	tol = 1.0e-6;
	// Only the solver call is timed.
	t1 = clock();
	gmres(A, D, x, b, N, max_restart, max_iter, tol);
	t2 = clock();
	exact_solution(u, N);
	//printf(" u[%d][%d] = %f \n", N/2, N/2, u[N*N/2+N/2]);
	//printf(" x[%d][%d] = %f \n", N/2, N/2, x[N*N/2+N/2]);

	printf(" error = %e \n", error(x, u, N*N));
	printf(" times = %f \n \n", 1.0*(t2-t1)/CLOCKS_PER_SEC);

	free(A);
	free(D);
	free(x);
	free(b);
	free(u);

	return 0;
}
コード例 #4
0
ファイル: exact_rhs.c プロジェクト: ashwinma/multicl
//---------------------------------------------------------------------
// compute the right hand side based on exact solution
//
// Fills the `forcing` array (declared outside this function):
//   1. every entry of forcing is set to zero;
//   2. for each coordinate direction (xi, eta, zeta) the exact solution
//      is evaluated along one grid line at a time, and flux differences
//      plus a fourth-order dissipation term are accumulated into
//      forcing at the interior points of that line;
//   3. the sign of forcing is flipped at all interior points.
//
// The line buffers ue, buf, cuf and q are also declared outside this
// function and are overwritten for every grid line processed:
//   ue[.][m]  - exact solution values (m = 0..4)
//   buf[.][m] - for m = 1..4, ue[.][m] divided by ue[.][0];
//               buf[.][0] is the sum of squares of buf[.][1..3]
//   cuf[.]    - square of the buf component belonging to the sweep
//               direction (1 for xi, 2 for eta, 3 for zeta)
//   q[.]      - 0.5 * (buf1*ue1 + buf2*ue2 + buf3*ue3)
//
// NOTE(review): the dissipation code below addresses the points 1, 2,
// grid_points[d]-3 and grid_points[d]-2 with fixed offsets and without
// range checks; the grid must be large enough for those indices to be
// valid - confirm the minimum size guaranteed by the caller.
//---------------------------------------------------------------------
void exact_rhs()
{
  double dtemp[5], xi, eta, zeta, dtpp;
  int m, i, j, k, ip1, im1, jp1, jm1, km1, kp1;

  //---------------------------------------------------------------------
  // initialize: zero forcing over the whole grid, boundary points included
  //---------------------------------------------------------------------
  for (k = 0; k <= grid_points[2]-1; k++) {
    for (j = 0; j <= grid_points[1]-1; j++) {
      for (i = 0; i <= grid_points[0]-1; i++) {
        for (m = 0; m < 5; m++) {
          forcing[k][j][i][m] = 0.0;
        }
      }
    }
  }

  //---------------------------------------------------------------------
  // xi-direction flux differences
  // (one i-line at a time, for every interior (j,k) pair)
  //---------------------------------------------------------------------
  for (k = 1; k <= grid_points[2]-2; k++) {
    zeta = (double)(k) * dnzm1;
    for (j = 1; j <= grid_points[1]-2; j++) {
      eta = (double)(j) * dnym1;

      // fill the line buffers for the complete i-line, end points included,
      // because the stencils below read the neighbours i-1 and i+1
      for (i = 0; i <= grid_points[0]-1; i++) {
        xi = (double)(i) * dnxm1;

        exact_solution(xi, eta, zeta, dtemp);
        for (m = 0; m < 5; m++) {
          ue[i][m] = dtemp[m];
        }

        // reciprocal of component 0, used to scale components 1..4
        dtpp = 1.0 / dtemp[0];

        for (m = 1; m < 5; m++) {
          buf[i][m] = dtpp * dtemp[m];
        }

        cuf[i]    = buf[i][1] * buf[i][1];
        buf[i][0] = cuf[i] + buf[i][2] * buf[i][2] + buf[i][3] * buf[i][3];
        q[i] = 0.5*(buf[i][1]*ue[i][1] + buf[i][2]*ue[i][2] +
                    buf[i][3]*ue[i][3]);
      }

      // central differences at the interior points of the line
      for (i = 1; i <= grid_points[0]-2; i++) {
        im1 = i-1;
        ip1 = i+1;

        forcing[k][j][i][0] = forcing[k][j][i][0] -
          tx2*( ue[ip1][1]-ue[im1][1] )+
          dx1tx1*(ue[ip1][0]-2.0*ue[i][0]+ue[im1][0]);

        forcing[k][j][i][1] = forcing[k][j][i][1] - tx2 * (
            (ue[ip1][1]*buf[ip1][1]+c2*(ue[ip1][4]-q[ip1]))-
            (ue[im1][1]*buf[im1][1]+c2*(ue[im1][4]-q[im1])))+
          xxcon1*(buf[ip1][1]-2.0*buf[i][1]+buf[im1][1])+
          dx2tx1*( ue[ip1][1]-2.0* ue[i][1]+ue[im1][1]);

        forcing[k][j][i][2] = forcing[k][j][i][2] - tx2 * (
            ue[ip1][2]*buf[ip1][1]-ue[im1][2]*buf[im1][1])+
          xxcon2*(buf[ip1][2]-2.0*buf[i][2]+buf[im1][2])+
          dx3tx1*( ue[ip1][2]-2.0*ue[i][2] +ue[im1][2]);

        forcing[k][j][i][3] = forcing[k][j][i][3] - tx2*(
            ue[ip1][3]*buf[ip1][1]-ue[im1][3]*buf[im1][1])+
          xxcon2*(buf[ip1][3]-2.0*buf[i][3]+buf[im1][3])+
          dx4tx1*( ue[ip1][3]-2.0* ue[i][3]+ ue[im1][3]);

        forcing[k][j][i][4] = forcing[k][j][i][4] - tx2*(
            buf[ip1][1]*(c1*ue[ip1][4]-c2*q[ip1])-
            buf[im1][1]*(c1*ue[im1][4]-c2*q[im1]))+
          0.5*xxcon3*(buf[ip1][0]-2.0*buf[i][0]+
              buf[im1][0])+
          xxcon4*(cuf[ip1]-2.0*cuf[i]+cuf[im1])+
          xxcon5*(buf[ip1][4]-2.0*buf[i][4]+buf[im1][4])+
          dx5tx1*( ue[ip1][4]-2.0* ue[i][4]+ ue[im1][4]);
      }

      //---------------------------------------------------------------------
      // Fourth-order dissipation
      // The two points next to each end of the line use shortened stencils
      // (5,-4,1 and -4,6,-4,1); all other interior points use the full
      // five-point stencil 1,-4,6,-4,1.
      //---------------------------------------------------------------------
      for (m = 0; m < 5; m++) {
        i = 1;
        forcing[k][j][i][m] = forcing[k][j][i][m] - dssp *
          (5.0*ue[i][m] - 4.0*ue[i+1][m] +ue[i+2][m]);
        i = 2;
        forcing[k][j][i][m] = forcing[k][j][i][m] - dssp *
          (-4.0*ue[i-1][m] + 6.0*ue[i][m] -
            4.0*ue[i+1][m] +     ue[i+2][m]);
      }

      for (i = 3; i <= grid_points[0]-4; i++) {
        for (m = 0; m < 5; m++) {
          forcing[k][j][i][m] = forcing[k][j][i][m] - dssp*
            (ue[i-2][m] - 4.0*ue[i-1][m] +
             6.0*ue[i][m] - 4.0*ue[i+1][m] + ue[i+2][m]);
        }
      }

      for (m = 0; m < 5; m++) {
        i = grid_points[0]-3;
        forcing[k][j][i][m] = forcing[k][j][i][m] - dssp *
          (ue[i-2][m] - 4.0*ue[i-1][m] +
           6.0*ue[i][m] - 4.0*ue[i+1][m]);
        i = grid_points[0]-2;
        forcing[k][j][i][m] = forcing[k][j][i][m] - dssp *
          (ue[i-2][m] - 4.0*ue[i-1][m] + 5.0*ue[i][m]);
      }
    }
  }

  //---------------------------------------------------------------------
  // eta-direction flux differences
  // (same scheme as above, sweeping j-lines for every interior (i,k) pair;
  // the line buffers are now indexed by j)
  //---------------------------------------------------------------------
  for (k = 1; k <= grid_points[2]-2; k++) {
    zeta = (double)(k) * dnzm1;
    for (i = 1; i <= grid_points[0]-2; i++) {
      xi = (double)(i) * dnxm1;

      for (j = 0; j <= grid_points[1]-1; j++) {
        eta = (double)(j) * dnym1;

        exact_solution(xi, eta, zeta, dtemp);
        for (m = 0; m < 5; m++) {
          ue[j][m] = dtemp[m];
        }

        dtpp = 1.0/dtemp[0];

        for (m = 1; m < 5; m++) {
          buf[j][m] = dtpp * dtemp[m];
        }

        // cuf uses component 2 in this direction
        cuf[j]    = buf[j][2] * buf[j][2];
        buf[j][0] = cuf[j] + buf[j][1] * buf[j][1] + buf[j][3] * buf[j][3];
        q[j] = 0.5*(buf[j][1]*ue[j][1] + buf[j][2]*ue[j][2] +
                    buf[j][3]*ue[j][3]);
      }

      for (j = 1; j <= grid_points[1]-2; j++) {
        jm1 = j-1;
        jp1 = j+1;

        forcing[k][j][i][0] = forcing[k][j][i][0] -
          ty2*( ue[jp1][2]-ue[jm1][2] )+
          dy1ty1*(ue[jp1][0]-2.0*ue[j][0]+ue[jm1][0]);

        forcing[k][j][i][1] = forcing[k][j][i][1] - ty2*(
            ue[jp1][1]*buf[jp1][2]-ue[jm1][1]*buf[jm1][2])+
          yycon2*(buf[jp1][1]-2.0*buf[j][1]+buf[jm1][1])+
          dy2ty1*( ue[jp1][1]-2.0* ue[j][1]+ ue[jm1][1]);

        forcing[k][j][i][2] = forcing[k][j][i][2] - ty2*(
            (ue[jp1][2]*buf[jp1][2]+c2*(ue[jp1][4]-q[jp1]))-
            (ue[jm1][2]*buf[jm1][2]+c2*(ue[jm1][4]-q[jm1])))+
          yycon1*(buf[jp1][2]-2.0*buf[j][2]+buf[jm1][2])+
          dy3ty1*( ue[jp1][2]-2.0*ue[j][2] +ue[jm1][2]);

        forcing[k][j][i][3] = forcing[k][j][i][3] - ty2*(
            ue[jp1][3]*buf[jp1][2]-ue[jm1][3]*buf[jm1][2])+
          yycon2*(buf[jp1][3]-2.0*buf[j][3]+buf[jm1][3])+
          dy4ty1*( ue[jp1][3]-2.0*ue[j][3]+ ue[jm1][3]);

        forcing[k][j][i][4] = forcing[k][j][i][4] - ty2*(
            buf[jp1][2]*(c1*ue[jp1][4]-c2*q[jp1])-
            buf[jm1][2]*(c1*ue[jm1][4]-c2*q[jm1]))+
          0.5*yycon3*(buf[jp1][0]-2.0*buf[j][0]+
              buf[jm1][0])+
          yycon4*(cuf[jp1]-2.0*cuf[j]+cuf[jm1])+
          yycon5*(buf[jp1][4]-2.0*buf[j][4]+buf[jm1][4])+
          dy5ty1*(ue[jp1][4]-2.0*ue[j][4]+ue[jm1][4]);
      }

      //---------------------------------------------------------------------
      // Fourth-order dissipation (shortened stencils next to the line ends)
      //---------------------------------------------------------------------
      for (m = 0; m < 5; m++) {
        j = 1;
        forcing[k][j][i][m] = forcing[k][j][i][m] - dssp *
          (5.0*ue[j][m] - 4.0*ue[j+1][m] +ue[j+2][m]);
        j = 2;
        forcing[k][j][i][m] = forcing[k][j][i][m] - dssp *
          (-4.0*ue[j-1][m] + 6.0*ue[j][m] -
           4.0*ue[j+1][m] +       ue[j+2][m]);
      }

      for (j = 3; j <= grid_points[1]-4; j++) {
        for (m = 0; m < 5; m++) {
          forcing[k][j][i][m] = forcing[k][j][i][m] - dssp*
            (ue[j-2][m] - 4.0*ue[j-1][m] +
             6.0*ue[j][m] - 4.0*ue[j+1][m] + ue[j+2][m]);
        }
      }

      for (m = 0; m < 5; m++) {
        j = grid_points[1]-3;
        forcing[k][j][i][m] = forcing[k][j][i][m] - dssp *
          (ue[j-2][m] - 4.0*ue[j-1][m] +
           6.0*ue[j][m] - 4.0*ue[j+1][m]);
        j = grid_points[1]-2;
        forcing[k][j][i][m] = forcing[k][j][i][m] - dssp *
          (ue[j-2][m] - 4.0*ue[j-1][m] + 5.0*ue[j][m]);
      }
    }
  }

  //---------------------------------------------------------------------
  // zeta-direction flux differences
  // (same scheme again, sweeping k-lines for every interior (i,j) pair;
  // the line buffers are now indexed by k)
  //---------------------------------------------------------------------
  for (j = 1; j <= grid_points[1]-2; j++) {
    eta = (double)(j) * dnym1;
    for (i = 1; i <= grid_points[0]-2; i++) {
      xi = (double)(i) * dnxm1;

      for (k = 0; k <= grid_points[2]-1; k++) {
        zeta = (double)(k) * dnzm1;

        exact_solution(xi, eta, zeta, dtemp);
        for (m = 0; m < 5; m++) {
          ue[k][m] = dtemp[m];
        }

        dtpp = 1.0/dtemp[0];

        for (m = 1; m < 5; m++) {
          buf[k][m] = dtpp * dtemp[m];
        }

        // cuf uses component 3 in this direction
        cuf[k]    = buf[k][3] * buf[k][3];
        buf[k][0] = cuf[k] + buf[k][1] * buf[k][1] + buf[k][2] * buf[k][2];
        q[k] = 0.5*(buf[k][1]*ue[k][1] + buf[k][2]*ue[k][2] +
                    buf[k][3]*ue[k][3]);
      }

      for (k = 1; k <= grid_points[2]-2; k++) {
        km1 = k-1;
        kp1 = k+1;

        forcing[k][j][i][0] = forcing[k][j][i][0] -
          tz2*( ue[kp1][3]-ue[km1][3] )+
          dz1tz1*(ue[kp1][0]-2.0*ue[k][0]+ue[km1][0]);

        forcing[k][j][i][1] = forcing[k][j][i][1] - tz2 * (
            ue[kp1][1]*buf[kp1][3]-ue[km1][1]*buf[km1][3])+
          zzcon2*(buf[kp1][1]-2.0*buf[k][1]+buf[km1][1])+
          dz2tz1*( ue[kp1][1]-2.0* ue[k][1]+ ue[km1][1]);

        forcing[k][j][i][2] = forcing[k][j][i][2] - tz2 * (
            ue[kp1][2]*buf[kp1][3]-ue[km1][2]*buf[km1][3])+
          zzcon2*(buf[kp1][2]-2.0*buf[k][2]+buf[km1][2])+
          dz3tz1*(ue[kp1][2]-2.0*ue[k][2]+ue[km1][2]);

        forcing[k][j][i][3] = forcing[k][j][i][3] - tz2 * (
            (ue[kp1][3]*buf[kp1][3]+c2*(ue[kp1][4]-q[kp1]))-
            (ue[km1][3]*buf[km1][3]+c2*(ue[km1][4]-q[km1])))+
          zzcon1*(buf[kp1][3]-2.0*buf[k][3]+buf[km1][3])+
          dz4tz1*( ue[kp1][3]-2.0*ue[k][3] +ue[km1][3]);

        forcing[k][j][i][4] = forcing[k][j][i][4] - tz2 * (
            buf[kp1][3]*(c1*ue[kp1][4]-c2*q[kp1])-
            buf[km1][3]*(c1*ue[km1][4]-c2*q[km1]))+
          0.5*zzcon3*(buf[kp1][0]-2.0*buf[k][0]
              +buf[km1][0])+
          zzcon4*(cuf[kp1]-2.0*cuf[k]+cuf[km1])+
          zzcon5*(buf[kp1][4]-2.0*buf[k][4]+buf[km1][4])+
          dz5tz1*( ue[kp1][4]-2.0*ue[k][4]+ ue[km1][4]);
      }

      //---------------------------------------------------------------------
      // Fourth-order dissipation (shortened stencils next to the line ends)
      //---------------------------------------------------------------------
      for (m = 0; m < 5; m++) {
        k = 1;
        forcing[k][j][i][m] = forcing[k][j][i][m] - dssp *
          (5.0*ue[k][m] - 4.0*ue[k+1][m] +ue[k+2][m]);
        k = 2;
        forcing[k][j][i][m] = forcing[k][j][i][m] - dssp *
          (-4.0*ue[k-1][m] + 6.0*ue[k][m] -
           4.0*ue[k+1][m] +       ue[k+2][m]);
      }

      for (k = 3; k <= grid_points[2]-4; k++) {
        for (m = 0; m < 5; m++) {
          forcing[k][j][i][m] = forcing[k][j][i][m] - dssp*
            (ue[k-2][m] - 4.0*ue[k-1][m] +
             6.0*ue[k][m] - 4.0*ue[k+1][m] + ue[k+2][m]);
        }
      }

      for (m = 0; m < 5; m++) {
        k = grid_points[2]-3;
        forcing[k][j][i][m] = forcing[k][j][i][m] - dssp *
          (ue[k-2][m] - 4.0*ue[k-1][m] +
           6.0*ue[k][m] - 4.0*ue[k+1][m]);
        k = grid_points[2]-2;
        forcing[k][j][i][m] = forcing[k][j][i][m] - dssp *
          (ue[k-2][m] - 4.0*ue[k-1][m] + 5.0*ue[k][m]);
      }

    }
  }

  //---------------------------------------------------------------------
  // now change the sign of the forcing function
  // (interior points only; boundary entries keep the zero set at the top)
  //---------------------------------------------------------------------
  for (k = 1; k <= grid_points[2]-2; k++) {
    for (j = 1; j <= grid_points[1]-2; j++) {
      for (i = 1; i <= grid_points[0]-2; i++) {
        for (m = 0; m < 5; m++) {
          forcing[k][j][i][m] = -1.0 * forcing[k][j][i][m];
        }
      }
    }
  }
}
コード例 #5
0
int main()
{


  int rank, processes, process;
   MPI_Status status;
   int master=0;
   int tag=123;

   int thread_count=1;
    MPI_Init(NULL,NULL);
   MPI_Comm_size(MPI_COMM_WORLD,&processes);
   MPI_Comm_rank(MPI_COMM_WORLD,&rank);
    // index:
    int i;
    int j;
    MPI_Request requests[processes];
    double time1;
    double time2;
    double timeD;
    // Variables used to establish the convergence of the Jacobi iterations:
    double iteration_error=1.0;
    double tolerance=1E-15;
    int Max_Iter=1000000/processes;
    if(rank==0){
    double d_sqrt = sqrt(processes);
    int i_sqrt = d_sqrt;
    if ( d_sqrt != i_sqrt){

      fprintf(stderr,"Number of processes must be perfect square\n");
      MPI_Finalize();
    return 0;
    }
  }

    // Initialize Un arbitrarily to 0:
    //     #pragma omp parallel for num_threads(thread_count) shared ( Un ) private ( i, j )
  int sqpr=sqrt(processes);
  int m_iter=m/sqpr;
  int n_iter=n/sqpr; 
// create a 2d array for each processes and set the arrays to 0
  double local_Un[m_iter][n_iter];
  double local_Unp1[m_iter][n_iter];


//MPI: seperate the work between threads into processes number of 2d arrays
  //  0  1   2   3
  //  4  5   6   7
  //  8  9   10  11
  //  12 13  14  15


  

    for(i=0; i<m_iter; i++) {
        for(j=0; j<n_iter; j++){
    local_Un[i][j]=0;
        }}
    
          
    // Impose the left and right boundary conditions:

        int local_column = rank%sqpr;
        int local_row = rank/sqpr;

    if(local_row==0){
       for(i=0; i<n_iter; i++) {
        local_Un[0][i]= exact_solution(x(i+(local_column)*n_iter),c);
       
       }
    }

    if(local_row==(sqpr-1)){
      for(i=0; i<n_iter; i++) {
        local_Un[m_iter-1][i]= exact_solution(x(i+(local_column)*n_iter),d);
       }
    }

    if(local_column==0){
      for(j=0;j<m_iter;j++){
        local_Un[j][0]= exact_solution(a,y(j+(local_row)*m_iter));
      }
    }
    if(local_column==(sqpr-1)) {
      for(j=0;j<m_iter;j++){
        local_Un[j][n_iter-1]= exact_solution(b,y(j+(local_row)*m_iter));
      }
    }


    /*

    for(i=0;i<m_iter;i++){
      local_Un[i][0]= exact_solution(x(0+(local_column)*m_iter),y(i+(local_row)*m_iter));
      local_Un[i][n_iter-1]= exact_solution(x(n_iter-1+(local_column)*m_iter),y(i+(local_row)*m_iter));


  }

      for(j=0;j<m_iter;j++){
        local_Un[0][j]= exact_solution(x(j+(local_column)*m_iter),y(0+(local_row)*m_iter));
        local_Un[n_iter-1][j]= exact_solution(x(j+(local_column)*m_iter),y(n_iter-1+(local_row)*m_iter));

      }
    

    /*for(i=0;i<m_iter;i++){
      for(j=0;j<n_iter;j++){
        printf("[%d][%d]= %f",i+(rank/sqpr)*m_iter,j+(rank%sqpr)*m_iter,local_Un[i][j]);
      }
      printf("\n\n");
    }*/

    // Initialize the interation counter:
    int iteration_count;
    iteration_count=0;
    
  
    // Iterate until steady-state is reached.
    // Otherwise stops at Max_Iter iterations (to avoid infinite loops).
    //
    // Recall that we say that the steady-state is reached when the maximum difference between
    // two iterates is less than or equal to the tolerance, i.e. max|Unp1-Un| <= tolerance.
    

       time1=MPI_Wtime();




  MPI_Barrier(MPI_COMM_WORLD);



       
       // {
    while(iteration_error> tolerance && iteration_count < Max_Iter){
      {      
       //iteration_count++;
	/*	if(rank==0){
	  iteration_count++;
	  if(iteration_count>10){
	    MPI_Bcast(&iteration_count,1,MPI_INT,0,MPI_COMM_WORLD);
	  }
	  }*/
	iteration_count++;


    double upToDown[n_iter];
    double downToUp[n_iter];
    double leftToRight[m_iter];
    double rightToLeft[m_iter];
       // if(iteration_count % 1000 == 0) std::cout<<"iteration " << iteration_count << std::endl;
//broadcast iteration count to all processes
       

       // Treat the left and right boundary conditions:

       if(local_row==0){
       for(i=0; i<n_iter; i++) {
        local_Unp1[0][i]= exact_solution(x(i+(local_column)*n_iter),c);
       }

       }

    if(local_row==(sqpr-1)){
      for(i=0; i<n_iter; i++) {
        local_Unp1[m_iter-1][i]= exact_solution(x(i+(local_column)*n_iter),d);
       }
    }
    if((local_column)!=(sqpr-1)){
      for(i=0;i<n_iter;i++){
        leftToRight[i]=  exact_solution(x(m_iter-1+local_column*m_iter),y(i+local_row*m_iter));
      }  
     }
    if(local_column==0){
      for(j=0;j<m_iter;j++){
        local_Unp1[j][0]= exact_solution(a,y(j+(local_row)*m_iter));
      }
    }
    if((local_row)!=(sqpr-1)){
        for(i=0;i<n_iter;i++){
          upToDown[i]= exact_solution(x(i+local_column*m_iter),y(m_iter-1+local_row*m_iter));
       }
     }
    if(local_column==(sqpr-1)) {
      for(j=0;j<m_iter;j++){
        local_Unp1[j][n_iter-1]= exact_solution(b,y(j+(local_row)*n_iter));
      }
    }
     


    for(i=0;i<m_iter;i++){
      local_Unp1[i][0]= exact_solution(x(0+(local_column)*m_iter),y(i+(local_row)*m_iter));
      local_Unp1[i][n_iter-1]= exact_solution(x(n_iter-1+(local_column)*m_iter),y(i+(local_row)*m_iter));


  }

      for(j=0;j<m_iter;j++){
        local_Unp1[0][j]= exact_solution(x(j+(local_column)*m_iter),y(0+(local_row)*m_iter));
        local_Unp1[n_iter-1][j]= exact_solution(x(j+(local_column)*m_iter),y(n_iter-1+(local_row)*m_iter));

      }

    

   /* printf("rank is %d",rank);
    for(i=0;i<m_iter;i++){
      for(j=0;j<n_iter;j++){
        printf("[%d][%d]= %f",i+(rank/sqpr)*m_iter,j+(rank%sqpr)*m_iter,local_Un[i][j]);
      }
      printf("\n\n");
    }
*/


  
   



        // Treat the interior points using the update formula:
	
//parallelize the update for loop and use scatter to spread the solutions
   

   
       

     for(i=1; i< m_iter-1; i++){
	  
            for(j=1; j< n_iter-1;j++){
	      local_Unp1[j][i]= 0.5*(dy2*(local_Un[j+1][i]+local_Un[j-1][i])+dx2*(local_Un[j][i+1]+local_Un[j][i-1]) - S(x(i+(local_column)*m_iter),y(j+(local_row)*n_iter))*dx2*dy2)*dxdyd;
  }
}

     //send from up to Down
  //all 2d besides the top row and grap the ghost from above
/*

 if((local_row)!=(sqpr-1)){
      for(i=0;i<n_iter;i++){
       // upToDown[i]= local_Unp1[m_iter-1][i];
      }

       MPI_Send(upToDown,n_iter,MPI_DOUBLE,rank+sqpr,100,MPI_COMM_WORLD);  
     }


     if((local_row)!=0){
       MPI_Recv(upToDown,n_iter,MPI_DOUBLE,rank-sqpr,100,MPI_COMM_WORLD,&status);
       
       for(i=1;i<n_iter-1;i++){
	 local_Unp1[0][i]= 0.5*(dy2*(local_Un[0][i+1]+local_Un[0][i-1])+dx2*(local_Un[1][i]+upToDown[i]) - S(x(i+(local_column)*n_iter),y(0+(local_row)*m_iter))*dx2*dy2)*dxdyd;
	 downToUp[i]= local_Unp1[0][i];
       }
       downToUp[0]=local_Unp1[0][0];
       downToUp[n_iter-1]=local_Unp1[0][n_iter-1];
       MPI_Send(downToUp,n_iter,MPI_DOUBLE,rank-sqpr,100,MPI_COMM_WORLD);
     
	 
   
	 
     }


     //all arrays besides bottom get the ghost array from their underprocess
     if((local_row)!=(sqpr-1)){
      MPI_Recv(downToUp,n_iter,MPI_DOUBLE,rank+sqpr,100,MPI_COMM_WORLD,&status);
      
      
        for(i=1;i<n_iter-1;i++){
   local_Unp1[m_iter-1][i]= 0.5*(dy2*(local_Un[m_iter-1][i+1]+local_Un[m_iter-1][i-1])+dx2*(local_Un[m_iter-2][i]+downToUp[i]) - S(x(i+(local_column)*n_iter),y(m_iter-1+(local_row)*m_iter))*dx2*dy2)*dxdyd;
       }

     
     }




////////////left to right

      
    


     if((local_column)!=(sqpr-1)){

      MPI_Send(leftToRight,n_iter,MPI_DOUBLE,rank+1,300,MPI_COMM_WORLD);

     }

//all 2d besides the left side so we grab the ghopst from the left
     if((local_column)!=0){

      MPI_Recv(leftToRight,n_iter,MPI_DOUBLE,rank-1,300,MPI_COMM_WORLD,&status);
        for(i=1;i<n_iter-1;i++){
   local_Unp1[i][0]= 0.5*(dy2*(local_Un[i][1]+leftToRight[i])+dx2*(local_Un[i+1][0]+local_Un[i-1][0]) - S(x(0+(local_column)*n_iter),y(i+(local_row)*m_iter))*dx2*dy2)*dxdyd;
   rightToLeft[i]=local_Unp1[i][0];
  //    rightToLeft[i]=exact_solution(x(0+local_column*m_iter),y(i+local_row*m_iter));

       }
       rightToLeft[0]=local_Unp1[0][0];
       rightToLeft[n_iter-1]=local_Unp1[n_iter-1][0];
       MPI_Send(rightToLeft,n_iter,MPI_DOUBLE,rank-1,400,MPI_COMM_WORLD);

       

     }

     //all 2d array besides the right
     if((local_column)!=(sqpr-1)){

      MPI_Recv(rightToLeft,n_iter,MPI_DOUBLE,rank+1,400,MPI_COMM_WORLD,&status);
      for(i=1;i<n_iter-1;i++){
        local_Unp1[i][m_iter-1]= 0.5*(dy2*(local_Un[i][m_iter-2]+rightToLeft[i])+dx2*(local_Un[i+1][m_iter-1]+local_Un[i-1][m_iter-1]) - S(x(m_iter-1+(local_column)*n_iter),y(i+(local_row)*m_iter))*dx2*dy2)*dxdyd;
      }
           
     }

     ///////////




     //work on the bot lef corner
     if(local_column!=0&& local_row!=(sqpr-1)){
        local_Unp1[n_iter-1][0]= 0.5*(dy2*(local_Un[n_iter-1][1]+leftToRight[n_iter-1])+dx2*(local_Un[n_iter-2][0]+downToUp[0]) - S(x(0+(local_column)*n_iter),y(n_iter-1+(local_row)*m_iter))*dx2*dy2)*dxdyd;
        local_Unp1[n_iter-1][0]=exact_solution(x(0+(local_column)*m_iter),y(n_iter-1+(local_row)*m_iter));
     }

     //bottomRight
     if(local_column!=(sqpr-1) && local_row!=(sqpr-1)){
      local_Unp1[n_iter-1][m_iter-1]= 0.5*(dy2*(local_Un[n_iter-1][m_iter-2]+rightToLeft[n_iter-1])+dx2*(local_Un[n_iter-2][m_iter-1]+downToUp[m_iter-1]) - S(x(m_iter-1+(local_column)*n_iter),y(n_iter-1+(local_row)*m_iter))*dx2*dy2)*dxdyd;
      local_Unp1[n_iter-1][n_iter-1] = exact_solution(x(n_iter-1+(local_column)*m_iter),y(n_iter-1+(local_row)*m_iter));
     }

     //top left
     if(local_column!=0 && local_row!=0){
        local_Unp1[0][0]= 0.5*(dy2*(local_Un[0][1]+leftToRight[0])+dx2*(local_Un[1][0]+upToDown[0]) - S(x(0+(local_column)*n_iter),y(0+(local_row)*m_iter))*dx2*dy2)*dxdyd;
        local_Unp1[0][0] = exact_solution(x(0+(local_column)*m_iter),y(0+(local_row)*m_iter));

     }

     //top right
     if(local_column!=(sqpr-1)&&local_row!=0){
        local_Unp1[0][m_iter-1]= 0.5*(dy2*(local_Un[0][m_iter-2]+rightToLeft[0])+dx2*(local_Un[1][m_iter-1]+downToUp[m_iter-1]) - S(x(m_iter-1+(local_column)*n_iter),y(0+(local_row)*m_iter))*dx2*dy2)*dxdyd;
        local_Unp1[0][n_iter-1] = exact_solution(x(0+(local_column)*m_iter),y(n_iter-1+(local_row)*m_iter));
     }

//MPI_Waitall(processes,requests,MPI_STATUSES_IGNORE);
      
	//	
        // Compute the maximum error between 2 iterates to establish whether or not
        // steady-state is reached:

     

*/


	double my_iteration_error=0.0;
	  for(i=0; i< m_iter-1; i++){
            for(j=0;j<n_iter-1;j++){
            double local_iteration_error = fabs(local_Unp1[i][j] - local_Un[i][j]);
	  
	    if (local_iteration_error > my_iteration_error) {
	      // #pragma omp critical
        my_iteration_error = local_iteration_error;

        	       
	    } 
	    }
	  }



    if(rank==0){
      
      
      if(iteration_error > my_iteration_error){
          iteration_error=my_iteration_error;
        }
      for(i=1;i<processes;i++){
        
        MPI_Recv(&my_iteration_error,1,MPI_DOUBLE,i,1,MPI_COMM_WORLD,&status);

        if(iteration_error > my_iteration_error){
          iteration_error=my_iteration_error;
        }
      }
    }
    else{
    MPI_Send(&my_iteration_error,1,MPI_DOUBLE,0,1,MPI_COMM_WORLD);
        }

	

    MPI_Bcast(&iteration_error,1,MPI_DOUBLE,0,MPI_COMM_WORLD);


            MPI_Barrier(MPI_COMM_WORLD);
    
    //Do I just make everything Isend then call wait??


        // Prepare for the next iteration:
        //Wait for all the updates to finish then we can start preperation
            for(i=0; i< m_iter; i++){
            for(j=0;j<n_iter;j++){
            local_Un[i][j]=local_Unp1[i][j];
	    }
	    }
	/*	*/   
	
//        if(iteration_count % 1000 == 0) std::cout<< "The error between two iterates is " << iteration_error << std::endl;
      }
      // std::cout<<Unp1[5][5]<<","<<exact_solution(x(5),y(5))<<std::endl;
    } //Done with While loop


                //MPI_Barrier(MPI_COMM_WORLD);

    time2=MPI_Wtime();
    






    int ii;
    int jj;

    int counter=0;

// Compute the maximum error between the computed and exact solutions:
        double solution_error=0.0;
        double my_solution_error=0.0;

    for(i=1; i< m_iter-1; i++){
        for(j=1; j<n_iter-1;j++){
	 
	 
	
	  double local_solution_error=fabs(local_Unp1[j][i] - exact_solution(x(i+(local_column)*m_iter),y(j+(local_row)*n_iter)));
       if(rank==10){  std::cout<<"error of rank "<<rank<<" is "<<local_solution_error<<" "<<j<<i<<std::endl;
          }
	if (local_solution_error > my_solution_error) {
//	
            my_solution_error = local_solution_error;}
	
	}
    }



if(rank==0){
  std::cout<<"checking solution error in master "<<my_solution_error<<std::endl;
      solution_error=my_solution_error;
      for(i=1;i<processes;i++){
        MPI_Recv(&my_solution_error,1,MPI_DOUBLE,i,2,MPI_COMM_WORLD,MPI_STATUSES_IGNORE);
	//std::cout<<"Rank "<<rank<<"error"<<my_solution_error<<std::endl;
	if(my_solution_error > solution_error){

    

          solution_error=my_solution_error;
        }
      }
    }
    else{
    MPI_Send(&my_solution_error,1,MPI_DOUBLE,0,2,MPI_COMM_WORLD);
        }


	
    
    


    // time2=omp_get_wtime();
    timeD=time2-time1;

 double diff;

 if(rank==0){
    // Output:
    printf("\n\n");
    std::cout<< "-------------------------------------------------------"               << std::endl;
    std::cout<< "SUMMARY:"                                                 << std::endl << std::endl;
    std::cout<< "The error between two iterates is "    << iteration_error << std::endl << std::endl;
    std::cout<< "The maximum error in the solution is " << solution_error <<std::endl;
    std::cout<< "iteration count" <<iteration_count<<" per process"<<std::endl;  
    std::cout<< "time taken " <<timeD<<std::endl;
    std::cout<< "-------------------------------------------------------"  << std::endl << std::endl;
 }
/*
for(i=0; i< m; i++){
  for(j=1;j<n-1;j++){
    std::cout<<counter<<","<<Unp1[i][j]<<std::endl;
    counter++;
  }
  } */
 MPI_Finalize();
    return 0;
 }