void error_norm(double rms[]) { //--------------------------------------------------------------------- //--------------------------------------------------------------------- //--------------------------------------------------------------------- // this function computes the norm of the difference between the // computed solution and the exact solution //--------------------------------------------------------------------- int c, i, j, k, m, ii, jj, kk, d, error; double xi, eta, zeta, u_exact[5], rms_work[5], add; for (m = 1; m <= 5; m++) { rms_work(m) = 0.0e0; } for (c = 1; c <= ncells; c++) { kk = 0; for (k = cell_low(3,c); k <= cell_high(3,c); k++) { zeta = (double)(k) * dnzm1; jj = 0; for (j = cell_low(2,c); j <= cell_high(2,c); j++) { eta = (double)(j) * dnym1; ii = 0; for (i = cell_low(1,c); i <= cell_high(1,c); i++) { xi = (double)(i) * dnxm1; exact_solution(xi, eta, zeta, u_exact); for (m = 1; m <= 5; m++) { add = u(m,ii,jj,kk,c)-u_exact(m); rms_work(m) = rms_work(m) + add*add; } ii = ii + 1; } jj = jj + 1; } kk = kk + 1; } } RCCE_allreduce((char*)rms_work, (char*)rms, 5, RCCE_DOUBLE, RCCE_SUM, RCCE_COMM_WORLD); for (m = 1; m <= 5; m++) { for (d = 1; d <= 3; d++) { rms(m) = rms(m) / (double)(grid_points(d)-2); } rms(m) = sqrt(rms(m)); } return; }
//--------------------------------------------------------------------- // this function computes the norm of the difference between the // computed solution and the exact solution //--------------------------------------------------------------------- void error_norm(double rms[5]) { int i, j, k, m, d; double xi, eta, zeta, u_exact[5], add; double rms_local[5]; for (m = 0; m < 5; m++) { rms[m] = 0.0; } #pragma omp parallel default(shared) \ private(i,j,k,m,zeta,eta,xi,add,u_exact,rms_local) shared(rms) { for (m = 0; m < 5; m++) { rms_local[m] = 0.0; } #pragma omp for nowait for (k = 0; k <= grid_points[2]-1; k++) { zeta = (double)k * dnzm1; for (j = 0; j <= grid_points[1]-1; j++) { eta = (double)j * dnym1; for (i = 0; i <= grid_points[0]-1; i++) { xi = (double)i * dnxm1; exact_solution(xi, eta, zeta, u_exact); for (m = 0; m < 5; m++) { add = u[k][j][i][m]-u_exact[m]; rms_local[m] = rms_local[m] + add*add; } } } } for (m = 0; m < 5; m++) { #pragma omp atomic rms[m] += rms_local[m]; } } //end parallel for (m = 0; m < 5; m++) { for (d = 0; d < 3; d++) { rms[m] = rms[m] / (double)(grid_points[d]-2); } rms[m] = sqrt(rms[m]); } }
int main() { int p, q, r, N, max_restart, max_iter; clock_t t1, t2; printf("\n"); printf(" Input N = 2^p * 3^q * 5^r - 1, (p, q, r) = "); scanf("%d %d %d", &p, &q, &r); N = pow(2, p) * pow(3, q) * pow(5, r) - 1; printf(" Please input max restart times max_restart = "); scanf("%d",&max_restart); printf(" Please input max iteration times max_iter = "); scanf("%d",&max_iter); printf("\n N = %d , max_restart = %d , max_iter = %d \n \n", N, max_restart, max_iter); double *A, *D, *x, *b, *u, tol; A = (double *) malloc(N*N*sizeof(double)); D = (double *) malloc(N*N*sizeof(double)); x = (double *) malloc(N*N*sizeof(double)); b = (double *) malloc(N*N*sizeof(double)); u = (double *) malloc(N*N*sizeof(double)); initial_x(x, N); initial_A(A, N); initial_D(D, N); source(b, N); tol = 1.0e-6; t1 = clock(); gmres(A, D, x, b, N, max_restart, max_iter, tol); t2 = clock(); exact_solution(u, N); //printf(" u[%d][%d] = %f \n", N/2, N/2, u[N*N/2+N/2]); //printf(" x[%d][%d] = %f \n", N/2, N/2, x[N*N/2+N/2]); printf(" error = %e \n", error(x, u, N*N)); printf(" times = %f \n \n", 1.0*(t2-t1)/CLOCKS_PER_SEC); return 0; }
//---------------------------------------------------------------------
// compute the right hand side based on exact solution
//
// Fills the global array forcing[k][j][i][m] (m = 0..4):
//   1. forcing is zeroed on the whole grid;
//   2. for each of the three directions (xi, eta, zeta) the exact solution
//      is evaluated along one grid line into the work arrays ue, buf, cuf
//      and q, the flux-difference terms are accumulated into forcing at
//      the interior points of that line, and the dssp-weighted
//      dissipation stencil is subtracted (with shortened stencils at the
//      two points next to each end of the line);
//   3. the sign of forcing is flipped at all interior points.
//
// Takes no arguments and returns nothing; it reads grid_points, the
// dn?m1 spacings and the t?2 / d??t?1 / ??con? / c1 / c2 / dssp
// coefficients, and writes forcing, ue, buf, cuf and q, all of which are
// declared outside this function.
//---------------------------------------------------------------------
void exact_rhs()
{
  double dtemp[5], xi, eta, zeta, dtpp;
  int m, i, j, k, ip1, im1, jp1, jm1, km1, kp1;

  //---------------------------------------------------------------------
  // initialize
  //---------------------------------------------------------------------
  for (k = 0; k <= grid_points[2]-1; k++) {
    for (j = 0; j <= grid_points[1]-1; j++) {
      for (i = 0; i <= grid_points[0]-1; i++) {
        for (m = 0; m < 5; m++) {
          forcing[k][j][i][m] = 0.0;
        }
      }
    }
  }

  //---------------------------------------------------------------------
  // xi-direction flux differences
  //---------------------------------------------------------------------
  for (k = 1; k <= grid_points[2]-2; k++) {
    zeta = (double)(k) * dnzm1;
    for (j = 1; j <= grid_points[1]-2; j++) {
      eta = (double)(j) * dnym1;

      // Sample the exact solution along the whole i-line (end points
      // included, because the differences below reach i-1 and i+1).
      for (i = 0; i <= grid_points[0]-1; i++) {
        xi = (double)(i) * dnxm1;

        exact_solution(xi, eta, zeta, dtemp);
        for (m = 0; m < 5; m++) {
          ue[i][m] = dtemp[m];
        }

        // buf[i][1..4] are components 1..4 divided by component 0
        dtpp = 1.0 / dtemp[0];

        for (m = 1; m < 5; m++) {
          buf[i][m] = dtpp * dtemp[m];
        }

        // cuf: square of the component along this direction;
        // buf[i][0]: sum of squares of buf[i][1..3]
        cuf[i] = buf[i][1] * buf[i][1];
        buf[i][0] = cuf[i] + buf[i][2] * buf[i][2] + buf[i][3] * buf[i][3];
        q[i] = 0.5*(buf[i][1]*ue[i][1] + buf[i][2]*ue[i][2] +
                    buf[i][3]*ue[i][3]);
      }

      // Accumulate the difference terms at the interior points of the line.
      for (i = 1; i <= grid_points[0]-2; i++) {
        im1 = i-1;
        ip1 = i+1;

        forcing[k][j][i][0] = forcing[k][j][i][0] -
          tx2*( ue[ip1][1]-ue[im1][1] )+
          dx1tx1*(ue[ip1][0]-2.0*ue[i][0]+ue[im1][0]);

        forcing[k][j][i][1] = forcing[k][j][i][1] - tx2 * (
            (ue[ip1][1]*buf[ip1][1]+c2*(ue[ip1][4]-q[ip1]))-
            (ue[im1][1]*buf[im1][1]+c2*(ue[im1][4]-q[im1])))+
          xxcon1*(buf[ip1][1]-2.0*buf[i][1]+buf[im1][1])+
          dx2tx1*( ue[ip1][1]-2.0* ue[i][1]+ue[im1][1]);

        forcing[k][j][i][2] = forcing[k][j][i][2] - tx2 * (
            ue[ip1][2]*buf[ip1][1]-ue[im1][2]*buf[im1][1])+
          xxcon2*(buf[ip1][2]-2.0*buf[i][2]+buf[im1][2])+
          dx3tx1*( ue[ip1][2]-2.0*ue[i][2] +ue[im1][2]);

        forcing[k][j][i][3] = forcing[k][j][i][3] - tx2*(
            ue[ip1][3]*buf[ip1][1]-ue[im1][3]*buf[im1][1])+
          xxcon2*(buf[ip1][3]-2.0*buf[i][3]+buf[im1][3])+
          dx4tx1*( ue[ip1][3]-2.0* ue[i][3]+ ue[im1][3]);

        forcing[k][j][i][4] = forcing[k][j][i][4] - tx2*(
            buf[ip1][1]*(c1*ue[ip1][4]-c2*q[ip1])-
            buf[im1][1]*(c1*ue[im1][4]-c2*q[im1]))+
          0.5*xxcon3*(buf[ip1][0]-2.0*buf[i][0]+
              buf[im1][0])+
          xxcon4*(cuf[ip1]-2.0*cuf[i]+cuf[im1])+
          xxcon5*(buf[ip1][4]-2.0*buf[i][4]+buf[im1][4])+
          dx5tx1*( ue[ip1][4]-2.0* ue[i][4]+ ue[im1][4]);
      }

      //---------------------------------------------------------------------
      // Fourth-order dissipation
      //---------------------------------------------------------------------
      // i = 1 and i = 2 use shortened stencils (no reference to i-2 / i-1
      // beyond the stored line start).
      for (m = 0; m < 5; m++) {
        i = 1;
        forcing[k][j][i][m] = forcing[k][j][i][m] - dssp *
          (5.0*ue[i][m] - 4.0*ue[i+1][m] +ue[i+2][m]);
        i = 2;
        forcing[k][j][i][m] = forcing[k][j][i][m] - dssp *
          (-4.0*ue[i-1][m] + 6.0*ue[i][m] -
            4.0*ue[i+1][m] +     ue[i+2][m]);
      }

      // full five-point stencil away from the ends
      for (i = 3; i <= grid_points[0]-4; i++) {
        for (m = 0; m < 5; m++) {
          forcing[k][j][i][m] = forcing[k][j][i][m] - dssp*
            (ue[i-2][m] - 4.0*ue[i-1][m] +
             6.0*ue[i][m] - 4.0*ue[i+1][m] + ue[i+2][m]);
        }
      }

      // mirrored shortened stencils at the far end of the line
      for (m = 0; m < 5; m++) {
        i = grid_points[0]-3;
        forcing[k][j][i][m] = forcing[k][j][i][m] - dssp *
          (ue[i-2][m] - 4.0*ue[i-1][m] +
           6.0*ue[i][m] - 4.0*ue[i+1][m]);
        i = grid_points[0]-2;
        forcing[k][j][i][m] = forcing[k][j][i][m] - dssp *
          (ue[i-2][m] - 4.0*ue[i-1][m] + 5.0*ue[i][m]);
      }
    }
  }

  //---------------------------------------------------------------------
  // eta-direction flux differences
  //---------------------------------------------------------------------
  for (k = 1; k <= grid_points[2]-2; k++) {
    zeta = (double)(k) * dnzm1;
    for (i = 1; i <= grid_points[0]-2; i++) {
      xi = (double)(i) * dnxm1;

      // Same procedure as above, now along a j-line; the work arrays are
      // indexed by j and the direction component is index 2.
      for (j = 0; j <= grid_points[1]-1; j++) {
        eta = (double)(j) * dnym1;

        exact_solution(xi, eta, zeta, dtemp);
        for (m = 0; m < 5; m++) {
          ue[j][m] = dtemp[m];
        }

        dtpp = 1.0/dtemp[0];

        for (m = 1; m < 5; m++) {
          buf[j][m] = dtpp * dtemp[m];
        }

        cuf[j] = buf[j][2] * buf[j][2];
        buf[j][0] = cuf[j] + buf[j][1] * buf[j][1] + buf[j][3] * buf[j][3];
        q[j] = 0.5*(buf[j][1]*ue[j][1] + buf[j][2]*ue[j][2] +
                    buf[j][3]*ue[j][3]);
      }

      for (j = 1; j <= grid_points[1]-2; j++) {
        jm1 = j-1;
        jp1 = j+1;

        forcing[k][j][i][0] = forcing[k][j][i][0] -
          ty2*( ue[jp1][2]-ue[jm1][2] )+
          dy1ty1*(ue[jp1][0]-2.0*ue[j][0]+ue[jm1][0]);

        forcing[k][j][i][1] = forcing[k][j][i][1] - ty2*(
            ue[jp1][1]*buf[jp1][2]-ue[jm1][1]*buf[jm1][2])+
          yycon2*(buf[jp1][1]-2.0*buf[j][1]+buf[jm1][1])+
          dy2ty1*( ue[jp1][1]-2.0* ue[j][1]+ ue[jm1][1]);

        forcing[k][j][i][2] = forcing[k][j][i][2] - ty2*(
            (ue[jp1][2]*buf[jp1][2]+c2*(ue[jp1][4]-q[jp1]))-
            (ue[jm1][2]*buf[jm1][2]+c2*(ue[jm1][4]-q[jm1])))+
          yycon1*(buf[jp1][2]-2.0*buf[j][2]+buf[jm1][2])+
          dy3ty1*( ue[jp1][2]-2.0*ue[j][2] +ue[jm1][2]);

        forcing[k][j][i][3] = forcing[k][j][i][3] - ty2*(
            ue[jp1][3]*buf[jp1][2]-ue[jm1][3]*buf[jm1][2])+
          yycon2*(buf[jp1][3]-2.0*buf[j][3]+buf[jm1][3])+
          dy4ty1*( ue[jp1][3]-2.0*ue[j][3]+ ue[jm1][3]);

        forcing[k][j][i][4] = forcing[k][j][i][4] - ty2*(
            buf[jp1][2]*(c1*ue[jp1][4]-c2*q[jp1])-
            buf[jm1][2]*(c1*ue[jm1][4]-c2*q[jm1]))+
          0.5*yycon3*(buf[jp1][0]-2.0*buf[j][0]+
              buf[jm1][0])+
          yycon4*(cuf[jp1]-2.0*cuf[j]+cuf[jm1])+
          yycon5*(buf[jp1][4]-2.0*buf[j][4]+buf[jm1][4])+
          dy5ty1*(ue[jp1][4]-2.0*ue[j][4]+ue[jm1][4]);
      }

      //---------------------------------------------------------------------
      // Fourth-order dissipation
      //---------------------------------------------------------------------
      for (m = 0; m < 5; m++) {
        j = 1;
        forcing[k][j][i][m] = forcing[k][j][i][m] - dssp *
          (5.0*ue[j][m] - 4.0*ue[j+1][m] +ue[j+2][m]);
        j = 2;
        forcing[k][j][i][m] = forcing[k][j][i][m] - dssp *
          (-4.0*ue[j-1][m] + 6.0*ue[j][m] -
            4.0*ue[j+1][m] +     ue[j+2][m]);
      }

      for (j = 3; j <= grid_points[1]-4; j++) {
        for (m = 0; m < 5; m++) {
          forcing[k][j][i][m] = forcing[k][j][i][m] - dssp*
            (ue[j-2][m] - 4.0*ue[j-1][m] +
             6.0*ue[j][m] - 4.0*ue[j+1][m] + ue[j+2][m]);
        }
      }

      for (m = 0; m < 5; m++) {
        j = grid_points[1]-3;
        forcing[k][j][i][m] = forcing[k][j][i][m] - dssp *
          (ue[j-2][m] - 4.0*ue[j-1][m] +
           6.0*ue[j][m] - 4.0*ue[j+1][m]);
        j = grid_points[1]-2;
        forcing[k][j][i][m] = forcing[k][j][i][m] - dssp *
          (ue[j-2][m] - 4.0*ue[j-1][m] + 5.0*ue[j][m]);
      }
    }
  }

  //---------------------------------------------------------------------
  // zeta-direction flux differences
  //---------------------------------------------------------------------
  for (j = 1; j <= grid_points[1]-2; j++) {
    eta = (double)(j) * dnym1;
    for (i = 1; i <= grid_points[0]-2; i++) {
      xi = (double)(i) * dnxm1;

      // Along a k-line; the work arrays are indexed by k and the direction
      // component is index 3.
      for (k = 0; k <= grid_points[2]-1; k++) {
        zeta = (double)(k) * dnzm1;

        exact_solution(xi, eta, zeta, dtemp);
        for (m = 0; m < 5; m++) {
          ue[k][m] = dtemp[m];
        }

        dtpp = 1.0/dtemp[0];

        for (m = 1; m < 5; m++) {
          buf[k][m] = dtpp * dtemp[m];
        }

        cuf[k] = buf[k][3] * buf[k][3];
        buf[k][0] = cuf[k] + buf[k][1] * buf[k][1] + buf[k][2] * buf[k][2];
        q[k] = 0.5*(buf[k][1]*ue[k][1] + buf[k][2]*ue[k][2] +
                    buf[k][3]*ue[k][3]);
      }

      for (k = 1; k <= grid_points[2]-2; k++) {
        km1 = k-1;
        kp1 = k+1;

        forcing[k][j][i][0] = forcing[k][j][i][0] -
          tz2*( ue[kp1][3]-ue[km1][3] )+
          dz1tz1*(ue[kp1][0]-2.0*ue[k][0]+ue[km1][0]);

        forcing[k][j][i][1] = forcing[k][j][i][1] - tz2 * (
            ue[kp1][1]*buf[kp1][3]-ue[km1][1]*buf[km1][3])+
          zzcon2*(buf[kp1][1]-2.0*buf[k][1]+buf[km1][1])+
          dz2tz1*( ue[kp1][1]-2.0* ue[k][1]+ ue[km1][1]);

        forcing[k][j][i][2] = forcing[k][j][i][2] - tz2 * (
            ue[kp1][2]*buf[kp1][3]-ue[km1][2]*buf[km1][3])+
          zzcon2*(buf[kp1][2]-2.0*buf[k][2]+buf[km1][2])+
          dz3tz1*(ue[kp1][2]-2.0*ue[k][2]+ue[km1][2]);

        forcing[k][j][i][3] = forcing[k][j][i][3] - tz2 * (
            (ue[kp1][3]*buf[kp1][3]+c2*(ue[kp1][4]-q[kp1]))-
            (ue[km1][3]*buf[km1][3]+c2*(ue[km1][4]-q[km1])))+
          zzcon1*(buf[kp1][3]-2.0*buf[k][3]+buf[km1][3])+
          dz4tz1*( ue[kp1][3]-2.0*ue[k][3] +ue[km1][3]);

        forcing[k][j][i][4] = forcing[k][j][i][4] - tz2 * (
            buf[kp1][3]*(c1*ue[kp1][4]-c2*q[kp1])-
            buf[km1][3]*(c1*ue[km1][4]-c2*q[km1]))+
          0.5*zzcon3*(buf[kp1][0]-2.0*buf[k][0]
              +buf[km1][0])+
          zzcon4*(cuf[kp1]-2.0*cuf[k]+cuf[km1])+
          zzcon5*(buf[kp1][4]-2.0*buf[k][4]+buf[km1][4])+
          dz5tz1*( ue[kp1][4]-2.0*ue[k][4]+ ue[km1][4]);
      }

      //---------------------------------------------------------------------
      // Fourth-order dissipation
      //---------------------------------------------------------------------
      for (m = 0; m < 5; m++) {
        k = 1;
        forcing[k][j][i][m] = forcing[k][j][i][m] - dssp *
          (5.0*ue[k][m] - 4.0*ue[k+1][m] +ue[k+2][m]);
        k = 2;
        forcing[k][j][i][m] = forcing[k][j][i][m] - dssp *
          (-4.0*ue[k-1][m] + 6.0*ue[k][m] -
            4.0*ue[k+1][m] +     ue[k+2][m]);
      }

      for (k = 3; k <= grid_points[2]-4; k++) {
        for (m = 0; m < 5; m++) {
          forcing[k][j][i][m] = forcing[k][j][i][m] - dssp*
            (ue[k-2][m] - 4.0*ue[k-1][m] +
             6.0*ue[k][m] - 4.0*ue[k+1][m] + ue[k+2][m]);
        }
      }

      for (m = 0; m < 5; m++) {
        k = grid_points[2]-3;
        forcing[k][j][i][m] = forcing[k][j][i][m] - dssp *
          (ue[k-2][m] - 4.0*ue[k-1][m] +
           6.0*ue[k][m] - 4.0*ue[k+1][m]);
        k = grid_points[2]-2;
        forcing[k][j][i][m] = forcing[k][j][i][m] - dssp *
          (ue[k-2][m] - 4.0*ue[k-1][m] + 5.0*ue[k][m]);
      }
    }
  }

  //---------------------------------------------------------------------
  // now change the sign of the forcing function,
  //---------------------------------------------------------------------
  for (k = 1; k <= grid_points[2]-2; k++) {
    for (j = 1; j <= grid_points[1]-2; j++) {
      for (i = 1; i <= grid_points[0]-2; i++) {
        for (m = 0; m < 5; m++) {
          forcing[k][j][i][m] = -1.0 * forcing[k][j][i][m];
        }
      }
    }
  }
}
int main() { int rank, processes, process; MPI_Status status; int master=0; int tag=123; int thread_count=1; MPI_Init(NULL,NULL); MPI_Comm_size(MPI_COMM_WORLD,&processes); MPI_Comm_rank(MPI_COMM_WORLD,&rank); // index: int i; int j; MPI_Request requests[processes]; double time1; double time2; double timeD; // Variables used to establish the convergence of the Jacobi iterations: double iteration_error=1.0; double tolerance=1E-15; int Max_Iter=1000000/processes; if(rank==0){ double d_sqrt = sqrt(processes); int i_sqrt = d_sqrt; if ( d_sqrt != i_sqrt){ fprintf(stderr,"Number of processes must be perfect square\n"); MPI_Finalize(); return 0; } } // Initialize Un arbitrarily to 0: // #pragma omp parallel for num_threads(thread_count) shared ( Un ) private ( i, j ) int sqpr=sqrt(processes); int m_iter=m/sqpr; int n_iter=n/sqpr; // create a 2d array for each processes and set the arrays to 0 double local_Un[m_iter][n_iter]; double local_Unp1[m_iter][n_iter]; //MPI: seperate the work between threads into processes number of 2d arrays // 0 1 2 3 // 4 5 6 7 // 8 9 10 11 // 12 13 14 15 for(i=0; i<m_iter; i++) { for(j=0; j<n_iter; j++){ local_Un[i][j]=0; }} // Impose the left and right boundary conditions: int local_column = rank%sqpr; int local_row = rank/sqpr; if(local_row==0){ for(i=0; i<n_iter; i++) { local_Un[0][i]= exact_solution(x(i+(local_column)*n_iter),c); } } if(local_row==(sqpr-1)){ for(i=0; i<n_iter; i++) { local_Un[m_iter-1][i]= exact_solution(x(i+(local_column)*n_iter),d); } } if(local_column==0){ for(j=0;j<m_iter;j++){ local_Un[j][0]= exact_solution(a,y(j+(local_row)*m_iter)); } } if(local_column==(sqpr-1)) { for(j=0;j<m_iter;j++){ local_Un[j][n_iter-1]= exact_solution(b,y(j+(local_row)*m_iter)); } } /* for(i=0;i<m_iter;i++){ local_Un[i][0]= exact_solution(x(0+(local_column)*m_iter),y(i+(local_row)*m_iter)); local_Un[i][n_iter-1]= exact_solution(x(n_iter-1+(local_column)*m_iter),y(i+(local_row)*m_iter)); } for(j=0;j<m_iter;j++){ local_Un[0][j]= 
exact_solution(x(j+(local_column)*m_iter),y(0+(local_row)*m_iter)); local_Un[n_iter-1][j]= exact_solution(x(j+(local_column)*m_iter),y(n_iter-1+(local_row)*m_iter)); } /*for(i=0;i<m_iter;i++){ for(j=0;j<n_iter;j++){ printf("[%d][%d]= %f",i+(rank/sqpr)*m_iter,j+(rank%sqpr)*m_iter,local_Un[i][j]); } printf("\n\n"); }*/ // Initialize the interation counter: int iteration_count; iteration_count=0; // Iterate until steady-state is reached. // Otherwise stops at Max_Iter iterations (to avoid infinite loops). // // Recall that we say that the steady-state is reached when the maximum difference between // two iterates is less than or equal to the tolerance, i.e. max|Unp1-Un| <= tolerance. time1=MPI_Wtime(); MPI_Barrier(MPI_COMM_WORLD); // { while(iteration_error> tolerance && iteration_count < Max_Iter){ { //iteration_count++; /* if(rank==0){ iteration_count++; if(iteration_count>10){ MPI_Bcast(&iteration_count,1,MPI_INT,0,MPI_COMM_WORLD); } }*/ iteration_count++; double upToDown[n_iter]; double downToUp[n_iter]; double leftToRight[m_iter]; double rightToLeft[m_iter]; // if(iteration_count % 1000 == 0) std::cout<<"iteration " << iteration_count << std::endl; //broadcast iteration count to all processes // Treat the left and right boundary conditions: if(local_row==0){ for(i=0; i<n_iter; i++) { local_Unp1[0][i]= exact_solution(x(i+(local_column)*n_iter),c); } } if(local_row==(sqpr-1)){ for(i=0; i<n_iter; i++) { local_Unp1[m_iter-1][i]= exact_solution(x(i+(local_column)*n_iter),d); } } if((local_column)!=(sqpr-1)){ for(i=0;i<n_iter;i++){ leftToRight[i]= exact_solution(x(m_iter-1+local_column*m_iter),y(i+local_row*m_iter)); } } if(local_column==0){ for(j=0;j<m_iter;j++){ local_Unp1[j][0]= exact_solution(a,y(j+(local_row)*m_iter)); } } if((local_row)!=(sqpr-1)){ for(i=0;i<n_iter;i++){ upToDown[i]= exact_solution(x(i+local_column*m_iter),y(m_iter-1+local_row*m_iter)); } } if(local_column==(sqpr-1)) { for(j=0;j<m_iter;j++){ local_Unp1[j][n_iter-1]= 
exact_solution(b,y(j+(local_row)*n_iter)); } } for(i=0;i<m_iter;i++){ local_Unp1[i][0]= exact_solution(x(0+(local_column)*m_iter),y(i+(local_row)*m_iter)); local_Unp1[i][n_iter-1]= exact_solution(x(n_iter-1+(local_column)*m_iter),y(i+(local_row)*m_iter)); } for(j=0;j<m_iter;j++){ local_Unp1[0][j]= exact_solution(x(j+(local_column)*m_iter),y(0+(local_row)*m_iter)); local_Unp1[n_iter-1][j]= exact_solution(x(j+(local_column)*m_iter),y(n_iter-1+(local_row)*m_iter)); } /* printf("rank is %d",rank); for(i=0;i<m_iter;i++){ for(j=0;j<n_iter;j++){ printf("[%d][%d]= %f",i+(rank/sqpr)*m_iter,j+(rank%sqpr)*m_iter,local_Un[i][j]); } printf("\n\n"); } */ // Treat the interior points using the update formula: //parallelize the update for loop and use scatter to spread the solutions for(i=1; i< m_iter-1; i++){ for(j=1; j< n_iter-1;j++){ local_Unp1[j][i]= 0.5*(dy2*(local_Un[j+1][i]+local_Un[j-1][i])+dx2*(local_Un[j][i+1]+local_Un[j][i-1]) - S(x(i+(local_column)*m_iter),y(j+(local_row)*n_iter))*dx2*dy2)*dxdyd; } } //send from up to Down //all 2d besides the top row and grap the ghost from above /* if((local_row)!=(sqpr-1)){ for(i=0;i<n_iter;i++){ // upToDown[i]= local_Unp1[m_iter-1][i]; } MPI_Send(upToDown,n_iter,MPI_DOUBLE,rank+sqpr,100,MPI_COMM_WORLD); } if((local_row)!=0){ MPI_Recv(upToDown,n_iter,MPI_DOUBLE,rank-sqpr,100,MPI_COMM_WORLD,&status); for(i=1;i<n_iter-1;i++){ local_Unp1[0][i]= 0.5*(dy2*(local_Un[0][i+1]+local_Un[0][i-1])+dx2*(local_Un[1][i]+upToDown[i]) - S(x(i+(local_column)*n_iter),y(0+(local_row)*m_iter))*dx2*dy2)*dxdyd; downToUp[i]= local_Unp1[0][i]; } downToUp[0]=local_Unp1[0][0]; downToUp[n_iter-1]=local_Unp1[0][n_iter-1]; MPI_Send(downToUp,n_iter,MPI_DOUBLE,rank-sqpr,100,MPI_COMM_WORLD); } //all arrays besides bottom get the ghost array from their underprocess if((local_row)!=(sqpr-1)){ MPI_Recv(downToUp,n_iter,MPI_DOUBLE,rank+sqpr,100,MPI_COMM_WORLD,&status); for(i=1;i<n_iter-1;i++){ local_Unp1[m_iter-1][i]= 
0.5*(dy2*(local_Un[m_iter-1][i+1]+local_Un[m_iter-1][i-1])+dx2*(local_Un[m_iter-2][i]+downToUp[i]) - S(x(i+(local_column)*n_iter),y(m_iter-1+(local_row)*m_iter))*dx2*dy2)*dxdyd; } } ////////////left to right if((local_column)!=(sqpr-1)){ MPI_Send(leftToRight,n_iter,MPI_DOUBLE,rank+1,300,MPI_COMM_WORLD); } //all 2d besides the left side so we grab the ghopst from the left if((local_column)!=0){ MPI_Recv(leftToRight,n_iter,MPI_DOUBLE,rank-1,300,MPI_COMM_WORLD,&status); for(i=1;i<n_iter-1;i++){ local_Unp1[i][0]= 0.5*(dy2*(local_Un[i][1]+leftToRight[i])+dx2*(local_Un[i+1][0]+local_Un[i-1][0]) - S(x(0+(local_column)*n_iter),y(i+(local_row)*m_iter))*dx2*dy2)*dxdyd; rightToLeft[i]=local_Unp1[i][0]; // rightToLeft[i]=exact_solution(x(0+local_column*m_iter),y(i+local_row*m_iter)); } rightToLeft[0]=local_Unp1[0][0]; rightToLeft[n_iter-1]=local_Unp1[n_iter-1][0]; MPI_Send(rightToLeft,n_iter,MPI_DOUBLE,rank-1,400,MPI_COMM_WORLD); } //all 2d array besides the right if((local_column)!=(sqpr-1)){ MPI_Recv(rightToLeft,n_iter,MPI_DOUBLE,rank+1,400,MPI_COMM_WORLD,&status); for(i=1;i<n_iter-1;i++){ local_Unp1[i][m_iter-1]= 0.5*(dy2*(local_Un[i][m_iter-2]+rightToLeft[i])+dx2*(local_Un[i+1][m_iter-1]+local_Un[i-1][m_iter-1]) - S(x(m_iter-1+(local_column)*n_iter),y(i+(local_row)*m_iter))*dx2*dy2)*dxdyd; } } /////////// //work on the bot lef corner if(local_column!=0&& local_row!=(sqpr-1)){ local_Unp1[n_iter-1][0]= 0.5*(dy2*(local_Un[n_iter-1][1]+leftToRight[n_iter-1])+dx2*(local_Un[n_iter-2][0]+downToUp[0]) - S(x(0+(local_column)*n_iter),y(n_iter-1+(local_row)*m_iter))*dx2*dy2)*dxdyd; local_Unp1[n_iter-1][0]=exact_solution(x(0+(local_column)*m_iter),y(n_iter-1+(local_row)*m_iter)); } //bottomRight if(local_column!=(sqpr-1) && local_row!=(sqpr-1)){ local_Unp1[n_iter-1][m_iter-1]= 0.5*(dy2*(local_Un[n_iter-1][m_iter-2]+rightToLeft[n_iter-1])+dx2*(local_Un[n_iter-2][m_iter-1]+downToUp[m_iter-1]) - S(x(m_iter-1+(local_column)*n_iter),y(n_iter-1+(local_row)*m_iter))*dx2*dy2)*dxdyd; 
local_Unp1[n_iter-1][n_iter-1] = exact_solution(x(n_iter-1+(local_column)*m_iter),y(n_iter-1+(local_row)*m_iter)); } //top left if(local_column!=0 && local_row!=0){ local_Unp1[0][0]= 0.5*(dy2*(local_Un[0][1]+leftToRight[0])+dx2*(local_Un[1][0]+upToDown[0]) - S(x(0+(local_column)*n_iter),y(0+(local_row)*m_iter))*dx2*dy2)*dxdyd; local_Unp1[0][0] = exact_solution(x(0+(local_column)*m_iter),y(0+(local_row)*m_iter)); } //top right if(local_column!=(sqpr-1)&&local_row!=0){ local_Unp1[0][m_iter-1]= 0.5*(dy2*(local_Un[0][m_iter-2]+rightToLeft[0])+dx2*(local_Un[1][m_iter-1]+downToUp[m_iter-1]) - S(x(m_iter-1+(local_column)*n_iter),y(0+(local_row)*m_iter))*dx2*dy2)*dxdyd; local_Unp1[0][n_iter-1] = exact_solution(x(0+(local_column)*m_iter),y(n_iter-1+(local_row)*m_iter)); } //MPI_Waitall(processes,requests,MPI_STATUSES_IGNORE); // // Compute the maximum error between 2 iterates to establish whether or not // steady-state is reached: */ double my_iteration_error=0.0; for(i=0; i< m_iter-1; i++){ for(j=0;j<n_iter-1;j++){ double local_iteration_error = fabs(local_Unp1[i][j] - local_Un[i][j]); if (local_iteration_error > my_iteration_error) { // #pragma omp critical my_iteration_error = local_iteration_error; } } } if(rank==0){ if(iteration_error > my_iteration_error){ iteration_error=my_iteration_error; } for(i=1;i<processes;i++){ MPI_Recv(&my_iteration_error,1,MPI_DOUBLE,i,1,MPI_COMM_WORLD,&status); if(iteration_error > my_iteration_error){ iteration_error=my_iteration_error; } } } else{ MPI_Send(&my_iteration_error,1,MPI_DOUBLE,0,1,MPI_COMM_WORLD); } MPI_Bcast(&iteration_error,1,MPI_DOUBLE,0,MPI_COMM_WORLD); MPI_Barrier(MPI_COMM_WORLD); //Do I just make everything Isend then call wait?? 
// Prepare for the next iteration: //Wait for all the updates to finish then we can start preperation for(i=0; i< m_iter; i++){ for(j=0;j<n_iter;j++){ local_Un[i][j]=local_Unp1[i][j]; } } /* */ // if(iteration_count % 1000 == 0) std::cout<< "The error between two iterates is " << iteration_error << std::endl; } // std::cout<<Unp1[5][5]<<","<<exact_solution(x(5),y(5))<<std::endl; } //Done with While loop //MPI_Barrier(MPI_COMM_WORLD); time2=MPI_Wtime(); int ii; int jj; int counter=0; // Compute the maximum error between the computed and exact solutions: double solution_error=0.0; double my_solution_error=0.0; for(i=1; i< m_iter-1; i++){ for(j=1; j<n_iter-1;j++){ double local_solution_error=fabs(local_Unp1[j][i] - exact_solution(x(i+(local_column)*m_iter),y(j+(local_row)*n_iter))); if(rank==10){ std::cout<<"error of rank "<<rank<<" is "<<local_solution_error<<" "<<j<<i<<std::endl; } if (local_solution_error > my_solution_error) { // my_solution_error = local_solution_error;} } } if(rank==0){ std::cout<<"checking solution error in master "<<my_solution_error<<std::endl; solution_error=my_solution_error; for(i=1;i<processes;i++){ MPI_Recv(&my_solution_error,1,MPI_DOUBLE,i,2,MPI_COMM_WORLD,MPI_STATUSES_IGNORE); //std::cout<<"Rank "<<rank<<"error"<<my_solution_error<<std::endl; if(my_solution_error > solution_error){ solution_error=my_solution_error; } } } else{ MPI_Send(&my_solution_error,1,MPI_DOUBLE,0,2,MPI_COMM_WORLD); } // time2=omp_get_wtime(); timeD=time2-time1; double diff; if(rank==0){ // Output: printf("\n\n"); std::cout<< "-------------------------------------------------------" << std::endl; std::cout<< "SUMMARY:" << std::endl << std::endl; std::cout<< "The error between two iterates is " << iteration_error << std::endl << std::endl; std::cout<< "The maximum error in the solution is " << solution_error <<std::endl; std::cout<< "iteration count" <<iteration_count<<" per process"<<std::endl; std::cout<< "time taken " <<timeD<<std::endl; std::cout<< 
"-------------------------------------------------------" << std::endl << std::endl; } /* for(i=0; i< m; i++){ for(j=1;j<n-1;j++){ std::cout<<counter<<","<<Unp1[i][j]<<std::endl; counter++; } } */ MPI_Finalize(); return 0; }