// N is size of physical grid from which the sparse system is derived // diag is the value of each diagonal element of the sparse matrix // recipdiag is the reciprocal of each diagonal element // odiag is the value of each off-diagonal non-zero element in matrix // eps is the threshold for the convergence criterion // nprfreq is the number of iterations per output of currwnt residual // xnew and xold are used to hold the N*N guess solution vector components // resid holds the current residual // rhoinit, rhonew hold the initial and current residual error norms, resp. int main (int argc, char * argv[]) { double b[(N+2)*(N+2)]; double xold[(N+2)*(N+2)]; double xnew[(N+2)*(N+2)]; double resid[(N+2)*(N+2)]; double rhoinit,rhonew; int i,j,iter,u; omp_set_num_threads(atoi(argv[1])); init(xold,xnew,b); rhoinit = rhocalc(b); clkbegin = rtclock(); for(iter=0;iter<maxiter;iter++){ update(xold,xnew,resid,b); rhonew = rhocalc(resid); if(rhonew<eps){ clkend = rtclock(); t = clkend-clkbegin; printf("Solution converged in %d iterations\n",iter); printf("Final residual norm = %f\n",rhonew); printf("Solution at center and four corners of interior N/2 by N/2 grid : \n"); i=(N+2)/4; j=(N+2)/4; printf("xnew[%d][%d]=%f\n",i,j,xnew[i*(N+2)+j]); i=(N+2)/4; j=3*(N+2)/4; printf("xnew[%d][%d]=%f\n",i,j,xnew[i*(N+2)+j]); i=(N+1)/2; j=(N+1)/2; printf("xnew[%d][%d]=%f\n",i,j,xnew[i*(N+2)+j]); i=3*(N+2)/4; j=(N+2)/4; printf("xnew[%d][%d]=%f\n",i,j,xnew[i*(N+2)+j]); i=3*(N+2)/4; j=3*(N+2)/4; printf("xnew[%d][%d]=%f\n",i,j,xnew[i*(N+2)+j]); printf("Sequential Jacobi: Matrix Size = %d; %.1f GFLOPS; Time = %.3f sec; \n", N,13.0*1e-9*N*N*(iter+1)/t,t); break; } copy(xold,xnew); if((iter%nprfreq)==0) printf("Iter = %d Resid Norm = %f\n",iter,rhonew); } }
int main (int argc, char * argv[]) { double * b = malloc(sizeof(double)*(N+2)*(N+2)); double * xold = malloc(sizeof(double)*(N+2)*(N+2)); double * xnew = malloc(sizeof(double)*(N+2)*(N+2)); double * resid = malloc(sizeof(double)*(N+2)*(N+2)); double rhoinit,rhonew; int i,j,iter,u; init(xold,xnew,b); rhoinit = rhocalc(b); clkbegin = rtclock(); for(iter=0;iter<maxiter;iter++){ // This can of course not be parallelized. The number of iterations will be the same no matter how we divide the work up amongst the processors. update(xold,xnew,resid,b); rhonew = rhocalc(resid); if(rhonew<eps){ clkend = rtclock(); t = clkend-clkbegin; printf("Solution converged in %d iterations\n",iter); printf("Final residual norm = %f\n",rhonew); printf("Solution at center and four corners of interior N/2 by N/2 grid : \n"); i=(N+2)/4; j=(N+2)/4; printf("xnew[%d][%d]=%f\n",i,j,xnew[i*(N+2)+j]); i=(N+2)/4; j=3*(N+2)/4; printf("xnew[%d][%d]=%f\n",i,j,xnew[i*(N+2)+j]); i=(N+1)/2; j=(N+1)/2; printf("xnew[%d][%d]=%f\n",i,j,xnew[i*(N+2)+j]); i=3*(N+2)/4; j=(N+2)/4; printf("xnew[%d][%d]=%f\n",i,j,xnew[i*(N+2)+j]); i=3*(N+2)/4; j=3*(N+2)/4; printf("xnew[%d][%d]=%f\n",i,j,xnew[i*(N+2)+j]); printf("Sequential Jacobi: Matrix Size = %d; %.1f GFLOPS; Time = %.3f sec; \n", N,13.0*1e-9*N*N*(iter+1)/t,t); break; } copy(xold,xnew); if((iter%nprfreq)==0) printf("Iter = %d Resid Norm = %f\n",iter,rhonew); } }