Example #1
0
//     N is size of physical grid from which the sparse system is derived
//     diag is the value of each diagonal element of the sparse matrix
//     recipdiag is the reciprocal of each diagonal element 
//     odiag is the value of each off-diagonal non-zero element in matrix
//     eps is the threshold for the convergence criterion
//     nprfreq is the number of iterations per output of current residual
//     xnew and xold are used to hold the N*N guess solution vector components
//     resid holds the current residual
//     rhoinit, rhonew hold the initial and current residual error norms, resp.
// Jacobi driver: reads the OpenMP thread count from argv[1], then repeats
// update()/rhocalc() until the residual norm falls below eps or maxiter
// iterations have run. Timing and sample solution values are printed only
// when the convergence test succeeds.
// Returns 0 after a completed run; EXIT_FAILURE if the thread-count argument
// is missing or invalid, or if the work arrays cannot be allocated.
int main (int argc, char * argv[])
{
  double *b = NULL;
  double *xold = NULL;
  double *xnew = NULL;
  double *resid = NULL;
  double rhoinit, rhonew;
  size_t cells;
  char *end;
  long requested;
  int iter;
  int status = EXIT_FAILURE;

  // Without this guard, starting the program with no arguments hands a
  // null argv[1] to the number parser.
  if (argc < 2) {
    fprintf(stderr, "usage: %s <num_threads>\n", argc > 0 ? argv[0] : "jacobi");
    return EXIT_FAILURE;
  }

  // strtol (unlike atoi) lets us detect non-numeric input. Reject anything
  // that is not a positive value representable as int, since that is what
  // omp_set_num_threads() takes.
  requested = strtol(argv[1], &end, 10);
  if (end == argv[1] || requested < 1 || (long)(int)requested != requested) {
    fprintf(stderr, "invalid thread count '%s': expected a positive integer\n", argv[1]);
    return EXIT_FAILURE;
  }
  omp_set_num_threads((int)requested);

  // The four (N+2)*(N+2) work arrays are heap-allocated: as automatic
  // arrays they can exceed the stack limit for large N.
  cells = (size_t)(N+2)*(N+2);
  b = malloc(cells * sizeof *b);
  xold = malloc(cells * sizeof *xold);
  xnew = malloc(cells * sizeof *xnew);
  resid = malloc(cells * sizeof *resid);
  if (b == NULL || xold == NULL || xnew == NULL || resid == NULL) {
    fprintf(stderr, "out of memory allocating %zu-element work arrays\n", cells);
    goto cleanup;
  }

  init(xold,xnew,b);
  // The initial norm is not consulted below; the call is retained so any
  // effect rhocalc() has is unchanged.
  rhoinit = rhocalc(b);
  (void)rhoinit;

  clkbegin = rtclock();
  for(iter=0;iter<maxiter;iter++){
    update(xold,xnew,resid,b);
    rhonew = rhocalc(resid);
    if(rhonew<eps){
      // Stop the clock before doing any reporting work.
      clkend = rtclock();
      t = clkend-clkbegin;

      // Row/column indices of the five sample points, in print order.
      const int lo = (N+2)/4;
      const int hi = 3*(N+2)/4;
      const int mid = (N+1)/2;
      const int probe[5][2] = { {lo,lo}, {lo,hi}, {mid,mid}, {hi,lo}, {hi,hi} };
      int k;

      printf("Solution converged in %d iterations\n",iter);
      printf("Final residual norm = %f\n",rhonew);
      printf("Solution at center and four corners of interior N/2 by N/2 grid : \n");
      for(k=0;k<5;k++){
        const int i = probe[k][0];
        const int j = probe[k][1];
        // xnew is stored row-major with a row length of N+2.
        printf("xnew[%d][%d]=%f\n",i,j,xnew[i*(N+2)+j]);
      }
      // iter is zero-based, so iter+1 passes have been executed.
      printf("Sequential Jacobi: Matrix Size = %d; %.1f GFLOPS; Time = %.3f sec; \n",
             N,13.0*1e-9*N*N*(iter+1)/t,t);
      break;
    }
    // Presumably carries the new iterate over into xold for the next pass
    // (copy() is defined elsewhere -- confirm argument order there).
    copy(xold,xnew);
    if((iter%nprfreq)==0){
      printf("Iter = %d Resid Norm = %f\n",iter,rhonew);
    }
  }
  status = 0;

cleanup:
  // free(NULL) is a no-op, so a partial allocation failure is handled too.
  free(resid);
  free(xnew);
  free(xold);
  free(b);
  return status;
}
// Jacobi driver using heap-allocated work arrays: repeats update()/rhocalc()
// until the residual norm falls below eps or maxiter iterations have run.
// Timing and sample solution values are printed only when the convergence
// test succeeds. argc/argv are not consulted.
// Returns 0 after a completed run; EXIT_FAILURE if the work arrays cannot
// be allocated.
int main (int argc, char * argv[])
{
  double *b = NULL;
  double *xold = NULL;
  double *xnew = NULL;
  double *resid = NULL;
  double rhoinit, rhonew;
  size_t cells;
  int iter;
  int status = EXIT_FAILURE;

  (void)argc;
  (void)argv;

  // Each work array holds (N+2)*(N+2) doubles. The count is computed in
  // size_t so the multiplication cannot overflow int.
  cells = (size_t)(N+2)*(N+2);
  b = malloc(cells * sizeof *b);
  xold = malloc(cells * sizeof *xold);
  xnew = malloc(cells * sizeof *xnew);
  resid = malloc(cells * sizeof *resid);
  // A failed allocation must not reach init()/update(), which would
  // dereference the null pointer.
  if (b == NULL || xold == NULL || xnew == NULL || resid == NULL) {
    fprintf(stderr, "out of memory allocating %zu-element work arrays\n", cells);
    goto cleanup;
  }

  init(xold,xnew,b);
  // The initial norm is not consulted below; the call is retained so any
  // effect rhocalc() has is unchanged.
  rhoinit = rhocalc(b);
  (void)rhoinit;

  clkbegin = rtclock();
  // The iteration loop itself is not parallelized: the number of iterations
  // is the same no matter how the work is divided among the processors.
  for(iter=0;iter<maxiter;iter++){
    update(xold,xnew,resid,b);
    rhonew = rhocalc(resid);
    if(rhonew<eps){
      // Stop the clock before doing any reporting work.
      clkend = rtclock();
      t = clkend-clkbegin;

      // Row/column indices of the five sample points, in print order.
      const int lo = (N+2)/4;
      const int hi = 3*(N+2)/4;
      const int mid = (N+1)/2;
      const int probe[5][2] = { {lo,lo}, {lo,hi}, {mid,mid}, {hi,lo}, {hi,hi} };
      int k;

      printf("Solution converged in %d iterations\n",iter);
      printf("Final residual norm = %f\n",rhonew);
      printf("Solution at center and four corners of interior N/2 by N/2 grid : \n");
      for(k=0;k<5;k++){
        const int i = probe[k][0];
        const int j = probe[k][1];
        // xnew is stored row-major with a row length of N+2.
        printf("xnew[%d][%d]=%f\n",i,j,xnew[i*(N+2)+j]);
      }
      // iter is zero-based, so iter+1 passes have been executed.
      printf("Sequential Jacobi: Matrix Size = %d; %.1f GFLOPS; Time = %.3f sec; \n",
	     N,13.0*1e-9*N*N*(iter+1)/t,t);
      break;
    }
    // Presumably carries the new iterate over into xold for the next pass
    // (copy() is defined elsewhere -- confirm argument order there).
    copy(xold,xnew);
    if((iter%nprfreq)==0){
      printf("Iter = %d Resid Norm = %f\n",iter,rhonew);
    }
  }
  status = 0;

cleanup:
  // free(NULL) is a no-op, so a partial allocation failure is handled too.
  free(resid);
  free(xnew);
  free(xold);
  free(b);
  return status;
}