Ejemplo n.º 1
0
Archivo: gmres.c Proyecto: LLNL/COGENT
HYPRE_Int
hypre_GMRESDestroy( void *gmres_vdata )
{
   hypre_GMRESData *gmres_data = gmres_vdata;
   hypre_GMRESFunctions *gmres_functions = gmres_data->functions;
   HYPRE_Int i;
 
   if (gmres_data)
   {
      if ( (gmres_data->logging>0) || (gmres_data->print_level) > 0 )
      {
         if ( (gmres_data -> norms) != NULL )
            hypre_TFreeF( gmres_data -> norms, gmres_functions );
      }
 
      if ( (gmres_data -> matvec_data) != NULL )
         (*(gmres_functions->MatvecDestroy))(gmres_data -> matvec_data);
 
      if ( (gmres_data -> r) != NULL )
         (*(gmres_functions->DestroyVector))(gmres_data -> r);
      if ( (gmres_data -> w) != NULL )
         (*(gmres_functions->DestroyVector))(gmres_data -> w);
      if ( (gmres_data -> w_2) != NULL )
         (*(gmres_functions->DestroyVector))(gmres_data -> w_2);


      if ( (gmres_data -> p) != NULL )
      {
         for (i = 0; i < (gmres_data -> k_dim+1); i++)
         {
            if ( (gmres_data -> p)[i] != NULL )
	       (*(gmres_functions->DestroyVector))( (gmres_data -> p) [i]);
         }
         hypre_TFreeF( gmres_data->p, gmres_functions );
      }
      hypre_TFreeF( gmres_data, gmres_functions );
      hypre_TFreeF( gmres_functions, gmres_functions );
   }
 
   return hypre_error_flag;
}
Ejemplo n.º 2
0
int
hypre_GMRESSolve(void  *gmres_vdata,
                 void  *A,
                 void  *b,
		 void  *x)
{
   hypre_GMRESData  *gmres_data   = gmres_vdata;
   hypre_GMRESFunctions *gmres_functions = gmres_data->functions;
   int 		     k_dim        = (gmres_data -> k_dim);
   int               min_iter     = (gmres_data -> min_iter);
   int 		     max_iter     = (gmres_data -> max_iter);
   int               rel_change   = (gmres_data -> rel_change);
   int 		     stop_crit    = (gmres_data -> stop_crit);
   double 	     accuracy     = (gmres_data -> tol);
   double 	     cf_tol       = (gmres_data -> cf_tol);
   void             *matvec_data  = (gmres_data -> matvec_data);

   void             *r            = (gmres_data -> r);
   void             *w            = (gmres_data -> w);
   void            **p            = (gmres_data -> p);

   int 	           (*precond)(void*, void*, void*, void*)   = (gmres_functions -> precond);
   int 	            *precond_data = (gmres_data -> precond_data);

   int             print_level    = (gmres_data -> print_level);
   int             logging        = (gmres_data -> logging);

   double         *norms          = (gmres_data -> norms);
/* not used yet   char           *log_file_name  = (gmres_data -> log_file_name);*/
/*   FILE           *fp; */
   
   int        ierr = 0;
   int        break_value = 0;
   int	      i, j, k;
   double     *rs, **hh, *c, *s;
   int        iter; 
   int        my_id, num_procs;
   double     epsilon, gamma, t, r_norm, b_norm, den_norm, x_norm;
   double     epsmac = 1.e-16; 
   double     ieee_check = 0.;

   double     guard_zero_residual; 
   double     cf_ave_0 = 0.0;
   double     cf_ave_1 = 0.0;
   double     weight;
   double     r_norm_0;
   double     relative_error;

   (gmres_data -> converged) = 0;
   /*-----------------------------------------------------------------------
    * With relative change convergence test on, it is possible to attempt
    * another iteration with a zero residual. This causes the parameter
    * alpha to go NaN. The guard_zero_residual parameter is to circumvent
    * this. Perhaps it should be set to something non-zero (but small).
    *-----------------------------------------------------------------------*/
   guard_zero_residual = 0.0;

   (*(gmres_functions->CommInfo))(A,&my_id,&num_procs);
   if ( logging>0 || print_level>0 )
   {
      norms          = (gmres_data -> norms);
      /* not used yet      log_file_name  = (gmres_data -> log_file_name);*/
      /* fp = fopen(log_file_name,"w"); */
   }

   /* initialize work arrays */
   rs = hypre_CTAllocF(double,k_dim+1,gmres_functions); 
   c = hypre_CTAllocF(double,k_dim,gmres_functions); 
   s = hypre_CTAllocF(double,k_dim,gmres_functions); 

   hh = hypre_CTAllocF(double*,k_dim+1,gmres_functions); 
   for (i=0; i < k_dim+1; i++)
   {	
   	hh[i] = hypre_CTAllocF(double,k_dim,gmres_functions); 
   }

   (*(gmres_functions->CopyVector))(b,p[0]);

   /* compute initial residual */
   (*(gmres_functions->Matvec))(matvec_data,-1.0, A, x, 1.0, p[0]);

   b_norm = sqrt((*(gmres_functions->InnerProd))(b,b));

   /* Since it is does not diminish performance, attempt to return an error flag
      and notify users when they supply bad input. */
   if (b_norm != 0.) ieee_check = b_norm/b_norm; /* INF -> NaN conversion */
   if (ieee_check != ieee_check)
   {
      /* ...INFs or NaNs in input can make ieee_check a NaN.  This test
         for ieee_check self-equality works on all IEEE-compliant compilers/
         machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754"
         by W. Kahan, May 31, 1996.  Currently (July 2002) this paper may be
         found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */
      if (logging > 0 || print_level > 0)
      {
        printf("\n\nERROR detected by Hypre ... BEGIN\n");
        printf("ERROR -- hypre_GMRESSolve: INFs and/or NaNs detected in input.\n");
        printf("User probably placed non-numerics in supplied b.\n");
        printf("Returning error flag += 101.  Program not terminated.\n");
        printf("ERROR detected by Hypre ... END\n\n\n");
      }
      ierr += 101;
      return ierr;
   }

   r_norm = sqrt((*(gmres_functions->InnerProd))(p[0],p[0]));
   r_norm_0 = r_norm;

   /* Since it is does not diminish performance, attempt to return an error flag
      and notify users when they supply bad input. */
   if (r_norm != 0.) ieee_check = r_norm/r_norm; /* INF -> NaN conversion */
   if (ieee_check != ieee_check)
   {
      /* ...INFs or NaNs in input can make ieee_check a NaN.  This test
         for ieee_check self-equality works on all IEEE-compliant compilers/
         machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754"
         by W. Kahan, May 31, 1996.  Currently (July 2002) this paper may be
         found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */
      if (logging > 0 || print_level > 0)
      {
        printf("\n\nERROR detected by Hypre ... BEGIN\n");
        printf("ERROR -- hypre_GMRESSolve: INFs and/or NaNs detected in input.\n");
        printf("User probably placed non-numerics in supplied A or x_0.\n");
        printf("Returning error flag += 101.  Program not terminated.\n");
        printf("ERROR detected by Hypre ... END\n\n\n");
      }
      ierr += 101;
      return ierr;
   }

   if ( logging>0 || print_level > 0)
   {
      norms[0] = r_norm;
      if ( print_level>1 && my_id == 0 )
      {
  	 printf("L2 norm of b: %e\n", b_norm);
         if (b_norm == 0.0)
            printf("Rel_resid_norm actually contains the residual norm\n");
         printf("Initial L2 norm of residual: %e\n", r_norm);
      
      }
   }
   iter = 0;

   if (b_norm > 0.0)
   {
/* convergence criterion |r_i|/|b| <= accuracy if |b| > 0 */
     den_norm= b_norm;
   }
   else
   {
/* convergence criterion |r_i|/|r0| <= accuracy if |b| = 0 */
     den_norm= r_norm;
   };

   epsilon= accuracy;

/* convergence criterion |r_i| <= accuracy , absolute residual norm*/
   if ( stop_crit && !rel_change )
      epsilon = accuracy;

   if ( print_level>1 && my_id == 0 )
   {
      if (b_norm > 0.0)
         {printf("=============================================\n\n");
          printf("Iters     resid.norm     conv.rate  rel.res.norm\n");
          printf("-----    ------------    ---------- ------------\n");
      
          }

      else
         {printf("=============================================\n\n");
          printf("Iters     resid.norm     conv.rate\n");
          printf("-----    ------------    ----------\n");
      
          };
   }

  /* set the relative_error to initially bypass the stopping criterion */
   if (rel_change)
   {
      relative_error= epsilon + 1.;
   }

   while (iter < max_iter)
   {
   /* initialize first term of hessenberg system */

	rs[0] = r_norm;
        if (r_norm == 0.0)
        {
           hypre_TFreeF(c,gmres_functions); 
           hypre_TFreeF(s,gmres_functions); 
           hypre_TFreeF(rs,gmres_functions);
           for (i=0; i < k_dim+1; i++) hypre_TFreeF(hh[i],gmres_functions);
           hypre_TFreeF(hh,gmres_functions); 
	   ierr = 0;
	   return ierr;
	}

	if (r_norm/den_norm <= epsilon && iter >= min_iter) 
        {
                if (rel_change)
                {
                   if (relative_error <= epsilon)
                   {
		      (*(gmres_functions->CopyVector))(b,r);
          	      (*(gmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r);
		      r_norm = sqrt((*(gmres_functions->InnerProd))(r,r));
		      if (r_norm/den_norm <= epsilon)
                      {
                         if ( print_level>1 && my_id == 0)
                         {
                            printf("\n\n");
                            printf("Final L2 norm of residual: %e\n\n", r_norm);
                         }
                         break;
                      }
                      else
                      if ( print_level>0 && my_id == 0)
                           printf("false convergence 1\n");
                   }
                }
                else
                {
                   (*(gmres_functions->CopyVector))(b,r);
                   (*(gmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r);
                   r_norm = sqrt((*(gmres_functions->InnerProd))(r,r));
                   if (r_norm/den_norm <= epsilon)
                   {
                       if ( print_level>1 && my_id == 0)
                       {
                            printf("\n\n");
                            printf("Final L2 norm of residual: %e\n\n", r_norm);
                       }
                       break;
                   }
                   else
                      if ( print_level>0 && my_id == 0)
                           printf("false convergence 1\n");
                }

	}

      	t = 1.0 / r_norm;
	(*(gmres_functions->ScaleVector))(t,p[0]);
	i = 0;
	while (i < k_dim && ( (r_norm/den_norm > epsilon || iter < min_iter)
                         || ((rel_change) && relative_error > epsilon) )
                         && iter < max_iter)
	{
		i++;
		iter++;
		(*(gmres_functions->ClearVector))(r);
		precond(precond_data, A, p[i-1], r);
		(*(gmres_functions->Matvec))(matvec_data, 1.0, A, r, 0.0, p[i]);
		/* modified Gram_Schmidt */
		for (j=0; j < i; j++)
		{
			hh[j][i-1] = (*(gmres_functions->InnerProd))(p[j],p[i]);
			(*(gmres_functions->Axpy))(-hh[j][i-1],p[j],p[i]);
		}
		t = sqrt((*(gmres_functions->InnerProd))(p[i],p[i]));
		hh[i][i-1] = t;	
		if (t != 0.0)
		{
			t = 1.0/t;
			(*(gmres_functions->ScaleVector))(t,p[i]);
		}
		/* done with modified Gram_schmidt and Arnoldi step.
		   update factorization of hh */
		for (j = 1; j < i; j++)
		{
			t = hh[j-1][i-1];
			hh[j-1][i-1] = c[j-1]*t + s[j-1]*hh[j][i-1];		
			hh[j][i-1] = -s[j-1]*t + c[j-1]*hh[j][i-1];
		}
		gamma = sqrt(hh[i-1][i-1]*hh[i-1][i-1] + hh[i][i-1]*hh[i][i-1]);
		if (gamma == 0.0) gamma = epsmac;
		c[i-1] = hh[i-1][i-1]/gamma;
		s[i-1] = hh[i][i-1]/gamma;
		rs[i] = -s[i-1]*rs[i-1];
		rs[i-1] = c[i-1]*rs[i-1];
		/* determine residual norm */
		hh[i-1][i-1] = c[i-1]*hh[i-1][i-1] + s[i-1]*hh[i][i-1];
		r_norm = fabs(rs[i]);
		if ( print_level>0 )
		{
		   norms[iter] = r_norm;
                   if ( print_level>1 && my_id == 0 )
   		   {
      		      if (b_norm > 0.0)
             	         printf("% 5d    %e    %f   %e\n", iter, 
				norms[iter],norms[iter]/norms[iter-1],
 	             		norms[iter]/b_norm);
      		      else
             	         printf("% 5d    %e    %f\n", iter, norms[iter],
				norms[iter]/norms[iter-1]);
   		   }
		}
                if (cf_tol > 0.0)
                {
                   cf_ave_0 = cf_ave_1;
           	   cf_ave_1 = pow( r_norm / r_norm_0, 1.0/(2.0*iter));

           	   weight   = fabs(cf_ave_1 - cf_ave_0);
           	   weight   = weight / hypre_max(cf_ave_1, cf_ave_0);
           	   weight   = 1.0 - weight;
#if 0
           	   printf("I = %d: cf_new = %e, cf_old = %e, weight = %e\n",
                	i, cf_ave_1, cf_ave_0, weight );
#endif
           	   if (weight * cf_ave_1 > cf_tol) 
		   {
		      break_value = 1;
		      break;
		   }
        	}

	}
	/* now compute solution, first solve upper triangular system */

	if (break_value) break;
	
	rs[i-1] = rs[i-1]/hh[i-1][i-1];
	for (k = i-2; k >= 0; k--)
	{
		t = rs[k];
		for (j = k+1; j < i; j++)
		{
			t -= hh[k][j]*rs[j];
		}
		rs[k] = t/hh[k][k];
	}
	
	(*(gmres_functions->CopyVector))(p[0],w);
	(*(gmres_functions->ScaleVector))(rs[0],w);
	for (j = 1; j < i; j++)
		(*(gmres_functions->Axpy))(rs[j], p[j], w);

	(*(gmres_functions->ClearVector))(r);
	precond(precond_data, A, w, r);

	(*(gmres_functions->Axpy))(1.0,r,x);

/* check for convergence, evaluate actual residual */
	if (r_norm/den_norm <= epsilon && iter >= min_iter) 
        {
                if (rel_change)
                {
                   x_norm = sqrt( (*(gmres_functions->InnerProd))(x,x) );
                   if ( x_norm<=guard_zero_residual ) break; /* don't divide by 0 */
		   r_norm = sqrt( (*(gmres_functions->InnerProd))(r,r) );
                   relative_error= r_norm/x_norm;
                }

		(*(gmres_functions->CopyVector))(b,r);
          	(*(gmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r);
		r_norm = sqrt( (*(gmres_functions->InnerProd))(r,r) );
		if (r_norm/den_norm <= epsilon)
                {
                   if ( print_level>1 && my_id == 0 )
                   {
                      printf("\n\n");
                      printf("Final L2 norm of residual: %e\n\n", r_norm);
                   }
                   if (rel_change && r_norm > guard_zero_residual)
                      /* Also test on relative change of iterates, x_i - x_(i-1) */
                   {  /* At this point r = x_i - x_(i-1) */
                      x_norm = sqrt( (*(gmres_functions->InnerProd))(x,x) );
                      if ( x_norm<=guard_zero_residual ) break; /* don't divide by 0 */
                      if ( relative_error < epsilon )
                      {
                         (gmres_data -> converged) = 1;
                         break;
                      }
                   }
                   else
                   {
                      (gmres_data -> converged) = 1;
                      break;
                   }
                }
		else 
                {
                   if ( print_level>0 && my_id == 0)
                      printf("false convergence 2\n");
		   (*(gmres_functions->CopyVector))(r,p[0]);
		   i = 0;
		}
	}

/* compute residual vector and continue loop */

	for (j=i ; j > 0; j--)
	{
		rs[j-1] = -s[j-1]*rs[j];
		rs[j] = c[j-1]*rs[j];
	}

	if (i) (*(gmres_functions->Axpy))(rs[0]-1.0,p[0],p[0]);
	for (j=1; j < i+1; j++)
		(*(gmres_functions->Axpy))(rs[j],p[j],p[0]);	
   }

   if ( print_level>1 && my_id == 0 )
          printf("\n\n"); 

   (gmres_data -> num_iterations) = iter;
   if (b_norm > 0.0)
      (gmres_data -> rel_residual_norm) = r_norm/b_norm;
   if (b_norm == 0.0)
      (gmres_data -> rel_residual_norm) = r_norm;

   if (iter >= max_iter && r_norm/den_norm > epsilon) ierr = 1;

   hypre_TFreeF(c,gmres_functions); 
   hypre_TFreeF(s,gmres_functions); 
   hypre_TFreeF(rs,gmres_functions);
 
   for (i=0; i < k_dim+1; i++)
   {	
   	hypre_TFreeF(hh[i],gmres_functions);
   }
   hypre_TFreeF(hh,gmres_functions); 

   return ierr;
}
Ejemplo n.º 3
0
HYPRE_Int
hypre_LGMRESSolve(void  *lgmres_vdata,
                 void  *A,
                 void  *b,
		 void  *x)
{
   hypre_LGMRESData  *lgmres_data   = (hypre_LGMRESData *)lgmres_vdata;
   hypre_LGMRESFunctions *lgmres_functions = lgmres_data->functions;
   HYPRE_Int 		     k_dim        = (lgmres_data -> k_dim);
   HYPRE_Int               min_iter     = (lgmres_data -> min_iter);
   HYPRE_Int 		     max_iter     = (lgmres_data -> max_iter);
   HYPRE_Real 	     r_tol        = (lgmres_data -> tol);
   HYPRE_Real 	     cf_tol       = (lgmres_data -> cf_tol);
   HYPRE_Real        a_tol        = (lgmres_data -> a_tol);
   void             *matvec_data  = (lgmres_data -> matvec_data);

   void             *r            = (lgmres_data -> r);
   void             *w            = (lgmres_data -> w);
   

   void            **p            = (lgmres_data -> p);

   /* lgmres  mod*/
   void          **aug_vecs       = (lgmres_data ->aug_vecs);
   void          **a_aug_vecs     = (lgmres_data ->a_aug_vecs);
   HYPRE_Int            *aug_order      = (lgmres_data->aug_order);
   HYPRE_Int             aug_dim        = (lgmres_data -> aug_dim);
   HYPRE_Int             approx_constant=  (lgmres_data ->approx_constant);
   HYPRE_Int             it_arnoldi, aug_ct, it_total, ii, order, it_aug;
   HYPRE_Int             spot = 0;
   HYPRE_Real      tmp_norm, r_norm_last;
   /*---*/

   HYPRE_Int 	           (*precond)(void*,void*,void*,void*)   = (lgmres_functions -> precond);
   HYPRE_Int 	            *precond_data = (HYPRE_Int*)(lgmres_data -> precond_data);

   HYPRE_Int             print_level    = (lgmres_data -> print_level);
   HYPRE_Int             logging        = (lgmres_data -> logging);

   HYPRE_Real     *norms          = (lgmres_data -> norms);
   
   HYPRE_Int        break_value = 0;
   HYPRE_Int	      i, j, k;
   HYPRE_Real *rs, **hh, *c, *s; 
   HYPRE_Int        iter; 
   HYPRE_Int        my_id, num_procs;
   HYPRE_Real epsilon, gamma, t, r_norm, b_norm, den_norm;
   
   HYPRE_Real epsmac = 1.e-16; 
   HYPRE_Real ieee_check = 0.;

   HYPRE_Real cf_ave_0 = 0.0;
   HYPRE_Real cf_ave_1 = 0.0;
   HYPRE_Real weight;
   HYPRE_Real r_norm_0;


   /* We are not checking rel. change for now... */
   

   (lgmres_data -> converged) = 0;
   /*-----------------------------------------------------------------------
    * With relative change convergence test on, it is possible to attempt
    * another iteration with a zero residual. This causes the parameter
    * alpha to go NaN. The guard_zero_residual parameter is to circumvent
    * this. Perhaps it should be set to something non-zero (but small).
    *-----------------------------------------------------------------------*/

   (*(lgmres_functions->CommInfo))(A,&my_id,&num_procs);
   if ( logging>0 || print_level>0 )
   {
      norms          = (lgmres_data -> norms);
      /* not used yet      log_file_name  = (lgmres_data -> log_file_name);*/
      /* fp = fopen(log_file_name,"w"); */
   }

   /* initialize work arrays  - lgmres includes aug_dim*/
   rs = hypre_CTAllocF(HYPRE_Real,k_dim+1+aug_dim,lgmres_functions); 
   c = hypre_CTAllocF(HYPRE_Real,k_dim+aug_dim,lgmres_functions); 
   s = hypre_CTAllocF(HYPRE_Real,k_dim+aug_dim,lgmres_functions); 


   
  /* lgmres mod. - need non-modified hessenberg to avoid aug_dim matvecs */
   hh = hypre_CTAllocF(HYPRE_Real*,k_dim+aug_dim+1,lgmres_functions); 
   for (i=0; i < k_dim+aug_dim+1; i++)
   {	
   	hh[i] = hypre_CTAllocF(HYPRE_Real,k_dim+aug_dim,lgmres_functions); 
   }
   
   (*(lgmres_functions->CopyVector))(b,p[0]);

   /* compute initial residual */
   (*(lgmres_functions->Matvec))(matvec_data,-1.0, A, x, 1.0, p[0]);

   b_norm = sqrt((*(lgmres_functions->InnerProd))(b,b));

   /* Since it is does not diminish performance, attempt to return an error flag
      and notify users when they supply bad input. */
   if (b_norm != 0.) ieee_check = b_norm/b_norm; /* INF -> NaN conversion */
   if (ieee_check != ieee_check)
   {
      /* ...INFs or NaNs in input can make ieee_check a NaN.  This test
         for ieee_check self-equality works on all IEEE-compliant compilers/
         machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754"
         by W. Kahan, May 31, 1996.  Currently (July 2002) this paper may be
         found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */
      if (logging > 0 || print_level > 0)
      {
        hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n");
        hypre_printf("ERROR -- hypre_LGMRESSolve: INFs and/or NaNs detected in input.\n");
        hypre_printf("User probably placed non-numerics in supplied b.\n");
        hypre_printf("Returning error flag += 101.  Program not terminated.\n");
        hypre_printf("ERROR detected by Hypre ... END\n\n\n");
      }
      hypre_error(HYPRE_ERROR_GENERIC);
      return hypre_error_flag;
   }

   r_norm = sqrt((*(lgmres_functions->InnerProd))(p[0],p[0]));
   r_norm_0 = r_norm;

   /* Since it is does not diminish performance, attempt to return an error flag
      and notify users when they supply bad input. */
   if (r_norm != 0.) ieee_check = r_norm/r_norm; /* INF -> NaN conversion */
   if (ieee_check != ieee_check)
   {
      /* ...INFs or NaNs in input can make ieee_check a NaN.  This test
         for ieee_check self-equality works on all IEEE-compliant compilers/
         machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754"
         by W. Kahan, May 31, 1996.  Currently (July 2002) this paper may be
         found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */
      if (logging > 0 || print_level > 0)
      {
        hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n");
        hypre_printf("ERROR -- hypre_LGMRESSolve: INFs and/or NaNs detected in input.\n");
        hypre_printf("User probably placed non-numerics in supplied A or x_0.\n");
        hypre_printf("Returning error flag += 101.  Program not terminated.\n");
        hypre_printf("ERROR detected by Hypre ... END\n\n\n");
      }
      hypre_error(HYPRE_ERROR_GENERIC);
      return hypre_error_flag;
   }

   if ( logging>0 || print_level > 0)
   {
      norms[0] = r_norm;
      if ( print_level>1 && my_id == 0 )
      {
  	 hypre_printf("L2 norm of b: %e\n", b_norm);
         if (b_norm == 0.0)
            hypre_printf("Rel_resid_norm actually contains the residual norm\n");
         hypre_printf("Initial L2 norm of residual: %e\n", r_norm);
      
      }
   }
   iter = 0;

   if (b_norm > 0.0)
   {
/* convergence criterion |r_i|/|b| <= accuracy if |b| > 0 */
     den_norm= b_norm;
   }
   else
   {
/* convergence criterion |r_i|/|r0| <= accuracy if |b| = 0 */
     den_norm= r_norm;
   };

  /* convergence criteria: |r_i| <= max( a_tol, r_tol * den_norm)
      den_norm = |r_0| or |b|
      note: default for a_tol is 0.0, so relative residual criteria is used unless
            user specifies a_tol, or sets r_tol = 0.0, which means absolute
            tol only is checked  */
      
   epsilon = hypre_max(a_tol,r_tol*den_norm);
   
   /* so now our stop criteria is |r_i| <= epsilon */
 

   if ( print_level>1 && my_id == 0 )
   {
      if (b_norm > 0.0)
         {hypre_printf("=============================================\n\n");
          hypre_printf("Iters     resid.norm     conv.rate  rel.res.norm\n");
          hypre_printf("-----    ------------    ---------- ------------\n");
      
          }

      else
         {hypre_printf("=============================================\n\n");
          hypre_printf("Iters     resid.norm     conv.rate\n");
          hypre_printf("-----    ------------    ----------\n");
      
          };
   }

   
   
/*lgmres initialization */
   for (ii=0; ii<aug_dim; ii++) {
      aug_order[ii] = 0;
   }
   aug_ct = 0; /* number of aug. vectors available */



   /* outer iteration cycle */
   while (iter < max_iter)
   {
   /* initialize first term of hessenberg system */

	rs[0] = r_norm;
        if (r_norm == 0.0)
        {
           hypre_TFreeF(c,lgmres_functions); 
           hypre_TFreeF(s,lgmres_functions); 
           hypre_TFreeF(rs,lgmres_functions);
           for (i=0; i < k_dim+aug_dim+1; i++) {
              hypre_TFreeF(hh[i],lgmres_functions);
           }

           hypre_TFreeF(hh,lgmres_functions); 
	   return hypre_error_flag;
           
	}

        /* see if we are already converged and 
           should print the final norm and exit */
	if (r_norm <= epsilon && iter >= min_iter) 
        {
           (*(lgmres_functions->CopyVector))(b,r);
           (*(lgmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r);
           r_norm = sqrt((*(lgmres_functions->InnerProd))(r,r));
           if (r_norm  <= epsilon)
           {
              if ( print_level>1 && my_id == 0)
              {
                 hypre_printf("\n\n");
                 hypre_printf("Final L2 norm of residual: %e\n\n", r_norm);
              }
              break;
              }
           else
              if ( print_level>0 && my_id == 0)
                 hypre_printf("false convergence 1\n");
           
	}
        
      	t = 1.0 / r_norm;
        r_norm_last = r_norm;
	
        (*(lgmres_functions->ScaleVector))(t,p[0]);
	i = 0;

        /* lgmres mod: determine number of arnoldi steps to take */
        /* if approx_constant then we keep the space the same size
           even if we don't have the full number of aug vectors yet*/
        if (approx_constant) {
           it_arnoldi = k_dim - aug_ct;
        } else {
           it_arnoldi = k_dim - aug_dim; 
        }
        it_total =  it_arnoldi + aug_ct;
        it_aug = 0; /* keep track of augmented iterations */


        /***RESTART CYCLE (right-preconditioning) ***/
        while (i < it_total && iter < max_iter)
	{
           i++;
           iter++;
           (*(lgmres_functions->ClearVector))(r);


           /*LGMRES_MOD: decide whether this is an arnoldi step or an aug step */ 
           if ( i <= it_arnoldi) 
           { /* Arnoldi */
              precond(precond_data, A, p[i-1], r);
              (*(lgmres_functions->Matvec))(matvec_data, 1.0, A, r, 0.0, p[i]);
           } else 
           { /*lgmres aug step */
              it_aug ++;
              order = i - it_arnoldi - 1; /* which aug step (note i starts at 1) - aug order number at 0*/ 
              for (ii=0; ii<aug_dim; ii++) 
              {
                 if (aug_order[ii] == order) 
                 {
                    spot = ii;
                    break; /* must have this because there will be duplicates before aug_ct = aug_dim */ 
                 }  
              }
              /* copy a_aug_vecs[spot] to p[i] */ 
              (*(lgmres_functions->CopyVector))(a_aug_vecs[spot],p[i]);
              
              /*note: an alternate implementation choice would be to only save the AUGVECS and
                not A_AUGVEC and then apply the PC here to the augvec */
           }
           /*---*/

           /* modified Gram_Schmidt */
           for (j=0; j < i; j++)
           {
              hh[j][i-1] = (*(lgmres_functions->InnerProd))(p[j],p[i]);
              (*(lgmres_functions->Axpy))(-hh[j][i-1],p[j],p[i]);
           }
           t = sqrt((*(lgmres_functions->InnerProd))(p[i],p[i]));
           hh[i][i-1] = t;	
           if (t != 0.0)
           {
              t = 1.0/t;
              (*(lgmres_functions->ScaleVector))(t,p[i]);
           }


           /* done with modified Gram_schmidt and Arnoldi step.
              update factorization of hh */
           for (j = 1; j < i; j++)
           {
              t = hh[j-1][i-1];
              hh[j-1][i-1] = s[j-1]*hh[j][i-1] + c[j-1]*t;
              hh[j][i-1] = -s[j-1]*t + c[j-1]*hh[j][i-1];
           }
           t= hh[i][i-1]*hh[i][i-1];
           t+= hh[i-1][i-1]*hh[i-1][i-1];
           gamma = sqrt(t);
           if (gamma == 0.0) gamma = epsmac;
           c[i-1] = hh[i-1][i-1]/gamma;
           s[i-1] = hh[i][i-1]/gamma;
           rs[i] = -hh[i][i-1]*rs[i-1];
           rs[i]/=  gamma;
           rs[i-1] = c[i-1]*rs[i-1];
           /* determine residual norm */
           hh[i-1][i-1] = s[i-1]*hh[i][i-1] + c[i-1]*hh[i-1][i-1];
           r_norm = fabs(rs[i]);

           /* print ? */
           if ( print_level>0 )
           {
              norms[iter] = r_norm;
              if ( print_level>1 && my_id == 0 )
              {
                 if (b_norm > 0.0)
                    hypre_printf("% 5d    %e    %f   %e\n", iter, 
                           norms[iter],norms[iter]/norms[iter-1],
                           norms[iter]/b_norm);
                 else
                    hypre_printf("% 5d    %e    %f\n", iter, norms[iter],
                           norms[iter]/norms[iter-1]);
              }
           }
           /*convergence factor tolerance */
           if (cf_tol > 0.0)
           {
              cf_ave_0 = cf_ave_1;
              cf_ave_1 = pow( r_norm / r_norm_0, 1.0/(2.0*iter));
              
              weight   = fabs(cf_ave_1 - cf_ave_0);
              weight   = weight / hypre_max(cf_ave_1, cf_ave_0);
              weight   = 1.0 - weight;
#if 0
              hypre_printf("I = %d: cf_new = %e, cf_old = %e, weight = %e\n",
                     i, cf_ave_1, cf_ave_0, weight );
#endif
              if (weight * cf_ave_1 > cf_tol) 
              {
                 break_value = 1;
                 break;
              }
           }
           /* should we exit the restart cycle? (conv. check) */
           if (r_norm <= epsilon && iter >= min_iter)
           {
                 break;
           }
           

	} /*** end of restart cycle ***/

	/* now compute solution, first solve upper triangular system */

	if (break_value) break;
	
	rs[i-1] = rs[i-1]/hh[i-1][i-1];
	for (k = i-2; k >= 0; k--)
	{
           t = 0.0;
           for (j = k+1; j < i; j++)
           {
              t -= hh[k][j]*rs[j];
           }
           t+= rs[k];
           rs[k] = t/hh[k][k];
	}
        /* form linear combination of p's to get solution */
        /* put the new aug_vector in aug_vecs[aug_dim]  - a temp position*/	
        /* i = number of iterations */  
        /* it_aug = number of augmented iterations */ 
        /* it_arnoldi = number of arnoldi iterations */


        /*check if exited early before all arnoldi its */
        if (it_arnoldi > i) it_arnoldi = i; 


        if (!it_aug)
        {
           (*(lgmres_functions->CopyVector))(p[i-1],w);
           (*(lgmres_functions->ScaleVector))(rs[i-1],w);
           for (j = i-2; j >=0; j--)
              (*(lgmres_functions->Axpy))(rs[j], p[j], w);
        }
        else /* need some of the augvecs */
        {
           (*(lgmres_functions->CopyVector))(p[0],w);
           (*(lgmres_functions->ScaleVector))(rs[0],w);

           /* reg. arnoldi directions */  
           for (j = 1; j < it_arnoldi; j++) /*first one already done */
           {
              (*(lgmres_functions->Axpy))(rs[j], p[j], w);
           }
            
           /* augment directions */
           for (ii=0; ii<it_aug; ii++) 
           {
              for (j=0; j<aug_dim; j++) 
              {
                 if (aug_order[j] == ii) 
                 {
                    spot = j;
                    break; /* must have this because there will be
                            * duplicates before aug_ct = aug_dim */ 
                 }  
              }
              (*(lgmres_functions->Axpy))(rs[it_arnoldi+ii], aug_vecs[spot], w);
           }
        }


        /* grab the new aug vector before the prec*/
        (*(lgmres_functions->CopyVector))(w,aug_vecs[aug_dim]);
        
	(*(lgmres_functions->ClearVector))(r);
	/* find correction (in r) (un-wind precond.)*/
        precond(precond_data, A, w, r);

        /* update current solution x (in x) */
	(*(lgmres_functions->Axpy))(1.0,r,x);
         

        /* check for convergence by evaluating the actual residual */
	if (r_norm <= epsilon && iter >= min_iter) 
        {
           /* calculate actual residual norm*/
           (*(lgmres_functions->CopyVector))(b,r);
           (*(lgmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r);
           r_norm = sqrt( (*(lgmres_functions->InnerProd))(r,r) );
           
           if (r_norm <= epsilon)
           {
              if ( print_level>1 && my_id == 0 )
              {
                 hypre_printf("\n\n");
                 hypre_printf("Final L2 norm of residual: %e\n\n", r_norm);
              }
              (lgmres_data -> converged) = 1;
              break;
           }
           else /* conv. has not occurred, according to true residual */ 
           {
              if ( print_level>0 && my_id == 0)
                 hypre_printf("false convergence 2\n");
              (*(lgmres_functions->CopyVector))(r,p[0]);
              i = 0;
           }
	} /* end of convergence check */
        
        /* compute residual vector and continue loop */

        /* copy r0 (not scaled) to w*/  
	(*(lgmres_functions->CopyVector))(p[0],w);
	(*(lgmres_functions->ScaleVector))(r_norm_last,w);


	for (j=i ; j > 0; j--)
	{
           rs[j-1] = -s[j-1]*rs[j];
           rs[j] = c[j-1]*rs[j];
	}
        
        if (i) (*(lgmres_functions->Axpy))(rs[i]-1.0,p[i],p[i]);
        for (j=i-1 ; j > 0; j--)
           (*(lgmres_functions->Axpy))(rs[j],p[j],p[i]);
        
        if (i)
        {
           (*(lgmres_functions->Axpy))(rs[0]-1.0,p[0],p[0]);
           (*(lgmres_functions->Axpy))(1.0,p[i],p[0]);
        }

        /* lgmres mod */  
        /* collect aug vector and A*augvector for future restarts -
           only if we will be restarting (i.e. this cycle performed it_total
           iterations). ordering starts at 0.*/
        if (aug_dim > 0) 
        {
           if (!aug_ct) 
           {
              spot = 0;
              aug_ct++;
           } 
           else if (aug_ct < aug_dim) 
           {
              spot = aug_ct;
              aug_ct++;
           } 
           else 
           { /* truncate - already have aug_dim number of vectors*/
              for (ii=0; ii<aug_dim; ii++) 
              {
                 if (aug_order[ii] == (aug_dim-1)) 
                 {
                    spot = ii;
                 }  
              }
           } 
           /* aug_vecs[aug_dim] contains new aug vector */
           (*(lgmres_functions->CopyVector))(aug_vecs[aug_dim], aug_vecs[spot]);
           /*need to normalize */
           tmp_norm = sqrt((*(lgmres_functions->InnerProd))(aug_vecs[spot], aug_vecs[spot]));
           
           tmp_norm = 1.0/tmp_norm;
           (*(lgmres_functions->ScaleVector))(tmp_norm ,aug_vecs[spot]);
           
           /*set new aug vector to order 0  - move all others back one */
           for (ii=0; ii < aug_dim; ii++) 
           {
              aug_order[ii]++;
           } 
           aug_order[spot] = 0; 

           /*now add the A*aug vector to A_AUGVEC(spot) - this is
            * independ. of preconditioning type*/
           /* A*augvec = V*H*y  = r0-rm   (r0 is in w and rm is in p[0])*/
           (*(lgmres_functions->CopyVector))( w, a_aug_vecs[spot]);
           (*(lgmres_functions->ScaleVector))(- 1.0, a_aug_vecs[spot]); /* -r0*/
           (*(lgmres_functions->Axpy))(1.0, p[0],a_aug_vecs[spot]); /* rm - r0 */
           (*(lgmres_functions->ScaleVector))(-tmp_norm, a_aug_vecs[spot]); /* r0-rm /norm */
           
        }
        
   } /* END of iteration while loop */
        

   if ( print_level>1 && my_id == 0 )
          hypre_printf("\n\n"); 

   (lgmres_data -> num_iterations) = iter;
   if (b_norm > 0.0)
      (lgmres_data -> rel_residual_norm) = r_norm/b_norm;
   if (b_norm == 0.0)
      (lgmres_data -> rel_residual_norm) = r_norm;

   if (iter >= max_iter && r_norm > epsilon) hypre_error(HYPRE_ERROR_CONV);
   

   hypre_TFreeF(c,lgmres_functions); 
   hypre_TFreeF(s,lgmres_functions); 
   hypre_TFreeF(rs,lgmres_functions);

   for (i=0; i < k_dim+1+aug_dim; i++)
   {	
   	hypre_TFreeF(hh[i],lgmres_functions);
   }
   hypre_TFreeF(hh,lgmres_functions); 

   return hypre_error_flag;
}
Ejemplo n.º 4
0
HYPRE_Int
hypre_LGMRESDestroy( void *lgmres_vdata )
{
	hypre_LGMRESData *lgmres_data = (hypre_LGMRESData *)lgmres_vdata;
   HYPRE_Int i;
 
   if (lgmres_data)
   {
      hypre_LGMRESFunctions *lgmres_functions = lgmres_data->functions;
      if ( (lgmres_data->logging>0) || (lgmres_data->print_level) > 0 )
      {
         if ( (lgmres_data -> norms) != NULL )
            hypre_TFreeF( lgmres_data -> norms, lgmres_functions );
      }
 
      if ( (lgmres_data -> matvec_data) != NULL )
         (*(lgmres_functions->MatvecDestroy))(lgmres_data -> matvec_data);
 
      if ( (lgmres_data -> r) != NULL )
         (*(lgmres_functions->DestroyVector))(lgmres_data -> r);
      if ( (lgmres_data -> w) != NULL )
         (*(lgmres_functions->DestroyVector))(lgmres_data -> w);
      if ( (lgmres_data -> w_2) != NULL )
         (*(lgmres_functions->DestroyVector))(lgmres_data -> w_2);


      if ( (lgmres_data -> p) != NULL )
      {
         for (i = 0; i < (lgmres_data -> k_dim+1); i++)
         {
            if ( (lgmres_data -> p)[i] != NULL )
	       (*(lgmres_functions->DestroyVector))( (lgmres_data -> p) [i]);
         }
         hypre_TFreeF( lgmres_data->p, lgmres_functions );
      }

      /* lgmres mod */
      if ( (lgmres_data -> aug_vecs) != NULL )
      {
         for (i = 0; i < (lgmres_data -> aug_dim + 1); i++)
         {
            if ( (lgmres_data -> aug_vecs)[i] != NULL )
	       (*(lgmres_functions->DestroyVector))( (lgmres_data -> aug_vecs) [i]);
         }
         hypre_TFreeF( lgmres_data->aug_vecs, lgmres_functions );
      }
      if ( (lgmres_data -> a_aug_vecs) != NULL )
      {
         for (i = 0; i < (lgmres_data -> aug_dim); i++)
         {
            if ( (lgmres_data -> a_aug_vecs)[i] != NULL )
	       (*(lgmres_functions->DestroyVector))( (lgmres_data -> a_aug_vecs) [i]);
         }
         hypre_TFreeF( lgmres_data->a_aug_vecs, lgmres_functions );
      }
      /*---*/

      hypre_TFreeF(lgmres_data->aug_order, lgmres_functions);
      


      hypre_TFreeF( lgmres_data, lgmres_functions );
      hypre_TFreeF( lgmres_functions, lgmres_functions );
   }
 
   return hypre_error_flag;
}
Ejemplo n.º 5
0
Archivo: gmres.c Proyecto: LLNL/COGENT
HYPRE_Int
hypre_GMRESSolve(void  *gmres_vdata,
                 void  *A,
                 void  *b,
		 void  *x)
{
   hypre_GMRESData  *gmres_data   = gmres_vdata;
   hypre_GMRESFunctions *gmres_functions = gmres_data->functions;
   HYPRE_Int 		     k_dim        = (gmres_data -> k_dim);
   HYPRE_Int               min_iter     = (gmres_data -> min_iter);
   HYPRE_Int 		     max_iter     = (gmres_data -> max_iter);
   HYPRE_Int               rel_change   = (gmres_data -> rel_change);
   HYPRE_Int         skip_real_r_check  = (gmres_data -> skip_real_r_check);
   double 	     r_tol        = (gmres_data -> tol);
   double 	     cf_tol       = (gmres_data -> cf_tol);
   double            a_tol        = (gmres_data -> a_tol);
   void             *matvec_data  = (gmres_data -> matvec_data);

   void             *r            = (gmres_data -> r);
   void             *w            = (gmres_data -> w);
   /* note: w_2 is only allocated if rel_change = 1 */
   void             *w_2          = (gmres_data -> w_2); 

   void            **p            = (gmres_data -> p);


   HYPRE_Int 	           (*precond)()   = (gmres_functions -> precond);
   HYPRE_Int 	            *precond_data = (gmres_data -> precond_data);

   HYPRE_Int             print_level    = (gmres_data -> print_level);
   HYPRE_Int             logging        = (gmres_data -> logging);

   double         *norms          = (gmres_data -> norms);
/* not used yet   char           *log_file_name  = (gmres_data -> log_file_name);*/
/*   FILE           *fp; */
   
   HYPRE_Int        break_value = 0;
   HYPRE_Int	      i, j, k;
   double     *rs, **hh, *c, *s, *rs_2; 
   HYPRE_Int        iter; 
   HYPRE_Int        my_id, num_procs;
   double     epsilon, gamma, t, r_norm, b_norm, den_norm, x_norm;
   double     w_norm;
   
   double     epsmac = 1.e-16; 
   double     ieee_check = 0.;

   double     guard_zero_residual; 
   double     cf_ave_0 = 0.0;
   double     cf_ave_1 = 0.0;
   double     weight;
   double     r_norm_0;
   double     relative_error = 1.0;

   HYPRE_Int        rel_change_passed = 0, num_rel_change_check = 0;

   double     real_r_norm_old, real_r_norm_new;

   (gmres_data -> converged) = 0;
   /*-----------------------------------------------------------------------
    * With relative change convergence test on, it is possible to attempt
    * another iteration with a zero residual. This causes the parameter
    * alpha to go NaN. The guard_zero_residual parameter is to circumvent
    * this. Perhaps it should be set to something non-zero (but small).
    *-----------------------------------------------------------------------*/
   guard_zero_residual = 0.0;

   (*(gmres_functions->CommInfo))(A,&my_id,&num_procs);
   if ( logging>0 || print_level>0 )
   {
      norms          = (gmres_data -> norms);
   }

   /* initialize work arrays */
   rs = hypre_CTAllocF(double,k_dim+1,gmres_functions); 
   c = hypre_CTAllocF(double,k_dim,gmres_functions); 
   s = hypre_CTAllocF(double,k_dim,gmres_functions); 
   if (rel_change) rs_2 = hypre_CTAllocF(double,k_dim+1,gmres_functions); 
   


   hh = hypre_CTAllocF(double*,k_dim+1,gmres_functions); 
   for (i=0; i < k_dim+1; i++)
   {	
   	hh[i] = hypre_CTAllocF(double,k_dim,gmres_functions); 
   }

   (*(gmres_functions->CopyVector))(b,p[0]);

   /* compute initial residual */
   (*(gmres_functions->Matvec))(matvec_data,-1.0, A, x, 1.0, p[0]);

   b_norm = sqrt((*(gmres_functions->InnerProd))(b,b));
   real_r_norm_old = b_norm;

   /* Since it is does not diminish performance, attempt to return an error flag
      and notify users when they supply bad input. */
   if (b_norm != 0.) ieee_check = b_norm/b_norm; /* INF -> NaN conversion */
   if (ieee_check != ieee_check)
   {
      /* ...INFs or NaNs in input can make ieee_check a NaN.  This test
         for ieee_check self-equality works on all IEEE-compliant compilers/
         machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754"
         by W. Kahan, May 31, 1996.  Currently (July 2002) this paper may be
         found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */
      if (logging > 0 || print_level > 0)
      {
        hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n");
        hypre_printf("ERROR -- hypre_GMRESSolve: INFs and/or NaNs detected in input.\n");
        hypre_printf("User probably placed non-numerics in supplied b.\n");
        hypre_printf("Returning error flag += 101.  Program not terminated.\n");
        hypre_printf("ERROR detected by Hypre ... END\n\n\n");
      }
      hypre_error(HYPRE_ERROR_GENERIC);
      return hypre_error_flag;
   }

   r_norm = sqrt((*(gmres_functions->InnerProd))(p[0],p[0]));
   r_norm_0 = r_norm;

   /* Since it is does not diminish performance, attempt to return an error flag
      and notify users when they supply bad input. */
   if (r_norm != 0.) ieee_check = r_norm/r_norm; /* INF -> NaN conversion */
   if (ieee_check != ieee_check)
   {
      /* ...INFs or NaNs in input can make ieee_check a NaN.  This test
         for ieee_check self-equality works on all IEEE-compliant compilers/
         machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754"
         by W. Kahan, May 31, 1996.  Currently (July 2002) this paper may be
         found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */
      if (logging > 0 || print_level > 0)
      {
        hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n");
        hypre_printf("ERROR -- hypre_GMRESSolve: INFs and/or NaNs detected in input.\n");
        hypre_printf("User probably placed non-numerics in supplied A or x_0.\n");
        hypre_printf("Returning error flag += 101.  Program not terminated.\n");
        hypre_printf("ERROR detected by Hypre ... END\n\n\n");
      }
      hypre_error(HYPRE_ERROR_GENERIC);
      return hypre_error_flag;
   }

   if ( logging>0 || print_level > 0)
   {
      norms[0] = r_norm;
      if ( print_level>1 && my_id == 0 )
      {
  	 hypre_printf("L2 norm of b: %e\n", b_norm);
         if (b_norm == 0.0)
            hypre_printf("Rel_resid_norm actually contains the residual norm\n");
         hypre_printf("Initial L2 norm of residual: %e\n", r_norm);
      
      }
   }
   iter = 0;

   if (b_norm > 0.0)
   {
     /* convergence criterion |r_i|/|b| <= accuracy if |b| > 0 */
     den_norm= b_norm;
   }
   else
   {
     /* convergence criterion |r_i|/|r0| <= accuracy if |b| = 0 */
     den_norm= r_norm;
   };


   /* convergence criteria: |r_i| <= max( a_tol, r_tol * den_norm)
      den_norm = |r_0| or |b|
      note: default for a_tol is 0.0, so relative residual criteria is used unless
            user specifies a_tol, or sets r_tol = 0.0, which means absolute
            tol only is checked  */
      
   epsilon = hypre_max(a_tol,r_tol*den_norm);
   
   /* so now our stop criteria is |r_i| <= epsilon */

   if ( print_level>1 && my_id == 0 )
   {
      if (b_norm > 0.0)
         {hypre_printf("=============================================\n\n");
          hypre_printf("Iters     resid.norm     conv.rate  rel.res.norm\n");
          hypre_printf("-----    ------------    ---------- ------------\n");
      
          }

      else
         {hypre_printf("=============================================\n\n");
          hypre_printf("Iters     resid.norm     conv.rate\n");
          hypre_printf("-----    ------------    ----------\n");
      
          };
   }


   /* once the rel. change check has passed, we do not want to check it again */
   rel_change_passed = 0;


   /* outer iteration cycle */
   while (iter < max_iter)
   {
   /* initialize first term of hessenberg system */

	rs[0] = r_norm;
        if (r_norm == 0.0)
        {
           hypre_TFreeF(c,gmres_functions); 
           hypre_TFreeF(s,gmres_functions); 
           hypre_TFreeF(rs,gmres_functions);
           if (rel_change)  hypre_TFreeF(rs_2,gmres_functions);
           for (i=0; i < k_dim+1; i++) hypre_TFreeF(hh[i],gmres_functions);
           hypre_TFreeF(hh,gmres_functions); 
	   return hypre_error_flag;
           
	}

        /* see if we are already converged and 
           should print the final norm and exit */
	if (r_norm  <= epsilon && iter >= min_iter) 
        {
           if (!rel_change) /* shouldn't exit after no iterations if
                             * relative change is on*/
           {
              (*(gmres_functions->CopyVector))(b,r);
              (*(gmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r);
              r_norm = sqrt((*(gmres_functions->InnerProd))(r,r));
              if (r_norm  <= epsilon)
              {
                 if ( print_level>1 && my_id == 0)
                 {
                    hypre_printf("\n\n");
                    hypre_printf("Final L2 norm of residual: %e\n\n", r_norm);
                 }
                 break;
              }
              else
                 if ( print_level>0 && my_id == 0)
                    hypre_printf("false convergence 1\n");
           }
	}

      
        
      	t = 1.0 / r_norm;
	(*(gmres_functions->ScaleVector))(t,p[0]);
	i = 0;

        /***RESTART CYCLE (right-preconditioning) ***/
        while (i < k_dim && iter < max_iter)
	{
           i++;
           iter++;
           (*(gmres_functions->ClearVector))(r);
           precond(precond_data, A, p[i-1], r);
           (*(gmres_functions->Matvec))(matvec_data, 1.0, A, r, 0.0, p[i]);
           /* modified Gram_Schmidt */
           for (j=0; j < i; j++)
           {
              hh[j][i-1] = (*(gmres_functions->InnerProd))(p[j],p[i]);
              (*(gmres_functions->Axpy))(-hh[j][i-1],p[j],p[i]);
           }
           t = sqrt((*(gmres_functions->InnerProd))(p[i],p[i]));
           hh[i][i-1] = t;	
           if (t != 0.0)
           {
              t = 1.0/t;
              (*(gmres_functions->ScaleVector))(t,p[i]);
           }
           /* done with modified Gram_schmidt and Arnoldi step.
              update factorization of hh */
           for (j = 1; j < i; j++)
           {
              t = hh[j-1][i-1];
              hh[j-1][i-1] = s[j-1]*hh[j][i-1] + c[j-1]*t;
              hh[j][i-1] = -s[j-1]*t + c[j-1]*hh[j][i-1];
           }
           t= hh[i][i-1]*hh[i][i-1];
           t+= hh[i-1][i-1]*hh[i-1][i-1];
           gamma = sqrt(t);
           if (gamma == 0.0) gamma = epsmac;
           c[i-1] = hh[i-1][i-1]/gamma;
           s[i-1] = hh[i][i-1]/gamma;
           rs[i] = -hh[i][i-1]*rs[i-1];
           rs[i]/=  gamma;
           rs[i-1] = c[i-1]*rs[i-1];
           /* determine residual norm */
           hh[i-1][i-1] = s[i-1]*hh[i][i-1] + c[i-1]*hh[i-1][i-1];
           r_norm = fabs(rs[i]);

           /* print ? */
           if ( print_level>0 )
           {
              norms[iter] = r_norm;
              if ( print_level>1 && my_id == 0 )
              {
                 if (b_norm > 0.0)
                    hypre_printf("% 5d    %e    %f   %e\n", iter, 
                           norms[iter],norms[iter]/norms[iter-1],
                           norms[iter]/b_norm);
                 else
                    hypre_printf("% 5d    %e    %f\n", iter, norms[iter],
                           norms[iter]/norms[iter-1]);
              }
           }
           /*convergence factor tolerance */
           if (cf_tol > 0.0)
           {
              cf_ave_0 = cf_ave_1;
              cf_ave_1 = pow( r_norm / r_norm_0, 1.0/(2.0*iter));
              
              weight   = fabs(cf_ave_1 - cf_ave_0);
              weight   = weight / hypre_max(cf_ave_1, cf_ave_0);
              weight   = 1.0 - weight;
#if 0
              hypre_printf("I = %d: cf_new = %e, cf_old = %e, weight = %e\n",
                     i, cf_ave_1, cf_ave_0, weight );
#endif
              if (weight * cf_ave_1 > cf_tol) 
              {
                 break_value = 1;
                 break;
              }
           }
           /* should we exit the restart cycle? (conv. check) */
           if (r_norm <= epsilon && iter >= min_iter)
           {
              if (rel_change && !rel_change_passed)
              {
                 
                 /* To decide whether to break here: to actually
                  determine the relative change requires the approx
                  solution (so a triangular solve) and a
                  precond. solve - so if we have to do this many
                  times, it will be expensive...(unlike cg where is
                  is relatively straightforward)

                  previously, the intent (there was a bug), was to
                  exit the restart cycle based on the residual norm
                  and check the relative change outside the cycle.
                  Here we will check the relative here as we don't
                  want to exit the restart cycle prematurely */
                 
                 for (k=0; k<i; k++) /* extra copy of rs so we don't need
                                        to change the later solve */
                    rs_2[k] = rs[k];

                 /* solve tri. system*/
                 rs_2[i-1] = rs_2[i-1]/hh[i-1][i-1];
                 for (k = i-2; k >= 0; k--)
                 {
                    t = 0.0;
                    for (j = k+1; j < i; j++)
                    {
                       t -= hh[k][j]*rs_2[j];
                    }
                    t+= rs_2[k];
                    rs_2[k] = t/hh[k][k];
                 }
                 
                 (*(gmres_functions->CopyVector))(p[i-1],w);
                 (*(gmres_functions->ScaleVector))(rs_2[i-1],w);
                 for (j = i-2; j >=0; j--)
                    (*(gmres_functions->Axpy))(rs_2[j], p[j], w);
                    
                 (*(gmres_functions->ClearVector))(r);
                 /* find correction (in r) */
                 precond(precond_data, A, w, r);
                 /* copy current solution (x) to w (don't want to over-write x)*/
                 (*(gmres_functions->CopyVector))(x,w);

                 /* add the correction */
                 (*(gmres_functions->Axpy))(1.0,r,w);

                 /* now w is the approx solution  - get the norm*/
                 x_norm = sqrt( (*(gmres_functions->InnerProd))(w,w) );

                 if ( !(x_norm <= guard_zero_residual ))
                    /* don't divide by zero */
                 {  /* now get  x_i - x_i-1 */
                    
                    if (num_rel_change_check)
                    {
                       /* have already checked once so we can avoid another precond.
                          solve */
                       (*(gmres_functions->CopyVector))(w, r);
                       (*(gmres_functions->Axpy))(-1.0, w_2, r);
                       /* now r contains x_i - x_i-1*/

                       /* save current soln w in w_2 for next time */
                       (*(gmres_functions->CopyVector))(w, w_2);
                    }
                    else
                    {
                       /* first time to check rel change*/

                       /* first save current soln w in w_2 for next time */
                       (*(gmres_functions->CopyVector))(w, w_2);

                       /* for relative change take x_(i-1) to be 
                          x + M^{-1}[sum{j=0..i-2} rs_j p_j ]. 
                          Now
                          x_i - x_{i-1}= {x + M^{-1}[sum{j=0..i-1} rs_j p_j ]}
                          - {x + M^{-1}[sum{j=0..i-2} rs_j p_j ]}
                          = M^{-1} rs_{i-1}{p_{i-1}} */
                       
                       (*(gmres_functions->ClearVector))(w);
                       (*(gmres_functions->Axpy))(rs_2[i-1], p[i-1], w);
                       (*(gmres_functions->ClearVector))(r);
                       /* apply the preconditioner */
                       precond(precond_data, A, w, r);
                       /* now r contains x_i - x_i-1 */          
                    }
                    /* find the norm of x_i - x_i-1 */          
                    w_norm = sqrt( (*(gmres_functions->InnerProd))(r,r) );
                    relative_error = w_norm/x_norm;
                    if (relative_error <= r_tol)
                    {
                       rel_change_passed = 1;
                       break;
                    }
                 }
                 else
                 {
                    rel_change_passed = 1;
                    break;

                 }
                 num_rel_change_check++;
              }
           else /* no relative change */
              {
                 break;
              }
           }
           

	} /*** end of restart cycle ***/

	/* now compute solution, first solve upper triangular system */

	if (break_value) break;
	
	rs[i-1] = rs[i-1]/hh[i-1][i-1];
	for (k = i-2; k >= 0; k--)
	{
           t = 0.0;
           for (j = k+1; j < i; j++)
           {
              t -= hh[k][j]*rs[j];
           }
           t+= rs[k];
           rs[k] = t/hh[k][k];
	}

        (*(gmres_functions->CopyVector))(p[i-1],w);
        (*(gmres_functions->ScaleVector))(rs[i-1],w);
        for (j = i-2; j >=0; j--)
                (*(gmres_functions->Axpy))(rs[j], p[j], w);

	(*(gmres_functions->ClearVector))(r);
	/* find correction (in r) */
        precond(precond_data, A, w, r);

        /* update current solution x (in x) */
	(*(gmres_functions->Axpy))(1.0,r,x);
         

        /* check for convergence by evaluating the actual residual */
	if (r_norm  <= epsilon && iter >= min_iter)
        {
           if (skip_real_r_check)
           {
              (gmres_data -> converged) = 1;
              break;
           }

           /* calculate actual residual norm*/
           (*(gmres_functions->CopyVector))(b,r);
           (*(gmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r);
           real_r_norm_new = r_norm = sqrt( (*(gmres_functions->InnerProd))(r,r) );

           if (r_norm <= epsilon)
           {
              if (rel_change && !rel_change_passed) /* calculate the relative change */
              {

                 /* calculate the norm of the solution */
                 x_norm = sqrt( (*(gmres_functions->InnerProd))(x,x) );
               
                 if ( !(x_norm <= guard_zero_residual ))
                    /* don't divide by zero */
                 {
                    
                    /* for relative change take x_(i-1) to be 
                       x + M^{-1}[sum{j=0..i-2} rs_j p_j ]. 
                       Now
                       x_i - x_{i-1}= {x + M^{-1}[sum{j=0..i-1} rs_j p_j ]}
                       - {x + M^{-1}[sum{j=0..i-2} rs_j p_j ]}
                       = M^{-1} rs_{i-1}{p_{i-1}} */
                    (*(gmres_functions->ClearVector))(w);
                    (*(gmres_functions->Axpy))(rs[i-1], p[i-1], w);
                    (*(gmres_functions->ClearVector))(r);
                    /* apply the preconditioner */
                    precond(precond_data, A, w, r);
                    /* find the norm of x_i - x_i-1 */          
                    w_norm = sqrt( (*(gmres_functions->InnerProd))(r,r) );
                    relative_error= w_norm/x_norm;
                    if ( relative_error < r_tol )
                    {
                       (gmres_data -> converged) = 1;
                       if ( print_level>1 && my_id == 0 )
                       {
                          hypre_printf("\n\n");
                          hypre_printf("Final L2 norm of residual: %e\n\n", r_norm);
                       }
                       break;
                    }
                 }
                 else
                 {
                    (gmres_data -> converged) = 1;
                    if ( print_level>1 && my_id == 0 )
                    {
                       hypre_printf("\n\n");
                       hypre_printf("Final L2 norm of residual: %e\n\n", r_norm);
                    }
                    break;
                 }

              }
              else /* don't need to check rel. change */
              {
                 if ( print_level>1 && my_id == 0 )
                 {
                    hypre_printf("\n\n");
                    hypre_printf("Final L2 norm of residual: %e\n\n", r_norm);
                 }
                 (gmres_data -> converged) = 1;
                 break;
              }
           }
           else /* conv. has not occurred, according to true residual */
           {
              /* exit if the real residual norm has not decreased */
              if (real_r_norm_new >= real_r_norm_old)
              {
                 if (print_level > 1 && my_id == 0)
                 {
                    hypre_printf("\n\n");
                    hypre_printf("Final L2 norm of residual: %e\n\n", r_norm);
                 }
                 (gmres_data -> converged) = 1;
                 break;
              }

              /* report discrepancy between real/GMRES residuals and restart */
              if ( print_level>0 && my_id == 0)
                 hypre_printf("false convergence 2, L2 norm of residual: %e\n", r_norm);
              (*(gmres_functions->CopyVector))(r,p[0]);
              i = 0;
              real_r_norm_old = real_r_norm_new;
           }
	} /* end of convergence check */

        /* compute residual vector and continue loop */
	for (j=i ; j > 0; j--)
	{
           rs[j-1] = -s[j-1]*rs[j];
           rs[j] = c[j-1]*rs[j];
	}
        
        if (i) (*(gmres_functions->Axpy))(rs[i]-1.0,p[i],p[i]);
        for (j=i-1 ; j > 0; j--)
           (*(gmres_functions->Axpy))(rs[j],p[j],p[i]);
        
        if (i)
        {
           (*(gmres_functions->Axpy))(rs[0]-1.0,p[0],p[0]);
           (*(gmres_functions->Axpy))(1.0,p[i],p[0]);
        }
   } /* END of iteration while loop */


   if ( print_level>1 && my_id == 0 )
          hypre_printf("\n\n"); 

   (gmres_data -> num_iterations) = iter;
   if (b_norm > 0.0)
      (gmres_data -> rel_residual_norm) = r_norm/b_norm;
   if (b_norm == 0.0)
      (gmres_data -> rel_residual_norm) = r_norm;

   if (iter >= max_iter && r_norm > epsilon) hypre_error(HYPRE_ERROR_CONV);
   

   hypre_TFreeF(c,gmres_functions); 
   hypre_TFreeF(s,gmres_functions); 
   hypre_TFreeF(rs,gmres_functions);
   if (rel_change)  hypre_TFreeF(rs_2,gmres_functions);

   for (i=0; i < k_dim+1; i++)
   {	
   	hypre_TFreeF(hh[i],gmres_functions);
   }
   hypre_TFreeF(hh,gmres_functions); 

   return hypre_error_flag;
}