HYPRE_Int 
HYPRE_ParCSRParaSailsCreate( MPI_Comm comm, HYPRE_Solver *solver )
{
   Secret *secret;
   
   secret = (Secret *) malloc(sizeof(Secret));

   if (secret == NULL)
   {
      hypre_error(HYPRE_ERROR_MEMORY);
      return hypre_error_flag;
   }

   secret->sym     = 1;
   secret->thresh  = 0.1;
   secret->nlevels = 1;
   secret->filter  = 0.1;
   secret->loadbal = 0.0;
   secret->reuse   = 0;
   secret->comm    = comm;
   secret->logging = 0;

   hypre_ParaSailsCreate(comm, &secret->obj);

   *solver = (HYPRE_Solver) secret;

   return hypre_error_flag;
}
Esempio n. 2
0
HYPRE_Int hypre_ParaSailsSetupValues(hypre_ParaSails obj,
                                     HYPRE_DistributedMatrix *distmat, HYPRE_Real filter, HYPRE_Real loadbal,
                                     HYPRE_Int logging)
{
   Matrix *mat;
   hypre_ParaSails_struct *internal = (hypre_ParaSails_struct *) obj;
   HYPRE_Int err;

   mat = convert_matrix(internal->comm, distmat);

   internal->ps->loadbal_beta = loadbal;
   internal->ps->setup_pattern_time = 0.0;

   err = ParaSailsSetupValues(internal->ps, mat, filter);

   if (logging)
      ParaSailsStatsValues(internal->ps, mat);

   MatrixDestroy(mat);

   if (err)
   {
      hypre_error(HYPRE_ERROR_GENERIC);
   }
   return hypre_error_flag;
}
Esempio n. 3
0
HYPRE_Int hypre_ParaSailsSetup(hypre_ParaSails obj,
                               HYPRE_DistributedMatrix *distmat, HYPRE_Int sym, HYPRE_Real thresh, HYPRE_Int nlevels,
                               HYPRE_Real filter, HYPRE_Real loadbal, HYPRE_Int logging)
{
   /* HYPRE_Real cost; */
   Matrix *mat;
   hypre_ParaSails_struct *internal = (hypre_ParaSails_struct *) obj;
   HYPRE_Int err;

   mat = convert_matrix(internal->comm, distmat);

   ParaSailsDestroy(internal->ps);

   internal->ps = ParaSailsCreate(internal->comm, 
                                  mat->beg_row, mat->end_row, sym);

   ParaSailsSetupPattern(internal->ps, mat, thresh, nlevels);

   if (logging)
      /* cost = */ ParaSailsStatsPattern(internal->ps, mat);

   internal->ps->loadbal_beta = loadbal;

   err = ParaSailsSetupValues(internal->ps, mat, filter);

   if (logging)
      ParaSailsStatsValues(internal->ps, mat);

   MatrixDestroy(mat);

   if (err)
   {
      hypre_error(HYPRE_ERROR_GENERIC);
   }
   return hypre_error_flag;
}
Esempio n. 4
0
HYPRE_Int
hypre_LGMRESSolve(void  *lgmres_vdata,
                 void  *A,
                 void  *b,
		 void  *x)
{
   hypre_LGMRESData  *lgmres_data   = (hypre_LGMRESData *)lgmres_vdata;
   hypre_LGMRESFunctions *lgmres_functions = lgmres_data->functions;
   HYPRE_Int 		     k_dim        = (lgmres_data -> k_dim);
   HYPRE_Int               min_iter     = (lgmres_data -> min_iter);
   HYPRE_Int 		     max_iter     = (lgmres_data -> max_iter);
   HYPRE_Real 	     r_tol        = (lgmres_data -> tol);
   HYPRE_Real 	     cf_tol       = (lgmres_data -> cf_tol);
   HYPRE_Real        a_tol        = (lgmres_data -> a_tol);
   void             *matvec_data  = (lgmres_data -> matvec_data);

   void             *r            = (lgmres_data -> r);
   void             *w            = (lgmres_data -> w);
   

   void            **p            = (lgmres_data -> p);

   /* lgmres  mod*/
   void          **aug_vecs       = (lgmres_data ->aug_vecs);
   void          **a_aug_vecs     = (lgmres_data ->a_aug_vecs);
   HYPRE_Int            *aug_order      = (lgmres_data->aug_order);
   HYPRE_Int             aug_dim        = (lgmres_data -> aug_dim);
   HYPRE_Int             approx_constant=  (lgmres_data ->approx_constant);
   HYPRE_Int             it_arnoldi, aug_ct, it_total, ii, order, it_aug;
   HYPRE_Int             spot = 0;
   HYPRE_Real      tmp_norm, r_norm_last;
   /*---*/

   HYPRE_Int 	           (*precond)(void*,void*,void*,void*)   = (lgmres_functions -> precond);
   HYPRE_Int 	            *precond_data = (HYPRE_Int*)(lgmres_data -> precond_data);

   HYPRE_Int             print_level    = (lgmres_data -> print_level);
   HYPRE_Int             logging        = (lgmres_data -> logging);

   HYPRE_Real     *norms          = (lgmres_data -> norms);
   
   HYPRE_Int        break_value = 0;
   HYPRE_Int	      i, j, k;
   HYPRE_Real *rs, **hh, *c, *s; 
   HYPRE_Int        iter; 
   HYPRE_Int        my_id, num_procs;
   HYPRE_Real epsilon, gamma, t, r_norm, b_norm, den_norm;
   
   HYPRE_Real epsmac = 1.e-16; 
   HYPRE_Real ieee_check = 0.;

   HYPRE_Real cf_ave_0 = 0.0;
   HYPRE_Real cf_ave_1 = 0.0;
   HYPRE_Real weight;
   HYPRE_Real r_norm_0;


   /* We are not checking rel. change for now... */
   

   (lgmres_data -> converged) = 0;
   /*-----------------------------------------------------------------------
    * With relative change convergence test on, it is possible to attempt
    * another iteration with a zero residual. This causes the parameter
    * alpha to go NaN. The guard_zero_residual parameter is to circumvent
    * this. Perhaps it should be set to something non-zero (but small).
    *-----------------------------------------------------------------------*/

   (*(lgmres_functions->CommInfo))(A,&my_id,&num_procs);
   if ( logging>0 || print_level>0 )
   {
      norms          = (lgmres_data -> norms);
      /* not used yet      log_file_name  = (lgmres_data -> log_file_name);*/
      /* fp = fopen(log_file_name,"w"); */
   }

   /* initialize work arrays  - lgmres includes aug_dim*/
   rs = hypre_CTAllocF(HYPRE_Real,k_dim+1+aug_dim,lgmres_functions); 
   c = hypre_CTAllocF(HYPRE_Real,k_dim+aug_dim,lgmres_functions); 
   s = hypre_CTAllocF(HYPRE_Real,k_dim+aug_dim,lgmres_functions); 


   
  /* lgmres mod. - need non-modified hessenberg to avoid aug_dim matvecs */
   hh = hypre_CTAllocF(HYPRE_Real*,k_dim+aug_dim+1,lgmres_functions); 
   for (i=0; i < k_dim+aug_dim+1; i++)
   {	
   	hh[i] = hypre_CTAllocF(HYPRE_Real,k_dim+aug_dim,lgmres_functions); 
   }
   
   (*(lgmres_functions->CopyVector))(b,p[0]);

   /* compute initial residual */
   (*(lgmres_functions->Matvec))(matvec_data,-1.0, A, x, 1.0, p[0]);

   b_norm = sqrt((*(lgmres_functions->InnerProd))(b,b));

   /* Since it is does not diminish performance, attempt to return an error flag
      and notify users when they supply bad input. */
   if (b_norm != 0.) ieee_check = b_norm/b_norm; /* INF -> NaN conversion */
   if (ieee_check != ieee_check)
   {
      /* ...INFs or NaNs in input can make ieee_check a NaN.  This test
         for ieee_check self-equality works on all IEEE-compliant compilers/
         machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754"
         by W. Kahan, May 31, 1996.  Currently (July 2002) this paper may be
         found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */
      if (logging > 0 || print_level > 0)
      {
        hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n");
        hypre_printf("ERROR -- hypre_LGMRESSolve: INFs and/or NaNs detected in input.\n");
        hypre_printf("User probably placed non-numerics in supplied b.\n");
        hypre_printf("Returning error flag += 101.  Program not terminated.\n");
        hypre_printf("ERROR detected by Hypre ... END\n\n\n");
      }
      hypre_error(HYPRE_ERROR_GENERIC);
      return hypre_error_flag;
   }

   r_norm = sqrt((*(lgmres_functions->InnerProd))(p[0],p[0]));
   r_norm_0 = r_norm;

   /* Since it is does not diminish performance, attempt to return an error flag
      and notify users when they supply bad input. */
   if (r_norm != 0.) ieee_check = r_norm/r_norm; /* INF -> NaN conversion */
   if (ieee_check != ieee_check)
   {
      /* ...INFs or NaNs in input can make ieee_check a NaN.  This test
         for ieee_check self-equality works on all IEEE-compliant compilers/
         machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754"
         by W. Kahan, May 31, 1996.  Currently (July 2002) this paper may be
         found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */
      if (logging > 0 || print_level > 0)
      {
        hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n");
        hypre_printf("ERROR -- hypre_LGMRESSolve: INFs and/or NaNs detected in input.\n");
        hypre_printf("User probably placed non-numerics in supplied A or x_0.\n");
        hypre_printf("Returning error flag += 101.  Program not terminated.\n");
        hypre_printf("ERROR detected by Hypre ... END\n\n\n");
      }
      hypre_error(HYPRE_ERROR_GENERIC);
      return hypre_error_flag;
   }

   if ( logging>0 || print_level > 0)
   {
      norms[0] = r_norm;
      if ( print_level>1 && my_id == 0 )
      {
  	 hypre_printf("L2 norm of b: %e\n", b_norm);
         if (b_norm == 0.0)
            hypre_printf("Rel_resid_norm actually contains the residual norm\n");
         hypre_printf("Initial L2 norm of residual: %e\n", r_norm);
      
      }
   }
   iter = 0;

   if (b_norm > 0.0)
   {
/* convergence criterion |r_i|/|b| <= accuracy if |b| > 0 */
     den_norm= b_norm;
   }
   else
   {
/* convergence criterion |r_i|/|r0| <= accuracy if |b| = 0 */
     den_norm= r_norm;
   };

  /* convergence criteria: |r_i| <= max( a_tol, r_tol * den_norm)
      den_norm = |r_0| or |b|
      note: default for a_tol is 0.0, so relative residual criteria is used unless
            user specifies a_tol, or sets r_tol = 0.0, which means absolute
            tol only is checked  */
      
   epsilon = hypre_max(a_tol,r_tol*den_norm);
   
   /* so now our stop criteria is |r_i| <= epsilon */
 

   if ( print_level>1 && my_id == 0 )
   {
      if (b_norm > 0.0)
         {hypre_printf("=============================================\n\n");
          hypre_printf("Iters     resid.norm     conv.rate  rel.res.norm\n");
          hypre_printf("-----    ------------    ---------- ------------\n");
      
          }

      else
         {hypre_printf("=============================================\n\n");
          hypre_printf("Iters     resid.norm     conv.rate\n");
          hypre_printf("-----    ------------    ----------\n");
      
          };
   }

   
   
/*lgmres initialization */
   for (ii=0; ii<aug_dim; ii++) {
      aug_order[ii] = 0;
   }
   aug_ct = 0; /* number of aug. vectors available */



   /* outer iteration cycle */
   while (iter < max_iter)
   {
   /* initialize first term of hessenberg system */

	rs[0] = r_norm;
        if (r_norm == 0.0)
        {
           hypre_TFreeF(c,lgmres_functions); 
           hypre_TFreeF(s,lgmres_functions); 
           hypre_TFreeF(rs,lgmres_functions);
           for (i=0; i < k_dim+aug_dim+1; i++) {
              hypre_TFreeF(hh[i],lgmres_functions);
           }

           hypre_TFreeF(hh,lgmres_functions); 
	   return hypre_error_flag;
           
	}

        /* see if we are already converged and 
           should print the final norm and exit */
	if (r_norm <= epsilon && iter >= min_iter) 
        {
           (*(lgmres_functions->CopyVector))(b,r);
           (*(lgmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r);
           r_norm = sqrt((*(lgmres_functions->InnerProd))(r,r));
           if (r_norm  <= epsilon)
           {
              if ( print_level>1 && my_id == 0)
              {
                 hypre_printf("\n\n");
                 hypre_printf("Final L2 norm of residual: %e\n\n", r_norm);
              }
              break;
              }
           else
              if ( print_level>0 && my_id == 0)
                 hypre_printf("false convergence 1\n");
           
	}
        
      	t = 1.0 / r_norm;
        r_norm_last = r_norm;
	
        (*(lgmres_functions->ScaleVector))(t,p[0]);
	i = 0;

        /* lgmres mod: determine number of arnoldi steps to take */
        /* if approx_constant then we keep the space the same size
           even if we don't have the full number of aug vectors yet*/
        if (approx_constant) {
           it_arnoldi = k_dim - aug_ct;
        } else {
           it_arnoldi = k_dim - aug_dim; 
        }
        it_total =  it_arnoldi + aug_ct;
        it_aug = 0; /* keep track of augmented iterations */


        /***RESTART CYCLE (right-preconditioning) ***/
        while (i < it_total && iter < max_iter)
	{
           i++;
           iter++;
           (*(lgmres_functions->ClearVector))(r);


           /*LGMRES_MOD: decide whether this is an arnoldi step or an aug step */ 
           if ( i <= it_arnoldi) 
           { /* Arnoldi */
              precond(precond_data, A, p[i-1], r);
              (*(lgmres_functions->Matvec))(matvec_data, 1.0, A, r, 0.0, p[i]);
           } else 
           { /*lgmres aug step */
              it_aug ++;
              order = i - it_arnoldi - 1; /* which aug step (note i starts at 1) - aug order number at 0*/ 
              for (ii=0; ii<aug_dim; ii++) 
              {
                 if (aug_order[ii] == order) 
                 {
                    spot = ii;
                    break; /* must have this because there will be duplicates before aug_ct = aug_dim */ 
                 }  
              }
              /* copy a_aug_vecs[spot] to p[i] */ 
              (*(lgmres_functions->CopyVector))(a_aug_vecs[spot],p[i]);
              
              /*note: an alternate implementation choice would be to only save the AUGVECS and
                not A_AUGVEC and then apply the PC here to the augvec */
           }
           /*---*/

           /* modified Gram_Schmidt */
           for (j=0; j < i; j++)
           {
              hh[j][i-1] = (*(lgmres_functions->InnerProd))(p[j],p[i]);
              (*(lgmres_functions->Axpy))(-hh[j][i-1],p[j],p[i]);
           }
           t = sqrt((*(lgmres_functions->InnerProd))(p[i],p[i]));
           hh[i][i-1] = t;	
           if (t != 0.0)
           {
              t = 1.0/t;
              (*(lgmres_functions->ScaleVector))(t,p[i]);
           }


           /* done with modified Gram_schmidt and Arnoldi step.
              update factorization of hh */
           for (j = 1; j < i; j++)
           {
              t = hh[j-1][i-1];
              hh[j-1][i-1] = s[j-1]*hh[j][i-1] + c[j-1]*t;
              hh[j][i-1] = -s[j-1]*t + c[j-1]*hh[j][i-1];
           }
           t= hh[i][i-1]*hh[i][i-1];
           t+= hh[i-1][i-1]*hh[i-1][i-1];
           gamma = sqrt(t);
           if (gamma == 0.0) gamma = epsmac;
           c[i-1] = hh[i-1][i-1]/gamma;
           s[i-1] = hh[i][i-1]/gamma;
           rs[i] = -hh[i][i-1]*rs[i-1];
           rs[i]/=  gamma;
           rs[i-1] = c[i-1]*rs[i-1];
           /* determine residual norm */
           hh[i-1][i-1] = s[i-1]*hh[i][i-1] + c[i-1]*hh[i-1][i-1];
           r_norm = fabs(rs[i]);

           /* print ? */
           if ( print_level>0 )
           {
              norms[iter] = r_norm;
              if ( print_level>1 && my_id == 0 )
              {
                 if (b_norm > 0.0)
                    hypre_printf("% 5d    %e    %f   %e\n", iter, 
                           norms[iter],norms[iter]/norms[iter-1],
                           norms[iter]/b_norm);
                 else
                    hypre_printf("% 5d    %e    %f\n", iter, norms[iter],
                           norms[iter]/norms[iter-1]);
              }
           }
           /*convergence factor tolerance */
           if (cf_tol > 0.0)
           {
              cf_ave_0 = cf_ave_1;
              cf_ave_1 = pow( r_norm / r_norm_0, 1.0/(2.0*iter));
              
              weight   = fabs(cf_ave_1 - cf_ave_0);
              weight   = weight / hypre_max(cf_ave_1, cf_ave_0);
              weight   = 1.0 - weight;
#if 0
              hypre_printf("I = %d: cf_new = %e, cf_old = %e, weight = %e\n",
                     i, cf_ave_1, cf_ave_0, weight );
#endif
              if (weight * cf_ave_1 > cf_tol) 
              {
                 break_value = 1;
                 break;
              }
           }
           /* should we exit the restart cycle? (conv. check) */
           if (r_norm <= epsilon && iter >= min_iter)
           {
                 break;
           }
           

	} /*** end of restart cycle ***/

	/* now compute solution, first solve upper triangular system */

	if (break_value) break;
	
	rs[i-1] = rs[i-1]/hh[i-1][i-1];
	for (k = i-2; k >= 0; k--)
	{
           t = 0.0;
           for (j = k+1; j < i; j++)
           {
              t -= hh[k][j]*rs[j];
           }
           t+= rs[k];
           rs[k] = t/hh[k][k];
	}
        /* form linear combination of p's to get solution */
        /* put the new aug_vector in aug_vecs[aug_dim]  - a temp position*/	
        /* i = number of iterations */  
        /* it_aug = number of augmented iterations */ 
        /* it_arnoldi = number of arnoldi iterations */


        /*check if exited early before all arnoldi its */
        if (it_arnoldi > i) it_arnoldi = i; 


        if (!it_aug)
        {
           (*(lgmres_functions->CopyVector))(p[i-1],w);
           (*(lgmres_functions->ScaleVector))(rs[i-1],w);
           for (j = i-2; j >=0; j--)
              (*(lgmres_functions->Axpy))(rs[j], p[j], w);
        }
        else /* need some of the augvecs */
        {
           (*(lgmres_functions->CopyVector))(p[0],w);
           (*(lgmres_functions->ScaleVector))(rs[0],w);

           /* reg. arnoldi directions */  
           for (j = 1; j < it_arnoldi; j++) /*first one already done */
           {
              (*(lgmres_functions->Axpy))(rs[j], p[j], w);
           }
            
           /* augment directions */
           for (ii=0; ii<it_aug; ii++) 
           {
              for (j=0; j<aug_dim; j++) 
              {
                 if (aug_order[j] == ii) 
                 {
                    spot = j;
                    break; /* must have this because there will be
                            * duplicates before aug_ct = aug_dim */ 
                 }  
              }
              (*(lgmres_functions->Axpy))(rs[it_arnoldi+ii], aug_vecs[spot], w);
           }
        }


        /* grab the new aug vector before the prec*/
        (*(lgmres_functions->CopyVector))(w,aug_vecs[aug_dim]);
        
	(*(lgmres_functions->ClearVector))(r);
	/* find correction (in r) (un-wind precond.)*/
        precond(precond_data, A, w, r);

        /* update current solution x (in x) */
	(*(lgmres_functions->Axpy))(1.0,r,x);
         

        /* check for convergence by evaluating the actual residual */
	if (r_norm <= epsilon && iter >= min_iter) 
        {
           /* calculate actual residual norm*/
           (*(lgmres_functions->CopyVector))(b,r);
           (*(lgmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r);
           r_norm = sqrt( (*(lgmres_functions->InnerProd))(r,r) );
           
           if (r_norm <= epsilon)
           {
              if ( print_level>1 && my_id == 0 )
              {
                 hypre_printf("\n\n");
                 hypre_printf("Final L2 norm of residual: %e\n\n", r_norm);
              }
              (lgmres_data -> converged) = 1;
              break;
           }
           else /* conv. has not occurred, according to true residual */ 
           {
              if ( print_level>0 && my_id == 0)
                 hypre_printf("false convergence 2\n");
              (*(lgmres_functions->CopyVector))(r,p[0]);
              i = 0;
           }
	} /* end of convergence check */
        
        /* compute residual vector and continue loop */

        /* copy r0 (not scaled) to w*/  
	(*(lgmres_functions->CopyVector))(p[0],w);
	(*(lgmres_functions->ScaleVector))(r_norm_last,w);


	for (j=i ; j > 0; j--)
	{
           rs[j-1] = -s[j-1]*rs[j];
           rs[j] = c[j-1]*rs[j];
	}
        
        if (i) (*(lgmres_functions->Axpy))(rs[i]-1.0,p[i],p[i]);
        for (j=i-1 ; j > 0; j--)
           (*(lgmres_functions->Axpy))(rs[j],p[j],p[i]);
        
        if (i)
        {
           (*(lgmres_functions->Axpy))(rs[0]-1.0,p[0],p[0]);
           (*(lgmres_functions->Axpy))(1.0,p[i],p[0]);
        }

        /* lgmres mod */  
        /* collect aug vector and A*augvector for future restarts -
           only if we will be restarting (i.e. this cycle performed it_total
           iterations). ordering starts at 0.*/
        if (aug_dim > 0) 
        {
           if (!aug_ct) 
           {
              spot = 0;
              aug_ct++;
           } 
           else if (aug_ct < aug_dim) 
           {
              spot = aug_ct;
              aug_ct++;
           } 
           else 
           { /* truncate - already have aug_dim number of vectors*/
              for (ii=0; ii<aug_dim; ii++) 
              {
                 if (aug_order[ii] == (aug_dim-1)) 
                 {
                    spot = ii;
                 }  
              }
           } 
           /* aug_vecs[aug_dim] contains new aug vector */
           (*(lgmres_functions->CopyVector))(aug_vecs[aug_dim], aug_vecs[spot]);
           /*need to normalize */
           tmp_norm = sqrt((*(lgmres_functions->InnerProd))(aug_vecs[spot], aug_vecs[spot]));
           
           tmp_norm = 1.0/tmp_norm;
           (*(lgmres_functions->ScaleVector))(tmp_norm ,aug_vecs[spot]);
           
           /*set new aug vector to order 0  - move all others back one */
           for (ii=0; ii < aug_dim; ii++) 
           {
              aug_order[ii]++;
           } 
           aug_order[spot] = 0; 

           /*now add the A*aug vector to A_AUGVEC(spot) - this is
            * independ. of preconditioning type*/
           /* A*augvec = V*H*y  = r0-rm   (r0 is in w and rm is in p[0])*/
           (*(lgmres_functions->CopyVector))( w, a_aug_vecs[spot]);
           (*(lgmres_functions->ScaleVector))(- 1.0, a_aug_vecs[spot]); /* -r0*/
           (*(lgmres_functions->Axpy))(1.0, p[0],a_aug_vecs[spot]); /* rm - r0 */
           (*(lgmres_functions->ScaleVector))(-tmp_norm, a_aug_vecs[spot]); /* r0-rm /norm */
           
        }
        
   } /* END of iteration while loop */
        

   if ( print_level>1 && my_id == 0 )
          hypre_printf("\n\n"); 

   (lgmres_data -> num_iterations) = iter;
   if (b_norm > 0.0)
      (lgmres_data -> rel_residual_norm) = r_norm/b_norm;
   if (b_norm == 0.0)
      (lgmres_data -> rel_residual_norm) = r_norm;

   if (iter >= max_iter && r_norm > epsilon) hypre_error(HYPRE_ERROR_CONV);
   

   hypre_TFreeF(c,lgmres_functions); 
   hypre_TFreeF(s,lgmres_functions); 
   hypre_TFreeF(rs,lgmres_functions);

   for (i=0; i < k_dim+1+aug_dim; i++)
   {	
   	hypre_TFreeF(hh[i],lgmres_functions);
   }
   hypre_TFreeF(hh,lgmres_functions); 

   return hypre_error_flag;
}
Esempio n. 5
0
HYPRE_Int
hypre_BoomerAMGSolve( void               *amg_vdata,
                   hypre_ParCSRMatrix *A,
                   hypre_ParVector    *f,
                   hypre_ParVector    *u         )
{

   MPI_Comm 	      comm = hypre_ParCSRMatrixComm(A);   

   hypre_ParAMGData   *amg_data = amg_vdata;

   /* Data Structure variables */

   HYPRE_Int      amg_print_level;
   HYPRE_Int      amg_logging;
   HYPRE_Int      cycle_count;
   HYPRE_Int      num_levels;
   /* HYPRE_Int      num_unknowns; */
   HYPRE_Real   tol;

   HYPRE_Int block_mode;
   

   hypre_ParCSRMatrix **A_array;
   hypre_ParVector    **F_array;
   hypre_ParVector    **U_array;

   hypre_ParCSRBlockMatrix **A_block_array;


   /*  Local variables  */

   HYPRE_Int      j;
   HYPRE_Int      Solve_err_flag;
   HYPRE_Int      min_iter;
   HYPRE_Int      max_iter;
   HYPRE_Int      num_procs, my_id;
   HYPRE_Int      additive;
   HYPRE_Int      mult_additive;
   HYPRE_Int      simple;

   HYPRE_Real   alpha = 1.0;
   HYPRE_Real   beta = -1.0;
   HYPRE_Real   cycle_op_count;
   HYPRE_Real   total_coeffs;
   HYPRE_Real   total_variables;
   HYPRE_Real  *num_coeffs;
   HYPRE_Real  *num_variables;
   HYPRE_Real   cycle_cmplxty = 0.0;
   HYPRE_Real   operat_cmplxty;
   HYPRE_Real   grid_cmplxty;
   HYPRE_Real   conv_factor = 0.0;
   HYPRE_Real   resid_nrm = 1.0;
   HYPRE_Real   resid_nrm_init = 0.0;
   HYPRE_Real   relative_resid;
   HYPRE_Real   rhs_norm = 0.0;
   HYPRE_Real   old_resid;
   HYPRE_Real   ieee_check = 0.;

   hypre_ParVector  *Vtemp;
   hypre_ParVector  *Residual;

   hypre_MPI_Comm_size(comm, &num_procs);   
   hypre_MPI_Comm_rank(comm,&my_id);

   amg_print_level    = hypre_ParAMGDataPrintLevel(amg_data);
   amg_logging      = hypre_ParAMGDataLogging(amg_data);
   if ( amg_logging > 1 )
      Residual = hypre_ParAMGDataResidual(amg_data);
   /* num_unknowns  = hypre_ParAMGDataNumUnknowns(amg_data); */
   num_levels       = hypre_ParAMGDataNumLevels(amg_data);
   A_array          = hypre_ParAMGDataAArray(amg_data);
   F_array          = hypre_ParAMGDataFArray(amg_data);
   U_array          = hypre_ParAMGDataUArray(amg_data);

   tol              = hypre_ParAMGDataTol(amg_data);
   min_iter         = hypre_ParAMGDataMinIter(amg_data);
   max_iter         = hypre_ParAMGDataMaxIter(amg_data);
   additive         = hypre_ParAMGDataAdditive(amg_data);
   simple           = hypre_ParAMGDataSimple(amg_data);
   mult_additive    = hypre_ParAMGDataMultAdditive(amg_data);

   A_array[0] = A;
   F_array[0] = f;
   U_array[0] = u;

   block_mode = hypre_ParAMGDataBlockMode(amg_data);

   A_block_array          = hypre_ParAMGDataABlockArray(amg_data);


/*   Vtemp = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A_array[0]),
                                 hypre_ParCSRMatrixGlobalNumRows(A_array[0]),
                                 hypre_ParCSRMatrixRowStarts(A_array[0]));
   hypre_ParVectorInitialize(Vtemp);
   hypre_ParVectorSetPartitioningOwner(Vtemp,0);
   hypre_ParAMGDataVtemp(amg_data) = Vtemp;
*/
   Vtemp = hypre_ParAMGDataVtemp(amg_data);


   /*-----------------------------------------------------------------------
    *    Write the solver parameters
    *-----------------------------------------------------------------------*/


   if (my_id == 0 && amg_print_level > 1)
      hypre_BoomerAMGWriteSolverParams(amg_data); 

   /*-----------------------------------------------------------------------
    *    Initialize the solver error flag and assorted bookkeeping variables
    *-----------------------------------------------------------------------*/

   Solve_err_flag = 0;

   total_coeffs = 0;
   total_variables = 0;
   cycle_count = 0;
   operat_cmplxty = 0;
   grid_cmplxty = 0;

   /*-----------------------------------------------------------------------
    *     write some initial info
    *-----------------------------------------------------------------------*/

   if (my_id == 0 && amg_print_level > 1 && tol > 0.)
     hypre_printf("\n\nAMG SOLUTION INFO:\n");


   /*-----------------------------------------------------------------------
    *    Compute initial fine-grid residual and print 
    *-----------------------------------------------------------------------*/

   if (amg_print_level > 1 || amg_logging > 1)
   {
     if ( amg_logging > 1 ) {
        hypre_ParVectorCopy(F_array[0], Residual );
        if (tol > 0)
	   hypre_ParCSRMatrixMatvec(alpha, A_array[0], U_array[0], beta, Residual );
        resid_nrm = sqrt(hypre_ParVectorInnerProd( Residual, Residual ));
     }
     else {
        hypre_ParVectorCopy(F_array[0], Vtemp);
        if (tol > 0)
           hypre_ParCSRMatrixMatvec(alpha, A_array[0], U_array[0], beta, Vtemp);
        resid_nrm = sqrt(hypre_ParVectorInnerProd(Vtemp, Vtemp));
     }

     /* Since it is does not diminish performance, attempt to return an error flag
        and notify users when they supply bad input. */
     if (resid_nrm != 0.) ieee_check = resid_nrm/resid_nrm; /* INF -> NaN conversion */
     if (ieee_check != ieee_check)
     {
        /* ...INFs or NaNs in input can make ieee_check a NaN.  This test
           for ieee_check self-equality works on all IEEE-compliant compilers/
           machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754"
           by W. Kahan, May 31, 1996.  Currently (July 2002) this paper may be
           found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */
        if (amg_print_level > 0)
        {
          hypre_printf("\n\nERROR detected by Hypre ...  BEGIN\n");
          hypre_printf("ERROR -- hypre_BoomerAMGSolve: INFs and/or NaNs detected in input.\n");
          hypre_printf("User probably placed non-numerics in supplied A, x_0, or b.\n");
          hypre_printf("ERROR detected by Hypre ...  END\n\n\n");
        }
        hypre_error(HYPRE_ERROR_GENERIC);
        return hypre_error_flag;
     }

     resid_nrm_init = resid_nrm;
     rhs_norm = sqrt(hypre_ParVectorInnerProd(f, f));
     if (rhs_norm)
     {
       relative_resid = resid_nrm_init / rhs_norm;
     }
     else
     {
       relative_resid = resid_nrm_init;
     }
   }
   else
   {
     relative_resid = 1.;
   }

   if (my_id == 0 && amg_print_level > 1)
   {     
      hypre_printf("                                            relative\n");
      hypre_printf("               residual        factor       residual\n");
      hypre_printf("               --------        ------       --------\n");
      hypre_printf("    Initial    %e                 %e\n",resid_nrm_init,
              relative_resid);
   }

   /*-----------------------------------------------------------------------
    *    Main V-cycle loop
    *-----------------------------------------------------------------------*/
   
   while ((relative_resid >= tol || cycle_count < min_iter)
          && cycle_count < max_iter)
   {
      hypre_ParAMGDataCycleOpCount(amg_data) = 0;   
      /* Op count only needed for one cycle */

      if ((additive < 0 || additive >= num_levels) 
	   && (mult_additive < 0 || mult_additive >= num_levels)
	   && (simple < 0 || simple >= num_levels) )
         hypre_BoomerAMGCycle(amg_data, F_array, U_array); 
      else
         hypre_BoomerAMGAdditiveCycle(amg_data); 

      /*---------------------------------------------------------------
       *    Compute  fine-grid residual and residual norm
       *----------------------------------------------------------------*/

      if (amg_print_level > 1 || amg_logging > 1 || tol > 0.)
      {
        old_resid = resid_nrm;

        if ( amg_logging > 1 ) {
           hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A_array[0], U_array[0], beta, F_array[0], Residual );
           resid_nrm = sqrt(hypre_ParVectorInnerProd( Residual, Residual ));
        }
        else {
           hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A_array[0], U_array[0], beta, F_array[0], Vtemp);
           resid_nrm = sqrt(hypre_ParVectorInnerProd(Vtemp, Vtemp));
        }

        if (old_resid) conv_factor = resid_nrm / old_resid;
        else conv_factor = resid_nrm;
        if (rhs_norm)
        {
           relative_resid = resid_nrm / rhs_norm;
        }
        else
        {
           relative_resid = resid_nrm;
        }

        hypre_ParAMGDataRelativeResidualNorm(amg_data) = relative_resid;
      }

      ++cycle_count;

      hypre_ParAMGDataNumIterations(amg_data) = cycle_count;
#ifdef CUMNUMIT
      ++hypre_ParAMGDataCumNumIterations(amg_data);
#endif

      if (my_id == 0 && amg_print_level > 1)
      { 
         hypre_printf("    Cycle %2d   %e    %f     %e \n", cycle_count,
                 resid_nrm, conv_factor, relative_resid);
      }
   }

   if (cycle_count == max_iter && tol > 0.)
   {
      Solve_err_flag = 1;
      hypre_error(HYPRE_ERROR_CONV);
   }

   /*-----------------------------------------------------------------------
    *    Compute closing statistics
    *-----------------------------------------------------------------------*/

   if (cycle_count > 0 && resid_nrm_init) 
     conv_factor = pow((resid_nrm/resid_nrm_init),(1.0/(HYPRE_Real) cycle_count));
   else
     conv_factor = 1.;

   if (amg_print_level > 1) 
   {
      num_coeffs       = hypre_CTAlloc(HYPRE_Real, num_levels);
      num_variables    = hypre_CTAlloc(HYPRE_Real, num_levels);
      num_coeffs[0]    = hypre_ParCSRMatrixDNumNonzeros(A);
      num_variables[0] = hypre_ParCSRMatrixGlobalNumRows(A);

      if (block_mode)
      {
         for (j = 1; j < num_levels; j++)
         {
            num_coeffs[j]    = (HYPRE_Real) hypre_ParCSRBlockMatrixNumNonzeros(A_block_array[j]);
            num_variables[j] = (HYPRE_Real) hypre_ParCSRBlockMatrixGlobalNumRows(A_block_array[j]);
         }
         num_coeffs[0]    = hypre_ParCSRBlockMatrixDNumNonzeros(A_block_array[0]);
         num_variables[0] = hypre_ParCSRBlockMatrixGlobalNumRows(A_block_array[0]);

      }
      else
      {
         for (j = 1; j < num_levels; j++)
         {
            num_coeffs[j]    = (HYPRE_Real) hypre_ParCSRMatrixNumNonzeros(A_array[j]);
            num_variables[j] = (HYPRE_Real) hypre_ParCSRMatrixGlobalNumRows(A_array[j]);
         }
      }
   

      for (j=0;j<hypre_ParAMGDataNumLevels(amg_data);j++)
      {
         total_coeffs += num_coeffs[j];
         total_variables += num_variables[j];
      }

      cycle_op_count = hypre_ParAMGDataCycleOpCount(amg_data);

      if (num_variables[0])
         grid_cmplxty = total_variables / num_variables[0];
      if (num_coeffs[0])
      {
         operat_cmplxty = total_coeffs / num_coeffs[0];
         cycle_cmplxty = cycle_op_count / num_coeffs[0];
      }

      if (my_id == 0)
      {
         if (Solve_err_flag == 1)
         {
            hypre_printf("\n\n==============================================");
            hypre_printf("\n NOTE: Convergence tolerance was not achieved\n");
            hypre_printf("      within the allowed %d V-cycles\n",max_iter);
            hypre_printf("==============================================");
         }
         hypre_printf("\n\n Average Convergence Factor = %f",conv_factor);
         hypre_printf("\n\n     Complexity:    grid = %f\n",grid_cmplxty);
         hypre_printf("                operator = %f\n",operat_cmplxty);
         hypre_printf("                   cycle = %f\n\n\n\n",cycle_cmplxty);
      }

      hypre_TFree(num_coeffs);
      hypre_TFree(num_variables);
   }

   return hypre_error_flag;
}
Esempio n. 6
0
HYPRE_Int
hypre_ParVectorReadIJ( MPI_Comm             comm,
                       const char          *filename,
                       HYPRE_Int                 *base_j_ptr,
                       hypre_ParVector    **vector_ptr)
{
   HYPRE_Int               global_size;
   hypre_ParVector  *vector;
   hypre_Vector     *local_vector;
   double           *local_data;
   HYPRE_Int              *partitioning;
   HYPRE_Int               base_j;

   HYPRE_Int               myid, num_procs, i, j, J;
   char              new_filename[255];
   FILE             *file;

   hypre_MPI_Comm_size(comm, &num_procs);
   hypre_MPI_Comm_rank(comm, &myid);
  
   hypre_sprintf(new_filename,"%s.%05d", filename, myid);

   if ((file = fopen(new_filename, "r")) == NULL)
   {
      hypre_printf("Error: can't open output file %s\n", new_filename);
      hypre_error(HYPRE_ERROR_GENERIC);
      return hypre_error_flag;
   }

   hypre_fscanf(file, "%d", &global_size);
#ifdef HYPRE_NO_GLOBAL_PARTITION
/* this may need to be changed so that the base is available in the file! */
   partitioning = hypre_CTAlloc(HYPRE_Int,2);

   hypre_fscanf(file, "%d", partitioning);
   for (i = 0; i < 2; i++)
   {
      hypre_fscanf(file, "%d", partitioning+i);
   }
#else
   partitioning = hypre_CTAlloc(HYPRE_Int,num_procs+1);

   hypre_fscanf(file, "%d", partitioning);
   for (i = 1; i <= num_procs; i++)
   {
      hypre_fscanf(file, "%d", partitioning+i);
      partitioning[i] -= partitioning[0];
   }
   base_j = partitioning[0];
   partitioning[0] = 0;
#endif
   vector = hypre_ParVectorCreate(comm, global_size,
                                  partitioning);

   hypre_ParVectorInitialize(vector);

   local_vector = hypre_ParVectorLocalVector(vector);
   local_data   = hypre_VectorData(local_vector);

#ifdef HYPRE_NO_GLOBAL_PARTITION
   for (j = 0; j < partitioning[1] - partitioning[0]; j++)
#else
   for (j = 0; j < partitioning[myid+1] - partitioning[myid]; j++)
#endif
   {
      hypre_fscanf(file, "%d %le", &J, local_data + j);
   }

   fclose(file);

   *base_j_ptr = base_j;
   *vector_ptr = vector;

   /* multivector code not written yet >>> */
   hypre_assert( hypre_ParVectorNumVectors(vector) == 1 );
   if ( hypre_ParVectorNumVectors(vector) != 1 ) hypre_error(HYPRE_ERROR_GENERIC);

   return hypre_error_flag;
}
Esempio n. 7
0
HYPRE_Int
hypre_BiCGSTABSolve(void  *bicgstab_vdata,
                 void  *A,
                 void  *b,
		 void  *x)
{
	hypre_BiCGSTABData  *bicgstab_data   = (hypre_BiCGSTABData*)bicgstab_vdata;
   hypre_BiCGSTABFunctions *bicgstab_functions = bicgstab_data->functions;

   HYPRE_Int               min_iter     = (bicgstab_data -> min_iter);
   HYPRE_Int 		     max_iter     = (bicgstab_data -> max_iter);
   HYPRE_Int 		     stop_crit    = (bicgstab_data -> stop_crit);
   HYPRE_Real 	     r_tol     = (bicgstab_data -> tol);
   HYPRE_Real 	     cf_tol       = (bicgstab_data -> cf_tol);
   void             *matvec_data  = (bicgstab_data -> matvec_data);
   HYPRE_Real        a_tol        = (bicgstab_data -> a_tol);
  
   

   void             *r            = (bicgstab_data -> r);
   void             *r0           = (bicgstab_data -> r0);
   void             *s            = (bicgstab_data -> s);
   void             *v           = (bicgstab_data -> v);
   void             *p            = (bicgstab_data -> p);
   void             *q            = (bicgstab_data -> q);

   HYPRE_Int 	           (*precond)(void*,void*,void*,void*)   = (bicgstab_functions -> precond);
   HYPRE_Int 	            *precond_data = (HYPRE_Int*)(bicgstab_data -> precond_data);

   /* logging variables */
   HYPRE_Int             logging        = (bicgstab_data -> logging);
   HYPRE_Int             print_level    = (bicgstab_data -> print_level);
   HYPRE_Real     *norms          = (bicgstab_data -> norms);
   /*   char           *log_file_name  = (bicgstab_data -> log_file_name);
     FILE           *fp; */
   
   HYPRE_Int        iter; 
   HYPRE_Int        my_id, num_procs;
   HYPRE_Real alpha, beta, gamma, epsilon, temp, res, r_norm, b_norm;
   HYPRE_Real epsmac = 1.e-128; 
   HYPRE_Real ieee_check = 0.;
   HYPRE_Real cf_ave_0 = 0.0;
   HYPRE_Real cf_ave_1 = 0.0;
   HYPRE_Real weight;
   HYPRE_Real r_norm_0;
   HYPRE_Real den_norm;
   HYPRE_Real gamma_numer;
   HYPRE_Real gamma_denom;

   (bicgstab_data -> converged) = 0;

   (*(bicgstab_functions->CommInfo))(A,&my_id,&num_procs);
   if (logging > 0 || print_level > 0)
   {
      norms          = (bicgstab_data -> norms);
      /* log_file_name  = (bicgstab_data -> log_file_name);
         fp = fopen(log_file_name,"w"); */
   }

   /* initialize work arrays */
   (*(bicgstab_functions->CopyVector))(b,r0);

   /* compute initial residual */

   (*(bicgstab_functions->Matvec))(matvec_data,-1.0, A, x, 1.0, r0);
   (*(bicgstab_functions->CopyVector))(r0,r);
   (*(bicgstab_functions->CopyVector))(r0,p);

   b_norm = sqrt((*(bicgstab_functions->InnerProd))(b,b));

   /* Since it is does not diminish performance, attempt to return an error flag
      and notify users when they supply bad input. */
   if (b_norm != 0.) ieee_check = b_norm/b_norm; /* INF -> NaN conversion */
   if (ieee_check != ieee_check)
   {
      /* ...INFs or NaNs in input can make ieee_check a NaN.  This test
         for ieee_check self-equality works on all IEEE-compliant compilers/
         machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754"
         by W. Kahan, May 31, 1996.  Currently (July 2002) this paper may be
         found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */
      if (logging > 0 || print_level > 0)
      {
        hypre_printf("\n\nERROR detected by Hypre ...  BEGIN\n");
        hypre_printf("ERROR -- hypre_BiCGSTABSolve: INFs and/or NaNs detected in input.\n");
        hypre_printf("User probably placed non-numerics in supplied b.\n");
        hypre_printf("Returning error flag += 101.  Program not terminated.\n");
        hypre_printf("ERROR detected by Hypre ...  END\n\n\n");
      }
      hypre_error(HYPRE_ERROR_GENERIC);
      return hypre_error_flag;
   }

   res = (*(bicgstab_functions->InnerProd))(r0,r0);
   r_norm = sqrt(res);
   r_norm_0 = r_norm;
 
   /* Since it is does not diminish performance, attempt to return an error flag
      and notify users when they supply bad input. */
   if (r_norm != 0.) ieee_check = r_norm/r_norm; /* INF -> NaN conversion */
   if (ieee_check != ieee_check)
   {
      /* ...INFs or NaNs in input can make ieee_check a NaN.  This test
         for ieee_check self-equality works on all IEEE-compliant compilers/
         machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754"
         by W. Kahan, May 31, 1996.  Currently (July 2002) this paper may be
         found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */
      if (logging > 0 || print_level > 0)
      {
        hypre_printf("\n\nERROR detected by Hypre ...  BEGIN\n");
        hypre_printf("ERROR -- hypre_BiCGSTABSolve: INFs and/or NaNs detected in input.\n");
        hypre_printf("User probably placed non-numerics in supplied A or x_0.\n");
        hypre_printf("Returning error flag += 101.  Program not terminated.\n");
        hypre_printf("ERROR detected by Hypre ...  END\n\n\n");
      }

      hypre_error(HYPRE_ERROR_GENERIC);
      return hypre_error_flag;
   }

   if (logging > 0 || print_level > 0)
   {
      norms[0] = r_norm;
      if (print_level > 0 && my_id == 0)
      {
   	     hypre_printf("L2 norm of b: %e\n", b_norm);
         if (b_norm == 0.0)
            hypre_printf("Rel_resid_norm actually contains the residual norm\n");
         hypre_printf("Initial L2 norm of residual: %e\n", r_norm);
      }
   }
   iter = 0;

   if (b_norm > 0.0)
   {
      /* convergence criterion |r_i| <= r_tol*|b| if |b| > 0 */
      den_norm = b_norm;
   }
   else
   {
      /* convergence criterion |r_i| <= r_tol*|r0| if |b| = 0 */
      den_norm = r_norm;
   };

   /* convergence criterion |r_i| <= r_tol/a_tol , absolute residual norm*/
   if (stop_crit)
   {
      if (a_tol == 0.0) /* this is for backwards compatibility
                           (accomodating setting stop_crit to 1, but not setting a_tol) -
                           eventually we will get rid of the stop_crit flag as with GMRES */
         epsilon = r_tol;
      else
         epsilon = a_tol; /* this means new interface fcn called */
      
   }
   else /* default convergence test (stop_crit = 0)*/
   {
      
      /* convergence criteria: |r_i| <= max( a_tol, r_tol * den_norm)
      den_norm = |r_0| or |b|
      note: default for a_tol is 0.0, so relative residual criteria is used unless
            user also specifies a_tol or sets r_tol = 0.0, which means absolute
            tol only is checked  */
      
      epsilon = hypre_max(a_tol, r_tol*den_norm);
   
   }
   
   
   if (print_level > 0 && my_id == 0)
   {
      if (b_norm > 0.0)
         {hypre_printf("=============================================\n\n");
          hypre_printf("Iters     resid.norm     conv.rate  rel.res.norm\n");
          hypre_printf("-----    ------------    ---------- ------------\n");
      }
      else
         {hypre_printf("=============================================\n\n");
          hypre_printf("Iters     resid.norm     conv.rate\n");
          hypre_printf("-----    ------------    ----------\n");
      
      }
   }

   (bicgstab_data -> num_iterations) = iter;
   if (b_norm > 0.0)
      (bicgstab_data -> rel_residual_norm) = r_norm/b_norm;
   /* check for convergence before starting */
   if (r_norm == 0.0)
   {
	   return hypre_error_flag;
   }
   else if (r_norm <= epsilon && iter >= min_iter) 
   {
       if (print_level > 0 && my_id == 0)
       {
          hypre_printf("\n\n");
          hypre_printf("Tolerance and min_iter requirements satisfied by initial data.\n");
          hypre_printf("Final L2 norm of residual: %e\n\n", r_norm);
       }
       (bicgstab_data -> converged) = 1;
       return hypre_error_flag;
   }
   /* Start BiCGStab iterations */
   while (iter < max_iter)
   {
        iter++;

	(*(bicgstab_functions->ClearVector))(v);
        precond(precond_data, A, p, v);
        (*(bicgstab_functions->Matvec))(matvec_data,1.0,A,v,0.0,q);
      	temp = (*(bicgstab_functions->InnerProd))(r0,q);
      	if (fabs(temp) >= epsmac)
	   alpha = res/temp;
	else
	{
	   hypre_printf("BiCGSTAB broke down!! divide by near zero\n");
	   return(1);
	}
	(*(bicgstab_functions->Axpy))(alpha,v,x);
	(*(bicgstab_functions->Axpy))(-alpha,q,r);
	(*(bicgstab_functions->ClearVector))(v);
        precond(precond_data, A, r, v);
        (*(bicgstab_functions->Matvec))(matvec_data,1.0,A,v,0.0,s);
      	/* Handle case when gamma = 0.0/0.0 as 0.0 and not NAN */
        gamma_numer = (*(bicgstab_functions->InnerProd))(r,s);
        gamma_denom = (*(bicgstab_functions->InnerProd))(s,s);
        if ((gamma_numer == 0.0) && (gamma_denom == 0.0))
            gamma = 0.0;
        else
            gamma= gamma_numer/gamma_denom;
	(*(bicgstab_functions->Axpy))(gamma,v,x);
	(*(bicgstab_functions->Axpy))(-gamma,s,r);
    /* residual is now updated, must immediately check for convergence */
	r_norm = sqrt((*(bicgstab_functions->InnerProd))(r,r));
	if (logging > 0 || print_level > 0)
	{
	   norms[iter] = r_norm;
	}
    if (print_level > 0 && my_id == 0)
    {
        if (b_norm > 0.0)
           hypre_printf("% 5d    %e    %f   %e\n", iter, norms[iter],
                      norms[iter]/norms[iter-1], norms[iter]/b_norm);
        else
           hypre_printf("% 5d    %e    %f\n", iter, norms[iter],
		                             norms[iter]/norms[iter-1]);
	}
    /* check for convergence, evaluate actual residual */
	if (r_norm <= epsilon && iter >= min_iter) 
    {
	   (*(bicgstab_functions->CopyVector))(b,r);
           (*(bicgstab_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r);
	   r_norm = sqrt((*(bicgstab_functions->InnerProd))(r,r));
	   if (r_norm <= epsilon)
       {
           if (print_level > 0 && my_id == 0)
           {
              hypre_printf("\n\n");
              hypre_printf("Final L2 norm of residual: %e\n\n", r_norm);
           }
           (bicgstab_data -> converged) = 1;
           break;
       }
    }
    /*--------------------------------------------------------------------
     * Optional test to see if adequate progress is being made.
     * The average convergence factor is recorded and compared
     * against the tolerance 'cf_tol'. The weighting factor is
     * intended to pay more attention to the test when an accurate
     * estimate for average convergence factor is available.
     *--------------------------------------------------------------------*/
    if (cf_tol > 0.0)
    {
       cf_ave_0 = cf_ave_1;
       cf_ave_1 = pow( r_norm / r_norm_0, 1.0/(2.0*iter));

       weight   = fabs(cf_ave_1 - cf_ave_0);
       weight   = weight / hypre_max(cf_ave_1, cf_ave_0);
       weight   = 1.0 - weight;
       if (weight * cf_ave_1 > cf_tol) break;
    }

      	if (fabs(res) >= epsmac)
           beta = 1.0/res;
	else
	{
	   hypre_printf("BiCGSTAB broke down!! res=0 \n");
	   return(2);
	}
        res = (*(bicgstab_functions->InnerProd))(r0,r);
        beta *= res;    
	(*(bicgstab_functions->Axpy))(-gamma,q,p);
      	if (fabs(gamma) >= epsmac)
           (*(bicgstab_functions->ScaleVector))((beta*alpha/gamma),p);
	else
	{
	   hypre_printf("BiCGSTAB broke down!! gamma=0 \n");
	   return(3);
	}
	(*(bicgstab_functions->Axpy))(1.0,r,p);
   } /* end while loop */
    
   (bicgstab_data -> num_iterations) = iter;
   if (b_norm > 0.0)
      (bicgstab_data -> rel_residual_norm) = r_norm/b_norm;
   if (b_norm == 0.0)
      (bicgstab_data -> rel_residual_norm) = r_norm;

   if (iter >= max_iter && r_norm > epsilon) hypre_error(HYPRE_ERROR_CONV);


   return hypre_error_flag;
}
Esempio n. 8
0
HYPRE_Int 
HYPRE_SStructSplitSetup( HYPRE_SStructSolver solver,
                         HYPRE_SStructMatrix A,
                         HYPRE_SStructVector b,
                         HYPRE_SStructVector x )
{
   hypre_SStructVector     *y;
   HYPRE_Int                nparts;
   HYPRE_Int               *nvars;
   void                 ****smatvec_data;
   HYPRE_Int            (***ssolver_solve)();
   HYPRE_Int            (***ssolver_destroy)();
   void                  ***ssolver_data;
   HYPRE_Int                ssolver          = (solver -> ssolver);

   MPI_Comm                 comm;
   hypre_SStructGrid       *grid;
   hypre_SStructPMatrix    *pA;
   hypre_SStructPVector    *px;
   hypre_SStructPVector    *py;
   hypre_StructMatrix      *sA;
   hypre_StructVector      *sx;
   hypre_StructVector      *sy;
   HYPRE_StructMatrix      sAH;
   HYPRE_StructVector      sxH;
   HYPRE_StructVector      syH;
   HYPRE_Int              (*ssolve)();
   HYPRE_Int              (*sdestroy)();
   void                    *sdata;

   HYPRE_Int                part, vi, vj;

   comm = hypre_SStructVectorComm(b);
   grid = hypre_SStructVectorGrid(b);
   HYPRE_SStructVectorCreate(comm, grid, &y);
   HYPRE_SStructVectorInitialize(y);
   HYPRE_SStructVectorAssemble(y);

   nparts = hypre_SStructMatrixNParts(A);
   nvars = hypre_TAlloc(HYPRE_Int, nparts);
   smatvec_data    = hypre_TAlloc(void ***, nparts);
   ssolver_solve   = (HYPRE_Int (***)()) hypre_MAlloc((sizeof(HYPRE_Int (**)()) * nparts));
   ssolver_destroy = (HYPRE_Int (***)()) hypre_MAlloc((sizeof(HYPRE_Int (**)()) * nparts));
   ssolver_data    = hypre_TAlloc(void **, nparts);
   for (part = 0; part < nparts; part++)
   {
      pA = hypre_SStructMatrixPMatrix(A, part);
      px = hypre_SStructVectorPVector(x, part);
      py = hypre_SStructVectorPVector(y, part);
      nvars[part] = hypre_SStructPMatrixNVars(pA);

      smatvec_data[part]    = hypre_TAlloc(void **, nvars[part]);
      ssolver_solve[part]   =
         (HYPRE_Int (**)()) hypre_MAlloc((sizeof(HYPRE_Int (*)()) * nvars[part]));
      ssolver_destroy[part] =
         (HYPRE_Int (**)()) hypre_MAlloc((sizeof(HYPRE_Int (*)()) * nvars[part]));
      ssolver_data[part]    = hypre_TAlloc(void *, nvars[part]);
      for (vi = 0; vi < nvars[part]; vi++)
      {
         smatvec_data[part][vi] = hypre_TAlloc(void *, nvars[part]);
         for (vj = 0; vj < nvars[part]; vj++)
         {
            sA = hypre_SStructPMatrixSMatrix(pA, vi, vj);
            sx = hypre_SStructPVectorSVector(px, vj);
            smatvec_data[part][vi][vj] = NULL;
            if (sA != NULL)
            {
               smatvec_data[part][vi][vj] = hypre_StructMatvecCreate();
               hypre_StructMatvecSetup(smatvec_data[part][vi][vj], sA, sx);
            }
         }

         sA = hypre_SStructPMatrixSMatrix(pA, vi, vi);
         sx = hypre_SStructPVectorSVector(px, vi);
         sy = hypre_SStructPVectorSVector(py, vi);
         sAH = (HYPRE_StructMatrix) sA;
         sxH = (HYPRE_StructVector) sx;
         syH = (HYPRE_StructVector) sy;
         switch(ssolver)
         {
            default:
               /* If no solver is matched, use Jacobi, but throw and error */
               if (ssolver != HYPRE_Jacobi)
               {
                  hypre_error(HYPRE_ERROR_GENERIC);
               } /* don't break */
            case HYPRE_Jacobi:
               HYPRE_StructJacobiCreate(comm, (HYPRE_StructSolver *)&sdata);
               HYPRE_StructJacobiSetMaxIter(sdata, 1);
               HYPRE_StructJacobiSetTol(sdata, 0.0);
               if (solver -> zero_guess)
               {
                  HYPRE_StructJacobiSetZeroGuess(sdata);
               }
               HYPRE_StructJacobiSetup(sdata, sAH, syH, sxH);
               ssolve = HYPRE_StructJacobiSolve;
               sdestroy = HYPRE_StructJacobiDestroy;
               break;
            case HYPRE_SMG:
               HYPRE_StructSMGCreate(comm, (HYPRE_StructSolver *)&sdata);
               HYPRE_StructSMGSetMemoryUse(sdata, 0);
               HYPRE_StructSMGSetMaxIter(sdata, 1);
               HYPRE_StructSMGSetTol(sdata, 0.0);
               if (solver -> zero_guess)
               {
                  HYPRE_StructSMGSetZeroGuess(sdata);
               }
               HYPRE_StructSMGSetNumPreRelax(sdata, 1);
               HYPRE_StructSMGSetNumPostRelax(sdata, 1);
               HYPRE_StructSMGSetLogging(sdata, 0);
               HYPRE_StructSMGSetPrintLevel(sdata, 0);
               HYPRE_StructSMGSetup(sdata, sAH, syH, sxH);
               ssolve = HYPRE_StructSMGSolve;
               sdestroy = HYPRE_StructSMGDestroy;
               break;
            case HYPRE_PFMG:
               HYPRE_StructPFMGCreate(comm, (HYPRE_StructSolver *)&sdata);
               HYPRE_StructPFMGSetMaxIter(sdata, 1);
               HYPRE_StructPFMGSetTol(sdata, 0.0);
               if (solver -> zero_guess)
               {
                  HYPRE_StructPFMGSetZeroGuess(sdata);
               }
               HYPRE_StructPFMGSetRelaxType(sdata, 1);
               HYPRE_StructPFMGSetNumPreRelax(sdata, 1);
               HYPRE_StructPFMGSetNumPostRelax(sdata, 1);
               HYPRE_StructPFMGSetLogging(sdata, 0);
               HYPRE_StructPFMGSetPrintLevel(sdata, 0);
               HYPRE_StructPFMGSetup(sdata, sAH, syH, sxH);
               ssolve = HYPRE_StructPFMGSolve;
               sdestroy = HYPRE_StructPFMGDestroy;
               break;
         }
         ssolver_solve[part][vi]   = ssolve;
         ssolver_destroy[part][vi] = sdestroy;
         ssolver_data[part][vi]    = sdata;
      }
   }

   (solver -> y)               = y;
   (solver -> nparts)          = nparts;
   (solver -> nvars)           = nvars;
   (solver -> smatvec_data)    = smatvec_data;
   (solver -> ssolver_solve)   = ssolver_solve;
   (solver -> ssolver_destroy) = ssolver_destroy;
   (solver -> ssolver_data)    = ssolver_data;
   if ((solver -> tol) > 0.0)
   {
      hypre_SStructMatvecCreate(&(solver -> matvec_data));
      hypre_SStructMatvecSetup((solver -> matvec_data), A, x);
   }

   return hypre_error_flag;
}
Esempio n. 9
0
int HYPRE_IJVectorCreate( MPI_Comm comm,
                          HYPRE_BigInt jlower,
                          HYPRE_BigInt jupper,
                          HYPRE_IJVector *vector )
{
   hypre_IJVector *vec;
   int num_procs, my_id;
   HYPRE_BigInt *partitioning;
 
#ifdef HYPRE_NO_GLOBAL_PARTITION
   HYPRE_BigInt  row0, rowN;
#else
  HYPRE_BigInt *recv_buf;
  HYPRE_BigInt *info;
  int i, i2;
#endif

   vec = hypre_CTAlloc(hypre_IJVector, 1);
   
   if (!vec)
   {  
      printf("Out of memory -- HYPRE_IJVectorCreate\n");
      hypre_error(HYPRE_ERROR_MEMORY);
      return hypre_error_flag;
   }

   MPI_Comm_size(comm, &num_procs);
   MPI_Comm_rank(comm, &my_id);

   if (jlower > jupper+1 || jlower < 0)
   {
      hypre_error_in_arg(2);
      return hypre_error_flag;
   }
   if (jupper < -1)
   {
      hypre_error_in_arg(3);
      return hypre_error_flag;
   }


#ifdef HYPRE_NO_GLOBAL_PARTITION

   partitioning = hypre_CTAlloc(HYPRE_BigInt, 2);

   partitioning[0] = jlower;
   partitioning[1] = jupper+1;

      
   /* now we need the global number of rows as well
      as the global first row index */

   /* proc 0 has the first row  */
   if (my_id==0) 
   {
      row0 = jlower;
   }
   MPI_Bcast(&row0, 1, MPI_HYPRE_BIG_INT, 0, comm);
   /* proc (num_procs-1) has the last row  */   
   if (my_id == (num_procs-1))
   {
      rowN = jupper;
   }
   MPI_Bcast(&rowN, 1, MPI_HYPRE_BIG_INT, num_procs-1, comm);

   hypre_IJVectorGlobalFirstRow(vec) = row0;
   hypre_IJVectorGlobalNumRows(vec) = rowN - row0 + 1;
   
#else

   info = hypre_CTAlloc(HYPRE_BigInt,2);
   recv_buf = hypre_CTAlloc(HYPRE_BigInt, 2*num_procs);
   partitioning = hypre_CTAlloc(HYPRE_BigInt, num_procs+1);

   info[0] = jlower;
   info[1] = jupper;

   MPI_Allgather(info, 2, MPI_HYPRE_BIG_INT, recv_buf, 2, MPI_HYPRE_BIG_INT, comm);

   partitioning[0] = recv_buf[0];
   for (i=0; i < num_procs-1; i++)
   {
      i2 = i+i;
      if (recv_buf[i2+1] != (recv_buf[i2+2]-1))
      {
         printf("Inconsistent partitioning -- HYPRE_IJVectorCreate\n");  
	 hypre_error(HYPRE_ERROR_GENERIC);
         return hypre_error_flag;
      }
      else
	 partitioning[i+1] = recv_buf[i2+2];
   }
   i2 = (num_procs-1)*2;
   partitioning[num_procs] = recv_buf[i2+1]+1;

   hypre_TFree(info);
   hypre_TFree(recv_buf);


   hypre_IJVectorGlobalFirstRow(vec) = partitioning[0];
   hypre_IJVectorGlobalNumRows(vec)= partitioning[num_procs]-partitioning[0];
   


#endif


   hypre_IJVectorComm(vec)         = comm;
   hypre_IJVectorPartitioning(vec) = partitioning;
   hypre_IJVectorObjectType(vec)   = HYPRE_UNITIALIZED;
   hypre_IJVectorObject(vec)       = NULL;
   hypre_IJVectorTranslator(vec)   = NULL;

   *vector = (HYPRE_IJVector) vec;
  
   return hypre_error_flag;
}
Esempio n. 10
0
File: gmres.c Progetto: LLNL/COGENT
HYPRE_Int
hypre_GMRESSolve(void  *gmres_vdata,
                 void  *A,
                 void  *b,
		 void  *x)
{
   hypre_GMRESData  *gmres_data   = gmres_vdata;
   hypre_GMRESFunctions *gmres_functions = gmres_data->functions;
   HYPRE_Int 		     k_dim        = (gmres_data -> k_dim);
   HYPRE_Int               min_iter     = (gmres_data -> min_iter);
   HYPRE_Int 		     max_iter     = (gmres_data -> max_iter);
   HYPRE_Int               rel_change   = (gmres_data -> rel_change);
   HYPRE_Int         skip_real_r_check  = (gmres_data -> skip_real_r_check);
   double 	     r_tol        = (gmres_data -> tol);
   double 	     cf_tol       = (gmres_data -> cf_tol);
   double            a_tol        = (gmres_data -> a_tol);
   void             *matvec_data  = (gmres_data -> matvec_data);

   void             *r            = (gmres_data -> r);
   void             *w            = (gmres_data -> w);
   /* note: w_2 is only allocated if rel_change = 1 */
   void             *w_2          = (gmres_data -> w_2); 

   void            **p            = (gmres_data -> p);


   HYPRE_Int 	           (*precond)()   = (gmres_functions -> precond);
   HYPRE_Int 	            *precond_data = (gmres_data -> precond_data);

   HYPRE_Int             print_level    = (gmres_data -> print_level);
   HYPRE_Int             logging        = (gmres_data -> logging);

   double         *norms          = (gmres_data -> norms);
/* not used yet   char           *log_file_name  = (gmres_data -> log_file_name);*/
/*   FILE           *fp; */
   
   HYPRE_Int        break_value = 0;
   HYPRE_Int	      i, j, k;
   double     *rs, **hh, *c, *s, *rs_2; 
   HYPRE_Int        iter; 
   HYPRE_Int        my_id, num_procs;
   double     epsilon, gamma, t, r_norm, b_norm, den_norm, x_norm;
   double     w_norm;
   
   double     epsmac = 1.e-16; 
   double     ieee_check = 0.;

   double     guard_zero_residual; 
   double     cf_ave_0 = 0.0;
   double     cf_ave_1 = 0.0;
   double     weight;
   double     r_norm_0;
   double     relative_error = 1.0;

   HYPRE_Int        rel_change_passed = 0, num_rel_change_check = 0;

   double     real_r_norm_old, real_r_norm_new;

   (gmres_data -> converged) = 0;
   /*-----------------------------------------------------------------------
    * With relative change convergence test on, it is possible to attempt
    * another iteration with a zero residual. This causes the parameter
    * alpha to go NaN. The guard_zero_residual parameter is to circumvent
    * this. Perhaps it should be set to something non-zero (but small).
    *-----------------------------------------------------------------------*/
   guard_zero_residual = 0.0;

   (*(gmres_functions->CommInfo))(A,&my_id,&num_procs);
   if ( logging>0 || print_level>0 )
   {
      norms          = (gmres_data -> norms);
   }

   /* initialize work arrays */
   rs = hypre_CTAllocF(double,k_dim+1,gmres_functions); 
   c = hypre_CTAllocF(double,k_dim,gmres_functions); 
   s = hypre_CTAllocF(double,k_dim,gmres_functions); 
   if (rel_change) rs_2 = hypre_CTAllocF(double,k_dim+1,gmres_functions); 
   


   hh = hypre_CTAllocF(double*,k_dim+1,gmres_functions); 
   for (i=0; i < k_dim+1; i++)
   {	
   	hh[i] = hypre_CTAllocF(double,k_dim,gmres_functions); 
   }

   (*(gmres_functions->CopyVector))(b,p[0]);

   /* compute initial residual */
   (*(gmres_functions->Matvec))(matvec_data,-1.0, A, x, 1.0, p[0]);

   b_norm = sqrt((*(gmres_functions->InnerProd))(b,b));
   real_r_norm_old = b_norm;

   /* Since it is does not diminish performance, attempt to return an error flag
      and notify users when they supply bad input. */
   if (b_norm != 0.) ieee_check = b_norm/b_norm; /* INF -> NaN conversion */
   if (ieee_check != ieee_check)
   {
      /* ...INFs or NaNs in input can make ieee_check a NaN.  This test
         for ieee_check self-equality works on all IEEE-compliant compilers/
         machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754"
         by W. Kahan, May 31, 1996.  Currently (July 2002) this paper may be
         found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */
      if (logging > 0 || print_level > 0)
      {
        hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n");
        hypre_printf("ERROR -- hypre_GMRESSolve: INFs and/or NaNs detected in input.\n");
        hypre_printf("User probably placed non-numerics in supplied b.\n");
        hypre_printf("Returning error flag += 101.  Program not terminated.\n");
        hypre_printf("ERROR detected by Hypre ... END\n\n\n");
      }
      hypre_error(HYPRE_ERROR_GENERIC);
      return hypre_error_flag;
   }

   r_norm = sqrt((*(gmres_functions->InnerProd))(p[0],p[0]));
   r_norm_0 = r_norm;

   /* Since it is does not diminish performance, attempt to return an error flag
      and notify users when they supply bad input. */
   if (r_norm != 0.) ieee_check = r_norm/r_norm; /* INF -> NaN conversion */
   if (ieee_check != ieee_check)
   {
      /* ...INFs or NaNs in input can make ieee_check a NaN.  This test
         for ieee_check self-equality works on all IEEE-compliant compilers/
         machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754"
         by W. Kahan, May 31, 1996.  Currently (July 2002) this paper may be
         found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */
      if (logging > 0 || print_level > 0)
      {
        hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n");
        hypre_printf("ERROR -- hypre_GMRESSolve: INFs and/or NaNs detected in input.\n");
        hypre_printf("User probably placed non-numerics in supplied A or x_0.\n");
        hypre_printf("Returning error flag += 101.  Program not terminated.\n");
        hypre_printf("ERROR detected by Hypre ... END\n\n\n");
      }
      hypre_error(HYPRE_ERROR_GENERIC);
      return hypre_error_flag;
   }

   if ( logging>0 || print_level > 0)
   {
      norms[0] = r_norm;
      if ( print_level>1 && my_id == 0 )
      {
  	 hypre_printf("L2 norm of b: %e\n", b_norm);
         if (b_norm == 0.0)
            hypre_printf("Rel_resid_norm actually contains the residual norm\n");
         hypre_printf("Initial L2 norm of residual: %e\n", r_norm);
      
      }
   }
   iter = 0;

   if (b_norm > 0.0)
   {
     /* convergence criterion |r_i|/|b| <= accuracy if |b| > 0 */
     den_norm= b_norm;
   }
   else
   {
     /* convergence criterion |r_i|/|r0| <= accuracy if |b| = 0 */
     den_norm= r_norm;
   };


   /* convergence criteria: |r_i| <= max( a_tol, r_tol * den_norm)
      den_norm = |r_0| or |b|
      note: default for a_tol is 0.0, so relative residual criteria is used unless
            user specifies a_tol, or sets r_tol = 0.0, which means absolute
            tol only is checked  */
      
   epsilon = hypre_max(a_tol,r_tol*den_norm);
   
   /* so now our stop criteria is |r_i| <= epsilon */

   if ( print_level>1 && my_id == 0 )
   {
      if (b_norm > 0.0)
         {hypre_printf("=============================================\n\n");
          hypre_printf("Iters     resid.norm     conv.rate  rel.res.norm\n");
          hypre_printf("-----    ------------    ---------- ------------\n");
      
          }

      else
         {hypre_printf("=============================================\n\n");
          hypre_printf("Iters     resid.norm     conv.rate\n");
          hypre_printf("-----    ------------    ----------\n");
      
          };
   }


   /* once the rel. change check has passed, we do not want to check it again */
   rel_change_passed = 0;


   /* outer iteration cycle */
   while (iter < max_iter)
   {
   /* initialize first term of hessenberg system */

	rs[0] = r_norm;
        if (r_norm == 0.0)
        {
           hypre_TFreeF(c,gmres_functions); 
           hypre_TFreeF(s,gmres_functions); 
           hypre_TFreeF(rs,gmres_functions);
           if (rel_change)  hypre_TFreeF(rs_2,gmres_functions);
           for (i=0; i < k_dim+1; i++) hypre_TFreeF(hh[i],gmres_functions);
           hypre_TFreeF(hh,gmres_functions); 
	   return hypre_error_flag;
           
	}

        /* see if we are already converged and 
           should print the final norm and exit */
	if (r_norm  <= epsilon && iter >= min_iter) 
        {
           if (!rel_change) /* shouldn't exit after no iterations if
                             * relative change is on*/
           {
              (*(gmres_functions->CopyVector))(b,r);
              (*(gmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r);
              r_norm = sqrt((*(gmres_functions->InnerProd))(r,r));
              if (r_norm  <= epsilon)
              {
                 if ( print_level>1 && my_id == 0)
                 {
                    hypre_printf("\n\n");
                    hypre_printf("Final L2 norm of residual: %e\n\n", r_norm);
                 }
                 break;
              }
              else
                 if ( print_level>0 && my_id == 0)
                    hypre_printf("false convergence 1\n");
           }
	}

      
        
      	t = 1.0 / r_norm;
	(*(gmres_functions->ScaleVector))(t,p[0]);
	i = 0;

        /***RESTART CYCLE (right-preconditioning) ***/
        while (i < k_dim && iter < max_iter)
	{
           i++;
           iter++;
           (*(gmres_functions->ClearVector))(r);
           precond(precond_data, A, p[i-1], r);
           (*(gmres_functions->Matvec))(matvec_data, 1.0, A, r, 0.0, p[i]);
           /* modified Gram_Schmidt */
           for (j=0; j < i; j++)
           {
              hh[j][i-1] = (*(gmres_functions->InnerProd))(p[j],p[i]);
              (*(gmres_functions->Axpy))(-hh[j][i-1],p[j],p[i]);
           }
           t = sqrt((*(gmres_functions->InnerProd))(p[i],p[i]));
           hh[i][i-1] = t;	
           if (t != 0.0)
           {
              t = 1.0/t;
              (*(gmres_functions->ScaleVector))(t,p[i]);
           }
           /* done with modified Gram_schmidt and Arnoldi step.
              update factorization of hh */
           for (j = 1; j < i; j++)
           {
              t = hh[j-1][i-1];
              hh[j-1][i-1] = s[j-1]*hh[j][i-1] + c[j-1]*t;
              hh[j][i-1] = -s[j-1]*t + c[j-1]*hh[j][i-1];
           }
           t= hh[i][i-1]*hh[i][i-1];
           t+= hh[i-1][i-1]*hh[i-1][i-1];
           gamma = sqrt(t);
           if (gamma == 0.0) gamma = epsmac;
           c[i-1] = hh[i-1][i-1]/gamma;
           s[i-1] = hh[i][i-1]/gamma;
           rs[i] = -hh[i][i-1]*rs[i-1];
           rs[i]/=  gamma;
           rs[i-1] = c[i-1]*rs[i-1];
           /* determine residual norm */
           hh[i-1][i-1] = s[i-1]*hh[i][i-1] + c[i-1]*hh[i-1][i-1];
           r_norm = fabs(rs[i]);

           /* print ? */
           if ( print_level>0 )
           {
              norms[iter] = r_norm;
              if ( print_level>1 && my_id == 0 )
              {
                 if (b_norm > 0.0)
                    hypre_printf("% 5d    %e    %f   %e\n", iter, 
                           norms[iter],norms[iter]/norms[iter-1],
                           norms[iter]/b_norm);
                 else
                    hypre_printf("% 5d    %e    %f\n", iter, norms[iter],
                           norms[iter]/norms[iter-1]);
              }
           }
           /*convergence factor tolerance */
           if (cf_tol > 0.0)
           {
              cf_ave_0 = cf_ave_1;
              cf_ave_1 = pow( r_norm / r_norm_0, 1.0/(2.0*iter));
              
              weight   = fabs(cf_ave_1 - cf_ave_0);
              weight   = weight / hypre_max(cf_ave_1, cf_ave_0);
              weight   = 1.0 - weight;
#if 0
              hypre_printf("I = %d: cf_new = %e, cf_old = %e, weight = %e\n",
                     i, cf_ave_1, cf_ave_0, weight );
#endif
              if (weight * cf_ave_1 > cf_tol) 
              {
                 break_value = 1;
                 break;
              }
           }
           /* should we exit the restart cycle? (conv. check) */
           if (r_norm <= epsilon && iter >= min_iter)
           {
              if (rel_change && !rel_change_passed)
              {
                 
                 /* To decide whether to break here: to actually
                  determine the relative change requires the approx
                  solution (so a triangular solve) and a
                  precond. solve - so if we have to do this many
                  times, it will be expensive...(unlike cg where is
                  is relatively straightforward)

                  previously, the intent (there was a bug), was to
                  exit the restart cycle based on the residual norm
                  and check the relative change outside the cycle.
                  Here we will check the relative here as we don't
                  want to exit the restart cycle prematurely */
                 
                 for (k=0; k<i; k++) /* extra copy of rs so we don't need
                                        to change the later solve */
                    rs_2[k] = rs[k];

                 /* solve tri. system*/
                 rs_2[i-1] = rs_2[i-1]/hh[i-1][i-1];
                 for (k = i-2; k >= 0; k--)
                 {
                    t = 0.0;
                    for (j = k+1; j < i; j++)
                    {
                       t -= hh[k][j]*rs_2[j];
                    }
                    t+= rs_2[k];
                    rs_2[k] = t/hh[k][k];
                 }
                 
                 (*(gmres_functions->CopyVector))(p[i-1],w);
                 (*(gmres_functions->ScaleVector))(rs_2[i-1],w);
                 for (j = i-2; j >=0; j--)
                    (*(gmres_functions->Axpy))(rs_2[j], p[j], w);
                    
                 (*(gmres_functions->ClearVector))(r);
                 /* find correction (in r) */
                 precond(precond_data, A, w, r);
                 /* copy current solution (x) to w (don't want to over-write x)*/
                 (*(gmres_functions->CopyVector))(x,w);

                 /* add the correction */
                 (*(gmres_functions->Axpy))(1.0,r,w);

                 /* now w is the approx solution  - get the norm*/
                 x_norm = sqrt( (*(gmres_functions->InnerProd))(w,w) );

                 if ( !(x_norm <= guard_zero_residual ))
                    /* don't divide by zero */
                 {  /* now get  x_i - x_i-1 */
                    
                    if (num_rel_change_check)
                    {
                       /* have already checked once so we can avoid another precond.
                          solve */
                       (*(gmres_functions->CopyVector))(w, r);
                       (*(gmres_functions->Axpy))(-1.0, w_2, r);
                       /* now r contains x_i - x_i-1*/

                       /* save current soln w in w_2 for next time */
                       (*(gmres_functions->CopyVector))(w, w_2);
                    }
                    else
                    {
                       /* first time to check rel change*/

                       /* first save current soln w in w_2 for next time */
                       (*(gmres_functions->CopyVector))(w, w_2);

                       /* for relative change take x_(i-1) to be 
                          x + M^{-1}[sum{j=0..i-2} rs_j p_j ]. 
                          Now
                          x_i - x_{i-1}= {x + M^{-1}[sum{j=0..i-1} rs_j p_j ]}
                          - {x + M^{-1}[sum{j=0..i-2} rs_j p_j ]}
                          = M^{-1} rs_{i-1}{p_{i-1}} */
                       
                       (*(gmres_functions->ClearVector))(w);
                       (*(gmres_functions->Axpy))(rs_2[i-1], p[i-1], w);
                       (*(gmres_functions->ClearVector))(r);
                       /* apply the preconditioner */
                       precond(precond_data, A, w, r);
                       /* now r contains x_i - x_i-1 */          
                    }
                    /* find the norm of x_i - x_i-1 */          
                    w_norm = sqrt( (*(gmres_functions->InnerProd))(r,r) );
                    relative_error = w_norm/x_norm;
                    if (relative_error <= r_tol)
                    {
                       rel_change_passed = 1;
                       break;
                    }
                 }
                 else
                 {
                    rel_change_passed = 1;
                    break;

                 }
                 num_rel_change_check++;
              }
           else /* no relative change */
              {
                 break;
              }
           }
           

	} /*** end of restart cycle ***/

	/* now compute solution, first solve upper triangular system */

	if (break_value) break;
	
	rs[i-1] = rs[i-1]/hh[i-1][i-1];
	for (k = i-2; k >= 0; k--)
	{
           t = 0.0;
           for (j = k+1; j < i; j++)
           {
              t -= hh[k][j]*rs[j];
           }
           t+= rs[k];
           rs[k] = t/hh[k][k];
	}

        (*(gmres_functions->CopyVector))(p[i-1],w);
        (*(gmres_functions->ScaleVector))(rs[i-1],w);
        for (j = i-2; j >=0; j--)
                (*(gmres_functions->Axpy))(rs[j], p[j], w);

	(*(gmres_functions->ClearVector))(r);
	/* find correction (in r) */
        precond(precond_data, A, w, r);

        /* update current solution x (in x) */
	(*(gmres_functions->Axpy))(1.0,r,x);
         

        /* check for convergence by evaluating the actual residual */
	if (r_norm  <= epsilon && iter >= min_iter)
        {
           if (skip_real_r_check)
           {
              (gmres_data -> converged) = 1;
              break;
           }

           /* calculate actual residual norm*/
           (*(gmres_functions->CopyVector))(b,r);
           (*(gmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r);
           real_r_norm_new = r_norm = sqrt( (*(gmres_functions->InnerProd))(r,r) );

           if (r_norm <= epsilon)
           {
              if (rel_change && !rel_change_passed) /* calculate the relative change */
              {

                 /* calculate the norm of the solution */
                 x_norm = sqrt( (*(gmres_functions->InnerProd))(x,x) );
               
                 if ( !(x_norm <= guard_zero_residual ))
                    /* don't divide by zero */
                 {
                    
                    /* for relative change take x_(i-1) to be 
                       x + M^{-1}[sum{j=0..i-2} rs_j p_j ]. 
                       Now
                       x_i - x_{i-1}= {x + M^{-1}[sum{j=0..i-1} rs_j p_j ]}
                       - {x + M^{-1}[sum{j=0..i-2} rs_j p_j ]}
                       = M^{-1} rs_{i-1}{p_{i-1}} */
                    (*(gmres_functions->ClearVector))(w);
                    (*(gmres_functions->Axpy))(rs[i-1], p[i-1], w);
                    (*(gmres_functions->ClearVector))(r);
                    /* apply the preconditioner */
                    precond(precond_data, A, w, r);
                    /* find the norm of x_i - x_i-1 */          
                    w_norm = sqrt( (*(gmres_functions->InnerProd))(r,r) );
                    relative_error= w_norm/x_norm;
                    if ( relative_error < r_tol )
                    {
                       (gmres_data -> converged) = 1;
                       if ( print_level>1 && my_id == 0 )
                       {
                          hypre_printf("\n\n");
                          hypre_printf("Final L2 norm of residual: %e\n\n", r_norm);
                       }
                       break;
                    }
                 }
                 else
                 {
                    (gmres_data -> converged) = 1;
                    if ( print_level>1 && my_id == 0 )
                    {
                       hypre_printf("\n\n");
                       hypre_printf("Final L2 norm of residual: %e\n\n", r_norm);
                    }
                    break;
                 }

              }
              else /* don't need to check rel. change */
              {
                 if ( print_level>1 && my_id == 0 )
                 {
                    hypre_printf("\n\n");
                    hypre_printf("Final L2 norm of residual: %e\n\n", r_norm);
                 }
                 (gmres_data -> converged) = 1;
                 break;
              }
           }
           else /* conv. has not occurred, according to true residual */
           {
              /* exit if the real residual norm has not decreased */
              if (real_r_norm_new >= real_r_norm_old)
              {
                 if (print_level > 1 && my_id == 0)
                 {
                    hypre_printf("\n\n");
                    hypre_printf("Final L2 norm of residual: %e\n\n", r_norm);
                 }
                 (gmres_data -> converged) = 1;
                 break;
              }

              /* report discrepancy between real/GMRES residuals and restart */
              if ( print_level>0 && my_id == 0)
                 hypre_printf("false convergence 2, L2 norm of residual: %e\n", r_norm);
              (*(gmres_functions->CopyVector))(r,p[0]);
              i = 0;
              real_r_norm_old = real_r_norm_new;
           }
	} /* end of convergence check */

        /* compute residual vector and continue loop */
	for (j=i ; j > 0; j--)
	{
           rs[j-1] = -s[j-1]*rs[j];
           rs[j] = c[j-1]*rs[j];
	}
        
        if (i) (*(gmres_functions->Axpy))(rs[i]-1.0,p[i],p[i]);
        for (j=i-1 ; j > 0; j--)
           (*(gmres_functions->Axpy))(rs[j],p[j],p[i]);
        
        if (i)
        {
           (*(gmres_functions->Axpy))(rs[0]-1.0,p[0],p[0]);
           (*(gmres_functions->Axpy))(1.0,p[i],p[0]);
        }
   } /* END of iteration while loop */


   if ( print_level>1 && my_id == 0 )
          hypre_printf("\n\n"); 

   (gmres_data -> num_iterations) = iter;
   if (b_norm > 0.0)
      (gmres_data -> rel_residual_norm) = r_norm/b_norm;
   if (b_norm == 0.0)
      (gmres_data -> rel_residual_norm) = r_norm;

   if (iter >= max_iter && r_norm > epsilon) hypre_error(HYPRE_ERROR_CONV);
   

   hypre_TFreeF(c,gmres_functions); 
   hypre_TFreeF(s,gmres_functions); 
   hypre_TFreeF(rs,gmres_functions);
   if (rel_change)  hypre_TFreeF(rs_2,gmres_functions);

   for (i=0; i < k_dim+1; i++)
   {	
   	hypre_TFreeF(hh[i],gmres_functions);
   }
   hypre_TFreeF(hh,gmres_functions); 

   return hypre_error_flag;
}
Esempio n. 11
0
HYPRE_Int
HYPRE_IJMatrixCreate( MPI_Comm        comm,
                      HYPRE_Int       ilower,
                      HYPRE_Int       iupper,
                      HYPRE_Int       jlower,
                      HYPRE_Int       jupper,
                      HYPRE_IJMatrix *matrix )
{
   HYPRE_Int *row_partitioning;
   HYPRE_Int *col_partitioning;
   HYPRE_Int *info;
   HYPRE_Int num_procs;
   HYPRE_Int myid;

   hypre_IJMatrix *ijmatrix;

#ifdef HYPRE_NO_GLOBAL_PARTITION
   HYPRE_Int  row0, col0, rowN, colN;
#else
 HYPRE_Int *recv_buf;
   HYPRE_Int i, i4;
   HYPRE_Int square;
#endif

   ijmatrix = hypre_CTAlloc(hypre_IJMatrix, 1);

   hypre_IJMatrixComm(ijmatrix)         = comm;
   hypre_IJMatrixObject(ijmatrix)       = NULL;
   hypre_IJMatrixTranslator(ijmatrix)   = NULL;
   hypre_IJMatrixObjectType(ijmatrix)   = HYPRE_UNITIALIZED;
   hypre_IJMatrixAssembleFlag(ijmatrix) = 0;
   hypre_IJMatrixPrintLevel(ijmatrix) = 0;

   hypre_MPI_Comm_size(comm,&num_procs);
   hypre_MPI_Comm_rank(comm, &myid);
   

   if (ilower > iupper+1 || ilower < 0)
   {
      hypre_error_in_arg(2);
      hypre_TFree(ijmatrix);
      return hypre_error_flag;
   }

   if (iupper < -1)
   {
      hypre_error_in_arg(3);
      hypre_TFree(ijmatrix);
      return hypre_error_flag;
   }

   if (jlower > jupper+1 || jlower < 0)
   {
      hypre_error_in_arg(4);
      hypre_TFree(ijmatrix);
      return hypre_error_flag;
   }

   if (jupper < -1)
   {
      hypre_error_in_arg(5);
      hypre_TFree(ijmatrix);
      return hypre_error_flag;
   }

#ifdef HYPRE_NO_GLOBAL_PARTITION

   info = hypre_CTAlloc(HYPRE_Int,2);

   row_partitioning = hypre_CTAlloc(HYPRE_Int, 2);
   col_partitioning = hypre_CTAlloc(HYPRE_Int, 2);

   row_partitioning[0] = ilower;
   row_partitioning[1] = iupper+1;
   col_partitioning[0] = jlower;
   col_partitioning[1] = jupper+1;

   /* now we need the global number of rows and columns as well
      as the global first row and column index */

   /* proc 0 has the first row and col */
   if (myid==0) 
   {
      info[0] = ilower;
      info[1] = jlower;
   }
   hypre_MPI_Bcast(info, 2, HYPRE_MPI_INT, 0, comm);
   row0 = info[0];
   col0 = info[1];
   
   /* proc (num_procs-1) has the last row and col */   
   if (myid == (num_procs-1))
   {
      info[0] = iupper;
      info[1] = jupper;
   }
   hypre_MPI_Bcast(info, 2, HYPRE_MPI_INT, num_procs-1, comm);

   rowN = info[0];
   colN = info[1];

   hypre_IJMatrixGlobalFirstRow(ijmatrix) = row0;
   hypre_IJMatrixGlobalFirstCol(ijmatrix) = col0;
   hypre_IJMatrixGlobalNumRows(ijmatrix) = rowN - row0 + 1;
   hypre_IJMatrixGlobalNumCols(ijmatrix) = colN - col0 + 1;
   
   hypre_TFree(info);
   

#else

   info = hypre_CTAlloc(HYPRE_Int,4);
   recv_buf = hypre_CTAlloc(HYPRE_Int,4*num_procs);
   row_partitioning = hypre_CTAlloc(HYPRE_Int, num_procs+1);

   info[0] = ilower;
   info[1] = iupper;
   info[2] = jlower;
   info[3] = jupper;
  
   /* Generate row- and column-partitioning through information exchange
      across all processors, check whether the matrix is square, and
      if the partitionings match. i.e. no overlaps or gaps,
      if there are overlaps or gaps in the row partitioning or column
      partitioning , ierr will be set to -9 or -10, respectively */

   hypre_MPI_Allgather(info,4,HYPRE_MPI_INT,recv_buf,4,HYPRE_MPI_INT,comm);

   row_partitioning[0] = recv_buf[0];
   square = 1;
   for (i=0; i < num_procs-1; i++)
   {
      i4 = 4*i;
      if ( recv_buf[i4+1] != (recv_buf[i4+4]-1) )
      {
         hypre_error(HYPRE_ERROR_GENERIC);
         hypre_TFree(ijmatrix);
         hypre_TFree(info);
         hypre_TFree(recv_buf);
         hypre_TFree(row_partitioning);
   	 return hypre_error_flag;
      }
      else
	 row_partitioning[i+1] = recv_buf[i4+4];
	 
      if ((square && (recv_buf[i4]   != recv_buf[i4+2])) ||
                    (recv_buf[i4+1] != recv_buf[i4+3])  )
      {
         square = 0;
      }
   }	
   i4 = (num_procs-1)*4;
   row_partitioning[num_procs] = recv_buf[i4+1]+1;

   if ((recv_buf[i4] != recv_buf[i4+2]) || (recv_buf[i4+1] != recv_buf[i4+3])) 
      square = 0;

   if (square)
      col_partitioning = row_partitioning;
   else
   {   
      col_partitioning = hypre_CTAlloc(HYPRE_Int,num_procs+1);
      col_partitioning[0] = recv_buf[2];
      for (i=0; i < num_procs-1; i++)
      {
         i4 = 4*i;
         if (recv_buf[i4+3] != recv_buf[i4+6]-1)
         {
           hypre_error(HYPRE_ERROR_GENERIC);
           hypre_TFree(ijmatrix);
           hypre_TFree(info);
           hypre_TFree(recv_buf);
           hypre_TFree(row_partitioning);
           hypre_TFree(col_partitioning);
   	   return hypre_error_flag;
         }
         else
   	   col_partitioning[i+1] = recv_buf[i4+6];
      }
      col_partitioning[num_procs] = recv_buf[num_procs*4-1]+1;
   }

   hypre_IJMatrixGlobalFirstRow(ijmatrix) = row_partitioning[0];
   hypre_IJMatrixGlobalFirstCol(ijmatrix) = col_partitioning[0];
   hypre_IJMatrixGlobalNumRows(ijmatrix) = row_partitioning[num_procs] - 
      row_partitioning[0];
   hypre_IJMatrixGlobalNumCols(ijmatrix) = col_partitioning[num_procs] - 
      col_partitioning[0];
   
   hypre_TFree(info);
   hypre_TFree(recv_buf);
   
#endif

   hypre_IJMatrixRowPartitioning(ijmatrix) = row_partitioning;
   hypre_IJMatrixColPartitioning(ijmatrix) = col_partitioning;

   *matrix = (HYPRE_IJMatrix) ijmatrix;
  
   return hypre_error_flag;
}