C++ (Cpp) hypre_max примеры использования

Язык программирования: C++ (Cpp)

Метод/Функция: hypre_max

Примеров на hotexamples.com: 28

C++ (Cpp) hypre_max — найдено 28 примеров. Это лучшие примеры кода на C++ (Cpp) для hypre_max, полученные из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

Файл: par_vector.c Проект: IanLee1521/hypre

/*--------------------------------------------------------------------------
 * hypre_FillResponseParToVectorAll
 *
 * Records one incoming contact in the hypre_ProcListElements object that
 * is reached through the response object's data2 field:
 *   - the contacting processor is appended to the id array,
 *   - the contact_size integers of the contact buffer are appended to the
 *     elements array,
 *   - vec_starts gets the new end offset and length is incremented.
 * Storage is grown with hypre_TReAlloc when it is too small.
 *
 * No reply is produced: *response_message_size is set to 0 and
 * p_send_response_buf is not touched.
 *
 * Returns hypre_error_flag.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_FillResponseParToVectorAll( void       *p_recv_contact_buf,
                                  HYPRE_Int   contact_size,
                                  HYPRE_Int   contact_proc,
                                  void       *ro,
                                  MPI_Comm    comm,
                                  void      **p_send_response_buf,
                                  HYPRE_Int  *response_message_size )
{
   HYPRE_Int                   *incoming  = (HYPRE_Int *) p_recv_contact_buf;
   hypre_DataExchangeResponse  *response  = ro;
   hypre_ProcListElements      *proc_list = response->data2;

   HYPRE_Int                    rank;
   HYPRE_Int                    slot;      /* position of the new processor entry */
   HYPRE_Int                    pos;       /* running write offset into elements  */
   HYPRE_Int                    capacity;
   HYPRE_Int                    k;

   hypre_MPI_Comm_rank(comm, &rank);

   /* The processor table is full: make room for 10 more entries.
      vec_starts always holds one entry more than id. */
   if (proc_list->length == proc_list->storage_length)
   {
      proc_list->storage_length += 10;
      proc_list->id =
         hypre_TReAlloc(proc_list->id, HYPRE_Int, proc_list->storage_length);
      proc_list->vec_starts =
         hypre_TReAlloc(proc_list->vec_starts, HYPRE_Int,
                        proc_list->storage_length + 1);
   }

   /* vec_starts[slot] is the number of elements stored so far */
   slot = proc_list->length;
   pos  = proc_list->vec_starts[slot];

   /* remember who contacted us */
   proc_list->id[slot] = contact_proc;

   /* Element storage too small: grow by at least 10 entries beyond the
      current fill level. */
   if (proc_list->element_storage_length < pos + contact_size)
   {
      capacity  = hypre_max(contact_size, 10);
      capacity += pos;
      proc_list->elements =
         hypre_TReAlloc(proc_list->elements, HYPRE_Int, capacity);
      proc_list->element_storage_length = capacity;
   }

   /* append the received integers */
   for (k = 0; k < contact_size; k++)
   {
      proc_list->elements[pos] = incoming[k];
      pos++;
   }
   proc_list->vec_starts[slot + 1] = pos;
   proc_list->length++;

   /* nothing is sent back to the contacting processor */
   *response_message_size = 0;

   return hypre_error_flag;
}

Пример #2

Показать файл

Файл: smg.c Проект: 5432935/crossbridge

/*--------------------------------------------------------------------------
 * hypre_SMGSetNumPreRelax
 *
 * Stores the requested number of pre-relaxation sweeps in the SMG data
 * object.  Values smaller than 1 are raised to 1.  Always returns 0.
 *--------------------------------------------------------------------------*/
int
hypre_SMGSetNumPreRelax( void *smg_vdata,
                         int   num_pre_relax )
{
   hypre_SMGData *data = smg_vdata;

   /* never allow fewer than one sweep */
   data -> num_pre_relax = hypre_max(num_pre_relax, 1);

   return 0;
}

Пример #3

Показать файл

Файл: smg_relax.c Проект: OpenSpeedShop/openspeedshop-test-suite

/*--------------------------------------------------------------------------
 * hypre_SMGRelaxSetNumPreRelax
 *
 * Stores the requested number of pre-relaxation sweeps in the SMG relax
 * data object.  Values smaller than 1 are raised to 1.  Always returns 0.
 *--------------------------------------------------------------------------*/
int
hypre_SMGRelaxSetNumPreRelax( void *relax_vdata,
                              int   num_pre_relax )
{
   hypre_SMGRelaxData *data = relax_vdata;

   /* never allow fewer than one sweep */
   data -> num_pre_relax = hypre_max(num_pre_relax, 1);

   return 0;
}

Пример #4

Показать файл

Файл: box_algebra.c Проект: LLNL/COGENT

/*--------------------------------------------------------------------------
 * hypre_IntersectBoxes
 *
 * Writes into ibox, for each of the three index directions, the larger of
 * the two lower bounds and the smaller of the two upper bounds of box1 and
 * box2.  No check is made that the resulting lower bound does not exceed
 * the upper bound.
 *
 * Returns hypre_error_flag.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_IntersectBoxes( hypre_Box *box1,
                      hypre_Box *box2,
                      hypre_Box *ibox )
{
   HYPRE_Int dim;
   HYPRE_Int a, b;

   for (dim = 0; dim < 3; dim++)
   {
      /* lower bound: the larger of the two minima */
      a = hypre_BoxIMinD(box1, dim);
      b = hypre_BoxIMinD(box2, dim);
      hypre_BoxIMinD(ibox, dim) = hypre_max(a, b);

      /* upper bound: the smaller of the two maxima */
      a = hypre_BoxIMaxD(box1, dim);
      b = hypre_BoxIMaxD(box2, dim);
      hypre_BoxIMaxD(ibox, dim) = hypre_min(a, b);
   }

   return hypre_error_flag;
}

Пример #5

Показать файл

Файл: box_algebra.c Проект: ngholka/patki-power

/*--------------------------------------------------------------------------
 * hypre_IntersectBoxes
 *
 * Fills ibox with the per-direction overlap of box1 and box2: in each of
 * the three directions the lower bound is the maximum of the two lower
 * bounds and the upper bound is the minimum of the two upper bounds.
 * The result is not checked for emptiness.  Always returns 0.
 *--------------------------------------------------------------------------*/
int
hypre_IntersectBoxes( hypre_Box *box1,
                      hypre_Box *box2,
                      hypre_Box *ibox )
{
   int dir = 0;

   /* walk over the x, y and z directions */
   while (dir < 3)
   {
      hypre_BoxIMinD(ibox, dir) = hypre_max(hypre_BoxIMinD(box1, dir),
                                            hypre_BoxIMinD(box2, dir));
      hypre_BoxIMaxD(ibox, dir) = hypre_min(hypre_BoxIMaxD(box1, dir),
                                            hypre_BoxIMaxD(box2, dir));
      dir++;
   }

   return 0;
}

Пример #6

Показать файл

Файл: box_neighbors.c Проект: 5432935/crossbridge

/*--------------------------------------------------------------------------
 * hypre_BoxNeighborsAssemble
 *
 * For every box in the neighbors structure, measures its distance to each
 * local/periodic box and decides whether it is kept as a neighbor.
 *
 *   neighbors    - structure whose boxes, procs, ids and rank links are
 *                  read and updated
 *   max_distance - a box is kept when its distance to some local/periodic
 *                  box is <= max_distance
 *   prune        - when nonzero, the procs array is overwritten with
 *                  keep/skip markers for the boxes
 *
 * NOTE(review): the listing shown here stops right after the main loop;
 * the remainder of the function (including its return) is not visible.
 *--------------------------------------------------------------------------*/
int
hypre_BoxNeighborsAssemble( hypre_BoxNeighbors *neighbors,
                            int                 max_distance,
                            int                 prune )
{
   hypre_BoxArray      *boxes;
   int                 *procs;
   int                 *ids;
   int                  first_local;
   int                  num_local;
   int                  num_periodic;

   int                  keep_box;
   int                  num_boxes;

   hypre_RankLink      *rank_link;

   hypre_Box           *local_box;
   hypre_Box           *neighbor_box;

   int                  distance;
   int                  distance_index[3];

   int                  diff;
   int                  i, j, d, ilocal, inew;

   int                  ierr = 0;

   /*---------------------------------------------
    * Unpack the fields of the neighbors structure
    *---------------------------------------------*/

   boxes           = hypre_BoxNeighborsBoxes(neighbors);
   procs           = hypre_BoxNeighborsProcs(neighbors);
   ids             = hypre_BoxNeighborsIDs(neighbors);
   first_local     = hypre_BoxNeighborsFirstLocal(neighbors);
   num_local       = hypre_BoxNeighborsNumLocal(neighbors);
   num_periodic    = hypre_BoxNeighborsNumPeriodic(neighbors);

   /*---------------------------------------------
    * Find neighboring boxes
    *---------------------------------------------*/

   /* inew: one past the index of the last kept box (used only when pruning);
      num_boxes: number of boxes kept so far */
   inew = 0;
   num_boxes = 0;
   hypre_ForBoxI(i, boxes)
      {
         keep_box = 0;
         /* compare box i against every local and periodic box, which sit
            at indices first_local .. first_local+num_local+num_periodic-1 */
         for (j = 0; j < num_local + num_periodic; j++)
         {
            ilocal = first_local + j;
            if (i != ilocal)
            {
               local_box = hypre_BoxArrayBox(boxes, ilocal);
               neighbor_box = hypre_BoxArrayBox(boxes, i);

               /* compute distance info */
               /* distance is the largest positive per-direction gap between
                  the two boxes; distance_index[d] records on which side of
                  the local box the neighbor lies (+1, -1, or 0 if no gap) */
               distance = 0;
               for (d = 0; d < 3; d++)
               {
                  distance_index[d] = 0;

                  /* neighbor starts beyond the upper end of the local box */
                  diff = hypre_BoxIMinD(neighbor_box, d) -
                     hypre_BoxIMaxD(local_box, d);
                  if (diff > 0)
                  {
                     distance_index[d] = 1;
                     distance = hypre_max(distance, diff);
                  }

                  /* neighbor ends before the lower end of the local box */
                  diff = hypre_BoxIMinD(local_box, d) -
                     hypre_BoxIMaxD(neighbor_box, d);
                  if (diff > 0)
                  {
                     distance_index[d] = -1;
                     distance = hypre_max(distance, diff);
                  }
               }

               /* create new rank_link */
               if (distance <= max_distance)
               {
                  keep_box = 1;

                  /* rank links are only recorded for the num_local local
                     boxes, not for the periodic ones */
                  if (j < num_local)
                  {
                     /* push a link carrying num_boxes onto the front of the
                        list stored for (local box j, direction triple) */
                     hypre_RankLinkCreate(num_boxes, &rank_link);
                     hypre_RankLinkNext(rank_link) =
                        hypre_BoxNeighborsRankLink(neighbors, j,
                                                   distance_index[0],
                                                   distance_index[1],
                                                   distance_index[2]);
                     hypre_BoxNeighborsRankLink(neighbors, j,
                                                distance_index[0],
                                                distance_index[1],
                                                distance_index[2]) = rank_link;
                  }
               }
            }
            else
            {
               /* box i is itself one of the local/periodic boxes: keep it */
               keep_box = 1;
            }
         }

         if (prune)
         {
            /* use procs array to store which boxes to keep */
            if (keep_box)
            {
               /* flip the sign of the kept box's entry; if boxes were
                  skipped since the last kept one, store this index at the
                  first skipped position */
               procs[i] = -procs[i];
               if (inew < i)
               {
                  procs[inew] = i;
               }
               inew = i + 1;
               
               num_boxes++;
            }
         }
         else
         {
            /* keep all of the boxes */
            num_boxes++;
         }
      }

Пример #7

Показать файл

Файл: gmres.c Проект: arnabd88/CIVL-NewFlowCB

int
hypre_GMRESSolve(void  *gmres_vdata,
                 void  *A,
                 void  *b,
		 void  *x)
{
   hypre_GMRESData  *gmres_data   = gmres_vdata;
   hypre_GMRESFunctions *gmres_functions = gmres_data->functions;
   int 		     k_dim        = (gmres_data -> k_dim);
   int               min_iter     = (gmres_data -> min_iter);
   int 		     max_iter     = (gmres_data -> max_iter);
   int               rel_change   = (gmres_data -> rel_change);
   int 		     stop_crit    = (gmres_data -> stop_crit);
   double 	     accuracy     = (gmres_data -> tol);
   double 	     cf_tol       = (gmres_data -> cf_tol);
   void             *matvec_data  = (gmres_data -> matvec_data);

   void             *r            = (gmres_data -> r);
   void             *w            = (gmres_data -> w);
   void            **p            = (gmres_data -> p);

   int 	           (*precond)(void*, void*, void*, void*)   = (gmres_functions -> precond);
   int 	            *precond_data = (gmres_data -> precond_data);

   int             print_level    = (gmres_data -> print_level);
   int             logging        = (gmres_data -> logging);

   double         *norms          = (gmres_data -> norms);
/* not used yet   char           *log_file_name  = (gmres_data -> log_file_name);*/
/*   FILE           *fp; */
   
   int        ierr = 0;
   int        break_value = 0;
   int	      i, j, k;
   double     *rs, **hh, *c, *s;
   int        iter; 
   int        my_id, num_procs;
   double     epsilon, gamma, t, r_norm, b_norm, den_norm, x_norm;
   double     epsmac = 1.e-16; 
   double     ieee_check = 0.;

   double     guard_zero_residual; 
   double     cf_ave_0 = 0.0;
   double     cf_ave_1 = 0.0;
   double     weight;
   double     r_norm_0;
   double     relative_error;

   (gmres_data -> converged) = 0;
   /*-----------------------------------------------------------------------
    * With relative change convergence test on, it is possible to attempt
    * another iteration with a zero residual. This causes the parameter
    * alpha to go NaN. The guard_zero_residual parameter is to circumvent
    * this. Perhaps it should be set to something non-zero (but small).
    *-----------------------------------------------------------------------*/
   guard_zero_residual = 0.0;

   (*(gmres_functions->CommInfo))(A,&my_id,&num_procs);
   if ( logging>0 || print_level>0 )
   {
      norms          = (gmres_data -> norms);
      /* not used yet      log_file_name  = (gmres_data -> log_file_name);*/
      /* fp = fopen(log_file_name,"w"); */
   }

   /* initialize work arrays */
   rs = hypre_CTAllocF(double,k_dim+1,gmres_functions); 
   c = hypre_CTAllocF(double,k_dim,gmres_functions); 
   s = hypre_CTAllocF(double,k_dim,gmres_functions); 

   hh = hypre_CTAllocF(double*,k_dim+1,gmres_functions); 
   for (i=0; i < k_dim+1; i++)
   {	
   	hh[i] = hypre_CTAllocF(double,k_dim,gmres_functions); 
   }

   (*(gmres_functions->CopyVector))(b,p[0]);

   /* compute initial residual */
   (*(gmres_functions->Matvec))(matvec_data,-1.0, A, x, 1.0, p[0]);

   b_norm = sqrt((*(gmres_functions->InnerProd))(b,b));

   /* Since it is does not diminish performance, attempt to return an error flag
      and notify users when they supply bad input. */
   if (b_norm != 0.) ieee_check = b_norm/b_norm; /* INF -> NaN conversion */
   if (ieee_check != ieee_check)
   {
      /* ...INFs or NaNs in input can make ieee_check a NaN.  This test
         for ieee_check self-equality works on all IEEE-compliant compilers/
         machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754"
         by W. Kahan, May 31, 1996.  Currently (July 2002) this paper may be
         found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */
      if (logging > 0 || print_level > 0)
      {
        printf("\n\nERROR detected by Hypre ... BEGIN\n");
        printf("ERROR -- hypre_GMRESSolve: INFs and/or NaNs detected in input.\n");
        printf("User probably placed non-numerics in supplied b.\n");
        printf("Returning error flag += 101.  Program not terminated.\n");
        printf("ERROR detected by Hypre ... END\n\n\n");
      }
      ierr += 101;
      return ierr;
   }

   r_norm = sqrt((*(gmres_functions->InnerProd))(p[0],p[0]));
   r_norm_0 = r_norm;

   /* Since it is does not diminish performance, attempt to return an error flag
      and notify users when they supply bad input. */
   if (r_norm != 0.) ieee_check = r_norm/r_norm; /* INF -> NaN conversion */
   if (ieee_check != ieee_check)
   {
      /* ...INFs or NaNs in input can make ieee_check a NaN.  This test
         for ieee_check self-equality works on all IEEE-compliant compilers/
         machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754"
         by W. Kahan, May 31, 1996.  Currently (July 2002) this paper may be
         found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */
      if (logging > 0 || print_level > 0)
      {
        printf("\n\nERROR detected by Hypre ... BEGIN\n");
        printf("ERROR -- hypre_GMRESSolve: INFs and/or NaNs detected in input.\n");
        printf("User probably placed non-numerics in supplied A or x_0.\n");
        printf("Returning error flag += 101.  Program not terminated.\n");
        printf("ERROR detected by Hypre ... END\n\n\n");
      }
      ierr += 101;
      return ierr;
   }

   if ( logging>0 || print_level > 0)
   {
      norms[0] = r_norm;
      if ( print_level>1 && my_id == 0 )
      {
  	 printf("L2 norm of b: %e\n", b_norm);
         if (b_norm == 0.0)
            printf("Rel_resid_norm actually contains the residual norm\n");
         printf("Initial L2 norm of residual: %e\n", r_norm);
      
      }
   }
   iter = 0;

   if (b_norm > 0.0)
   {
/* convergence criterion |r_i|/|b| <= accuracy if |b| > 0 */
     den_norm= b_norm;
   }
   else
   {
/* convergence criterion |r_i|/|r0| <= accuracy if |b| = 0 */
     den_norm= r_norm;
   };

   epsilon= accuracy;

/* convergence criterion |r_i| <= accuracy , absolute residual norm*/
   if ( stop_crit && !rel_change )
      epsilon = accuracy;

   if ( print_level>1 && my_id == 0 )
   {
      if (b_norm > 0.0)
         {printf("=============================================\n\n");
          printf("Iters     resid.norm     conv.rate  rel.res.norm\n");
          printf("-----    ------------    ---------- ------------\n");
      
          }

      else
         {printf("=============================================\n\n");
          printf("Iters     resid.norm     conv.rate\n");
          printf("-----    ------------    ----------\n");
      
          };
   }

  /* set the relative_error to initially bypass the stopping criterion */
   if (rel_change)
   {
      relative_error= epsilon + 1.;
   }

   while (iter < max_iter)
   {
   /* initialize first term of hessenberg system */

	rs[0] = r_norm;
        if (r_norm == 0.0)
        {
           hypre_TFreeF(c,gmres_functions); 
           hypre_TFreeF(s,gmres_functions); 
           hypre_TFreeF(rs,gmres_functions);
           for (i=0; i < k_dim+1; i++) hypre_TFreeF(hh[i],gmres_functions);
           hypre_TFreeF(hh,gmres_functions); 
	   ierr = 0;
	   return ierr;
	}

	if (r_norm/den_norm <= epsilon && iter >= min_iter) 
        {
                if (rel_change)
                {
                   if (relative_error <= epsilon)
                   {
		      (*(gmres_functions->CopyVector))(b,r);
          	      (*(gmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r);
		      r_norm = sqrt((*(gmres_functions->InnerProd))(r,r));
		      if (r_norm/den_norm <= epsilon)
                      {
                         if ( print_level>1 && my_id == 0)
                         {
                            printf("\n\n");
                            printf("Final L2 norm of residual: %e\n\n", r_norm);
                         }
                         break;
                      }
                      else
                      if ( print_level>0 && my_id == 0)
                           printf("false convergence 1\n");
                   }
                }
                else
                {
                   (*(gmres_functions->CopyVector))(b,r);
                   (*(gmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r);
                   r_norm = sqrt((*(gmres_functions->InnerProd))(r,r));
                   if (r_norm/den_norm <= epsilon)
                   {
                       if ( print_level>1 && my_id == 0)
                       {
                            printf("\n\n");
                            printf("Final L2 norm of residual: %e\n\n", r_norm);
                       }
                       break;
                   }
                   else
                      if ( print_level>0 && my_id == 0)
                           printf("false convergence 1\n");
                }

	}

      	t = 1.0 / r_norm;
	(*(gmres_functions->ScaleVector))(t,p[0]);
	i = 0;
	while (i < k_dim && ( (r_norm/den_norm > epsilon || iter < min_iter)
                         || ((rel_change) && relative_error > epsilon) )
                         && iter < max_iter)
	{
		i++;
		iter++;
		(*(gmres_functions->ClearVector))(r);
		precond(precond_data, A, p[i-1], r);
		(*(gmres_functions->Matvec))(matvec_data, 1.0, A, r, 0.0, p[i]);
		/* modified Gram_Schmidt */
		for (j=0; j < i; j++)
		{
			hh[j][i-1] = (*(gmres_functions->InnerProd))(p[j],p[i]);
			(*(gmres_functions->Axpy))(-hh[j][i-1],p[j],p[i]);
		}
		t = sqrt((*(gmres_functions->InnerProd))(p[i],p[i]));
		hh[i][i-1] = t;	
		if (t != 0.0)
		{
			t = 1.0/t;
			(*(gmres_functions->ScaleVector))(t,p[i]);
		}
		/* done with modified Gram_schmidt and Arnoldi step.
		   update factorization of hh */
		for (j = 1; j < i; j++)
		{
			t = hh[j-1][i-1];
			hh[j-1][i-1] = c[j-1]*t + s[j-1]*hh[j][i-1];		
			hh[j][i-1] = -s[j-1]*t + c[j-1]*hh[j][i-1];
		}
		gamma = sqrt(hh[i-1][i-1]*hh[i-1][i-1] + hh[i][i-1]*hh[i][i-1]);
		if (gamma == 0.0) gamma = epsmac;
		c[i-1] = hh[i-1][i-1]/gamma;
		s[i-1] = hh[i][i-1]/gamma;
		rs[i] = -s[i-1]*rs[i-1];
		rs[i-1] = c[i-1]*rs[i-1];
		/* determine residual norm */
		hh[i-1][i-1] = c[i-1]*hh[i-1][i-1] + s[i-1]*hh[i][i-1];
		r_norm = fabs(rs[i]);
		if ( print_level>0 )
		{
		   norms[iter] = r_norm;
                   if ( print_level>1 && my_id == 0 )
   		   {
      		      if (b_norm > 0.0)
             	         printf("% 5d    %e    %f   %e\n", iter, 
				norms[iter],norms[iter]/norms[iter-1],
 	             		norms[iter]/b_norm);
      		      else
             	         printf("% 5d    %e    %f\n", iter, norms[iter],
				norms[iter]/norms[iter-1]);
   		   }
		}
                if (cf_tol > 0.0)
                {
                   cf_ave_0 = cf_ave_1;
           	   cf_ave_1 = pow( r_norm / r_norm_0, 1.0/(2.0*iter));

           	   weight   = fabs(cf_ave_1 - cf_ave_0);
           	   weight   = weight / hypre_max(cf_ave_1, cf_ave_0);
           	   weight   = 1.0 - weight;
#if 0
           	   printf("I = %d: cf_new = %e, cf_old = %e, weight = %e\n",
                	i, cf_ave_1, cf_ave_0, weight );
#endif
           	   if (weight * cf_ave_1 > cf_tol) 
		   {
		      break_value = 1;
		      break;
		   }
        	}

	}
	/* now compute solution, first solve upper triangular system */

	if (break_value) break;
	
	rs[i-1] = rs[i-1]/hh[i-1][i-1];
	for (k = i-2; k >= 0; k--)
	{
		t = rs[k];
		for (j = k+1; j < i; j++)
		{
			t -= hh[k][j]*rs[j];
		}
		rs[k] = t/hh[k][k];
	}
	
	(*(gmres_functions->CopyVector))(p[0],w);
	(*(gmres_functions->ScaleVector))(rs[0],w);
	for (j = 1; j < i; j++)
		(*(gmres_functions->Axpy))(rs[j], p[j], w);

	(*(gmres_functions->ClearVector))(r);
	precond(precond_data, A, w, r);

	(*(gmres_functions->Axpy))(1.0,r,x);

/* check for convergence, evaluate actual residual */
	if (r_norm/den_norm <= epsilon && iter >= min_iter) 
        {
                if (rel_change)
                {
                   x_norm = sqrt( (*(gmres_functions->InnerProd))(x,x) );
                   if ( x_norm<=guard_zero_residual ) break; /* don't divide by 0 */
		   r_norm = sqrt( (*(gmres_functions->InnerProd))(r,r) );
                   relative_error= r_norm/x_norm;
                }

		(*(gmres_functions->CopyVector))(b,r);
          	(*(gmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r);
		r_norm = sqrt( (*(gmres_functions->InnerProd))(r,r) );
		if (r_norm/den_norm <= epsilon)
                {
                   if ( print_level>1 && my_id == 0 )
                   {
                      printf("\n\n");
                      printf("Final L2 norm of residual: %e\n\n", r_norm);
                   }
                   if (rel_change && r_norm > guard_zero_residual)
                      /* Also test on relative change of iterates, x_i - x_(i-1) */
                   {  /* At this point r = x_i - x_(i-1) */
                      x_norm = sqrt( (*(gmres_functions->InnerProd))(x,x) );
                      if ( x_norm<=guard_zero_residual ) break; /* don't divide by 0 */
                      if ( relative_error < epsilon )
                      {
                         (gmres_data -> converged) = 1;
                         break;
                      }
                   }
                   else
                   {
                      (gmres_data -> converged) = 1;
                      break;
                   }
                }
		else 
                {
                   if ( print_level>0 && my_id == 0)
                      printf("false convergence 2\n");
		   (*(gmres_functions->CopyVector))(r,p[0]);
		   i = 0;
		}
	}

/* compute residual vector and continue loop */

	for (j=i ; j > 0; j--)
	{
		rs[j-1] = -s[j-1]*rs[j];
		rs[j] = c[j-1]*rs[j];
	}

	if (i) (*(gmres_functions->Axpy))(rs[0]-1.0,p[0],p[0]);
	for (j=1; j < i+1; j++)
		(*(gmres_functions->Axpy))(rs[j],p[j],p[0]);	
   }

   if ( print_level>1 && my_id == 0 )
          printf("\n\n"); 

   (gmres_data -> num_iterations) = iter;
   if (b_norm > 0.0)
      (gmres_data -> rel_residual_norm) = r_norm/b_norm;
   if (b_norm == 0.0)
      (gmres_data -> rel_residual_norm) = r_norm;

   if (iter >= max_iter && r_norm/den_norm > epsilon) ierr = 1;

   hypre_TFreeF(c,gmres_functions); 
   hypre_TFreeF(s,gmres_functions); 
   hypre_TFreeF(rs,gmres_functions);
 
   for (i=0; i < k_dim+1; i++)
   {	
   	hypre_TFreeF(hh[i],gmres_functions);
   }
   hypre_TFreeF(hh,gmres_functions); 

   return ierr;
}

Пример #8

Показать файл

Файл: par_nodal_systems.c Проект: LLNL/COGENT

HYPRE_Int
hypre_BoomerAMGCreateNodalA(hypre_ParCSRMatrix    *A,
                            HYPRE_Int                    num_functions,
                            HYPRE_Int                   *dof_func,
                            HYPRE_Int                    option,
                            HYPRE_Int                    diag_option,     
                            hypre_ParCSRMatrix   **AN_ptr)
{
   MPI_Comm 	       comm            = hypre_ParCSRMatrixComm(A);
   hypre_CSRMatrix    *A_diag          = hypre_ParCSRMatrixDiag(A);
   HYPRE_Int                *A_diag_i        = hypre_CSRMatrixI(A_diag);
   double             *A_diag_data     = hypre_CSRMatrixData(A_diag);


   hypre_CSRMatrix    *A_offd          = hypre_ParCSRMatrixOffd(A);
   HYPRE_Int                *A_offd_i        = hypre_CSRMatrixI(A_offd);
   double             *A_offd_data     = hypre_CSRMatrixData(A_offd);
   HYPRE_Int                *A_diag_j        = hypre_CSRMatrixJ(A_diag);
   HYPRE_Int                *A_offd_j        = hypre_CSRMatrixJ(A_offd);

   HYPRE_Int 		      *row_starts      = hypre_ParCSRMatrixRowStarts(A);
   HYPRE_Int 		      *col_map_offd    = hypre_ParCSRMatrixColMapOffd(A);
   HYPRE_Int                 num_variables   = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int 		       num_nonzeros_offd = 0;
   HYPRE_Int 		       num_cols_offd = 0;
                  
   hypre_ParCSRMatrix *AN;
   hypre_CSRMatrix    *AN_diag;
   HYPRE_Int                *AN_diag_i;
   HYPRE_Int                *AN_diag_j;
   double             *AN_diag_data; 
   hypre_CSRMatrix    *AN_offd;
   HYPRE_Int                *AN_offd_i;
   HYPRE_Int                *AN_offd_j;
   double             *AN_offd_data; 
   HYPRE_Int		      *col_map_offd_AN;
   HYPRE_Int		      *new_col_map_offd;
   HYPRE_Int		      *row_starts_AN;
   HYPRE_Int		       AN_num_nonzeros_diag = 0;
   HYPRE_Int		       AN_num_nonzeros_offd = 0;
   HYPRE_Int		       num_cols_offd_AN;
   HYPRE_Int		       new_num_cols_offd;
                 
   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   HYPRE_Int		       num_sends;
   HYPRE_Int		       num_recvs;
   HYPRE_Int		      *send_procs;
   HYPRE_Int		      *send_map_starts;
   HYPRE_Int		      *send_map_elmts;
   HYPRE_Int		      *new_send_map_elmts;
   HYPRE_Int		      *recv_procs;
   HYPRE_Int		      *recv_vec_starts;

   hypre_ParCSRCommPkg *comm_pkg_AN;
   HYPRE_Int		      *send_procs_AN;
   HYPRE_Int		      *send_map_starts_AN;
   HYPRE_Int		      *send_map_elmts_AN;
   HYPRE_Int		      *recv_procs_AN;
   HYPRE_Int		      *recv_vec_starts_AN;

   HYPRE_Int                 i, j, k, k_map;
                      
   HYPRE_Int                 ierr = 0;

   HYPRE_Int		       index, row;
   HYPRE_Int		       start_index;
   HYPRE_Int		       num_procs;
   HYPRE_Int		       node, cnt;
   HYPRE_Int		       mode;
   HYPRE_Int		       new_send_elmts_size;

   HYPRE_Int		       global_num_nodes;
   HYPRE_Int		       num_nodes;
   HYPRE_Int		       num_fun2;
   HYPRE_Int		      *map_to_node;
   HYPRE_Int		      *map_to_map;
   HYPRE_Int		      *counter;

   double sum;
   double *data;
   

   hypre_MPI_Comm_size(comm,&num_procs);

   if (!comm_pkg)
   {
      hypre_MatvecCommPkgCreate(A);
      comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   }

   mode = fabs(option);

   comm_pkg_AN = NULL;
   col_map_offd_AN = NULL;

#ifdef HYPRE_NO_GLOBAL_PARTITION
   row_starts_AN = hypre_CTAlloc(HYPRE_Int, 2);

   for (i=0; i < 2; i++)
   {
      row_starts_AN[i] = row_starts[i]/num_functions;
      if (row_starts_AN[i]*num_functions < row_starts[i])
      {
	  hypre_printf("nodes not properly aligned or incomplete info!\n");
	  return (87);
      }
   }
   
   global_num_nodes = hypre_ParCSRMatrixGlobalNumRows(A)/num_functions;


#else
   row_starts_AN = hypre_CTAlloc(HYPRE_Int, num_procs+1);

  for (i=0; i < num_procs+1; i++)
   {
      row_starts_AN[i] = row_starts[i]/num_functions;
      if (row_starts_AN[i]*num_functions < row_starts[i])
      {
	  hypre_printf("nodes not properly aligned or incomplete info!\n");
	  return (87);
      }
   }
   
   global_num_nodes = row_starts_AN[num_procs];

#endif

 
   num_nodes =  num_variables/num_functions;
   num_fun2 = num_functions*num_functions;

   map_to_node = hypre_CTAlloc(HYPRE_Int, num_variables);
   AN_diag_i = hypre_CTAlloc(HYPRE_Int, num_nodes+1);
   counter = hypre_CTAlloc(HYPRE_Int, num_nodes);
   for (i=0; i < num_variables; i++)
      map_to_node[i] = i/num_functions;
   for (i=0; i < num_nodes; i++)
      counter[i] = -1;

   AN_num_nonzeros_diag = 0;
   row = 0;
   for (i=0; i < num_nodes; i++)
   {
      AN_diag_i[i] = AN_num_nonzeros_diag;
      for (j=0; j < num_functions; j++)
      {
	 for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++)
	 {
	    k_map = map_to_node[A_diag_j[k]];
	    if (counter[k_map] < i)
	    {
	       counter[k_map] = i;
	       AN_num_nonzeros_diag++;
	    }
	 }
	 row++;
      }
   }
   AN_diag_i[num_nodes] = AN_num_nonzeros_diag;

   AN_diag_j = hypre_CTAlloc(HYPRE_Int, AN_num_nonzeros_diag);	
   AN_diag_data = hypre_CTAlloc(double, AN_num_nonzeros_diag);	

   AN_diag = hypre_CSRMatrixCreate(num_nodes,num_nodes,AN_num_nonzeros_diag);
   hypre_CSRMatrixI(AN_diag) = AN_diag_i;
   hypre_CSRMatrixJ(AN_diag) = AN_diag_j;
   hypre_CSRMatrixData(AN_diag) = AN_diag_data;
       
   for (i=0; i < num_nodes; i++)
      counter[i] = -1;
   index = 0;
   start_index = 0;
   row = 0;

   switch (mode)
   {
      case 1:  /* frobenius norm */
      {
         for (i=0; i < num_nodes; i++)
         {
            for (j=0; j < num_functions; j++)
            {
	       for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++)
	       {
	          k_map = map_to_node[A_diag_j[k]];
	          if (counter[k_map] < start_index)
	          {
	             counter[k_map] = index;
	             AN_diag_j[index] = k_map;
	             AN_diag_data[index] = A_diag_data[k]*A_diag_data[k];
	             index++;
	          }
	          else
	          {
	             AN_diag_data[counter[k_map]] += 
				A_diag_data[k]*A_diag_data[k];
	          }
	       }
	       row++;
            }
            start_index = index;
         }
         for (i=0; i < AN_num_nonzeros_diag; i++)
            AN_diag_data[i] = sqrt(AN_diag_data[i]);

      }
      break;
      
      case 2:  /* sum of abs. value of all elements in each block */
      {
         for (i=0; i < num_nodes; i++)
         {
            for (j=0; j < num_functions; j++)
            {
	       for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++)
	       {
	          k_map = map_to_node[A_diag_j[k]];
	          if (counter[k_map] < start_index)
	          {
	             counter[k_map] = index;
	             AN_diag_j[index] = k_map;
	             AN_diag_data[index] = fabs(A_diag_data[k]);
	             index++;
	          }
	          else
	          {
	             AN_diag_data[counter[k_map]] += fabs(A_diag_data[k]);
	          }
	       }
	       row++;
            }
            start_index = index;
         }
         for (i=0; i < AN_num_nonzeros_diag; i++)
            AN_diag_data[i] /= num_fun2;
      }
      break;

      case 3:  /* largest element of each block (sets true value - not abs. value) */
      {

         for (i=0; i < num_nodes; i++)
         {
            for (j=0; j < num_functions; j++)
            {
      	       for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++)
      	       {
      	          k_map = map_to_node[A_diag_j[k]];
      	          if (counter[k_map] < start_index)
      	          {
      	             counter[k_map] = index;
      	             AN_diag_j[index] = k_map;
      	             AN_diag_data[index] = A_diag_data[k];
      	             index++;
      	          }
      	          else
      	          {
      	             if (fabs(A_diag_data[k]) > 
				fabs(AN_diag_data[counter[k_map]]))
      	                AN_diag_data[counter[k_map]] = A_diag_data[k];
      	          }
      	       }
      	       row++;
            }
            start_index = index;
         }
      }
      break;

      case 4:  /* inf. norm (row-sum)  */
      {

         data = hypre_CTAlloc(double, AN_num_nonzeros_diag*num_functions);

         for (i=0; i < num_nodes; i++)
         {
            for (j=0; j < num_functions; j++)
            {
	       for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++)
	       {
	          k_map = map_to_node[A_diag_j[k]];
	          if (counter[k_map] < start_index)
	          {
	             counter[k_map] = index;
	             AN_diag_j[index] = k_map;
	             data[index*num_functions + j] = fabs(A_diag_data[k]);
	             index++;
	          }
	          else
	          {
	             data[(counter[k_map])*num_functions + j] += fabs(A_diag_data[k]);
	          }
	       }
	       row++;
            }
            start_index = index;
         }
         for (i=0; i < AN_num_nonzeros_diag; i++)
         {
            AN_diag_data[i]  = data[i*num_functions];
            
            for (j=1; j< num_functions; j++)
            {
               AN_diag_data[i]  = hypre_max( AN_diag_data[i],data[i*num_functions+j]);
            }
         }
         hypre_TFree(data);
      
      }
      break;

      case 6:  /* sum of all elements in each block */
      {
         for (i=0; i < num_nodes; i++)
         {
            for (j=0; j < num_functions; j++)
            {
	       for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++)
	       {
	          k_map = map_to_node[A_diag_j[k]];
	          if (counter[k_map] < start_index)
	          {
	             counter[k_map] = index;
	             AN_diag_j[index] = k_map;
	             AN_diag_data[index] = (A_diag_data[k]);
	             index++;
	          }
	          else
	          {
	             AN_diag_data[counter[k_map]] += (A_diag_data[k]);
	          }
	       }
	       row++;
            }
            start_index = index;
         }
      }
      break;

   }

   if (diag_option ==1 )
   {
      /* make the diag entry the negative of the sum of off-diag entries (DO MORE BELOW) */
      for (i=0; i < num_nodes; i++)
      {
         index = AN_diag_i[i]; 
         sum = 0.0;
         for (k = AN_diag_i[i]+1; k < AN_diag_i[i+1]; k++)
         {
            sum += AN_diag_data[k];
            
         }
         AN_diag_data[index] = -sum;
      }
      
   }
   else if (diag_option == 2)
   {
      
      /*  make all diagonal entries negative */
      /* the diagonal is the first element listed in each row - */
      
      for (i=0; i < num_nodes; i++)
      {
         index = AN_diag_i[i];
         AN_diag_data[index] = - AN_diag_data[index];
      }
   }






   num_nonzeros_offd = A_offd_i[num_variables];
   AN_offd_i = hypre_CTAlloc(HYPRE_Int, num_nodes+1);

   num_cols_offd_AN = 0;

   if (comm_pkg)
   {
      comm_pkg_AN = hypre_CTAlloc(hypre_ParCSRCommPkg,1);
      hypre_ParCSRCommPkgComm(comm_pkg_AN) = comm;
      num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
      hypre_ParCSRCommPkgNumSends(comm_pkg_AN) = num_sends;
      num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
      hypre_ParCSRCommPkgNumRecvs(comm_pkg_AN) = num_recvs;
      send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg);
      send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
      send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
      recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
      recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
      send_procs_AN = NULL;
      send_map_elmts_AN = NULL;
      if (num_sends) 
      {
         send_procs_AN = hypre_CTAlloc(HYPRE_Int,num_sends);
         send_map_elmts_AN = hypre_CTAlloc(HYPRE_Int,send_map_starts[num_sends]);
      }
      send_map_starts_AN = hypre_CTAlloc(HYPRE_Int,num_sends+1);
      recv_vec_starts_AN = hypre_CTAlloc(HYPRE_Int,num_recvs+1);
      recv_procs_AN = NULL;
      if (num_recvs) recv_procs_AN = hypre_CTAlloc(HYPRE_Int,num_recvs);
      for (i=0; i < num_sends; i++)
         send_procs_AN[i] = send_procs[i];
      for (i=0; i < num_recvs; i++)
         recv_procs_AN[i] = recv_procs[i];

      send_map_starts_AN[0] = 0;
      cnt = 0;
      for (i=0; i < num_sends; i++)
      {
	 k_map = send_map_starts[i];
	 if (send_map_starts[i+1]-k_map)
            send_map_elmts_AN[cnt++] = send_map_elmts[k_map]/num_functions;
         for (j=send_map_starts[i]+1; j < send_map_starts[i+1]; j++)
         {
            node = send_map_elmts[j]/num_functions;
            if (node > send_map_elmts_AN[cnt-1])
	       send_map_elmts_AN[cnt++] = node; 
         }
         send_map_starts_AN[i+1] = cnt;
      }
      hypre_ParCSRCommPkgSendProcs(comm_pkg_AN) = send_procs_AN;
      hypre_ParCSRCommPkgSendMapStarts(comm_pkg_AN) = send_map_starts_AN;
      hypre_ParCSRCommPkgSendMapElmts(comm_pkg_AN) = send_map_elmts_AN;
      hypre_ParCSRCommPkgRecvProcs(comm_pkg_AN) = recv_procs_AN;
      hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_AN) = recv_vec_starts_AN;
   }

   num_cols_offd = hypre_CSRMatrixNumCols(A_offd);
   if (num_cols_offd)
   {
      if (num_cols_offd > num_variables)
      {
         hypre_TFree(map_to_node);
         map_to_node = hypre_CTAlloc(HYPRE_Int,num_cols_offd);
      }

      num_cols_offd_AN = 1;
      map_to_node[0] = col_map_offd[0]/num_functions;
      for (i=1; i < num_cols_offd; i++)
      {
         map_to_node[i] = col_map_offd[i]/num_functions;
         if (map_to_node[i] > map_to_node[i-1]) num_cols_offd_AN++;
      }
      
      if (num_cols_offd_AN > num_nodes)
      {
         hypre_TFree(counter);
         counter = hypre_CTAlloc(HYPRE_Int,num_cols_offd_AN);
      }

      map_to_map = NULL;
      col_map_offd_AN = NULL;
      map_to_map = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
      col_map_offd_AN = hypre_CTAlloc(HYPRE_Int,num_cols_offd_AN);
      col_map_offd_AN[0] = map_to_node[0];
      recv_vec_starts_AN[0] = 0;
      cnt = 1;
      for (i=0; i < num_recvs; i++)
      {
         for (j=recv_vec_starts[i]; j < recv_vec_starts[i+1]; j++)
         {
            node = map_to_node[j];
	    if (node > col_map_offd_AN[cnt-1])
	    {
	       col_map_offd_AN[cnt++] = node; 
	    }
	    map_to_map[j] = cnt-1;
         }
         recv_vec_starts_AN[i+1] = cnt;
      }

      for (i=0; i < num_cols_offd_AN; i++)
         counter[i] = -1;

      AN_num_nonzeros_offd = 0;
      row = 0;
      for (i=0; i < num_nodes; i++)
      {
         AN_offd_i[i] = AN_num_nonzeros_offd;
         for (j=0; j < num_functions; j++)
         {
	    for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++)
	    {
	       k_map = map_to_map[A_offd_j[k]];
	       if (counter[k_map] < i)
	       {
	          counter[k_map] = i;
	          AN_num_nonzeros_offd++;
	       }
	    }
	    row++;
         }
      }
      AN_offd_i[num_nodes] = AN_num_nonzeros_offd;
   }

       
   AN_offd = hypre_CSRMatrixCreate(num_nodes,num_cols_offd_AN,	
		AN_num_nonzeros_offd);
   hypre_CSRMatrixI(AN_offd) = AN_offd_i;
   if (AN_num_nonzeros_offd)
   {
      AN_offd_j = hypre_CTAlloc(HYPRE_Int, AN_num_nonzeros_offd);	
      AN_offd_data = hypre_CTAlloc(double, AN_num_nonzeros_offd);	
      hypre_CSRMatrixJ(AN_offd) = AN_offd_j;
      hypre_CSRMatrixData(AN_offd) = AN_offd_data;
   
      for (i=0; i < num_cols_offd_AN; i++)
         counter[i] = -1;
      index = 0;
      row = 0;
      AN_offd_i[0] = 0;
      start_index = 0;
      switch (mode)
      {
         case 1: /* frobenius norm */
         {
            for (i=0; i < num_nodes; i++)
            {
               for (j=0; j < num_functions; j++)
               {
	          for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++)
	          {
	             k_map = map_to_map[A_offd_j[k]];
	             if (counter[k_map] < start_index)
	             {
	                counter[k_map] = index;
	                AN_offd_j[index] = k_map;
	                AN_offd_data[index] = A_offd_data[k]*A_offd_data[k];
	                index++;
	             }
	             else
	             {
	                AN_offd_data[counter[k_map]] += 
				A_offd_data[k]*A_offd_data[k];
	             }
	          }
	          row++;
               }
               start_index = index;
            }
            for (i=0; i < AN_num_nonzeros_offd; i++)
	       AN_offd_data[i] = sqrt(AN_offd_data[i]);
         }
         break;
      
         case 2:  /* sum of abs. value of all elements in block */
         {
            for (i=0; i < num_nodes; i++)
            {
               for (j=0; j < num_functions; j++)
               {
	          for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++)
	          {
	             k_map = map_to_map[A_offd_j[k]];
	             if (counter[k_map] < start_index)
	             {
	                counter[k_map] = index;
	                AN_offd_j[index] = k_map;
	                AN_offd_data[index] = fabs(A_offd_data[k]);
	                index++;
	             }
	             else
	             {
	                AN_offd_data[counter[k_map]] += fabs(A_offd_data[k]);
	             }
	          }
	          row++;
               }
               start_index = index;
            }
            for (i=0; i < AN_num_nonzeros_offd; i++)
               AN_offd_data[i] /= num_fun2;
         }
         break;

         case 3: /* largest element in each block (not abs. value ) */
         {
            for (i=0; i < num_nodes; i++)
            {
               for (j=0; j < num_functions; j++)
               {
      	          for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++)
      	          {
      	             k_map = map_to_map[A_offd_j[k]];
      	             if (counter[k_map] < start_index)
      	             {
      	                counter[k_map] = index;
      	                AN_offd_j[index] = k_map;
      	                AN_offd_data[index] = A_offd_data[k];
      	                index++;
      	             }
      	             else
      	             {
      	                if (fabs(A_offd_data[k]) > 
				fabs(AN_offd_data[counter[k_map]]))
      	                   AN_offd_data[counter[k_map]] = A_offd_data[k];
      	             }
      	          }
      	          row++;
               }
               start_index = index;
            }
         }
         break;
         
         case 4:  /* inf. norm (row-sum)  */
         {
            
            data = hypre_CTAlloc(double, AN_num_nonzeros_offd*num_functions);
            
            for (i=0; i < num_nodes; i++)
            {
               for (j=0; j < num_functions; j++)
               {
                  for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++)
                  {
                     k_map = map_to_map[A_offd_j[k]];
                     if (counter[k_map] < start_index)
                     {
                        counter[k_map] = index;
                        AN_offd_j[index] = k_map;
                        data[index*num_functions + j] = fabs(A_offd_data[k]);
                        index++;
                     }
                     else
                     {
                        data[(counter[k_map])*num_functions + j] += fabs(A_offd_data[k]);
                     }
                  }
                  row++;
               }
               start_index = index;
            }
            for (i=0; i < AN_num_nonzeros_offd; i++)
            {
               AN_offd_data[i]  = data[i*num_functions];
               
               for (j=1; j< num_functions; j++)
               {
                  AN_offd_data[i]  = hypre_max( AN_offd_data[i],data[i*num_functions+j]);
               }
            }
            hypre_TFree(data);
            
         }
         break;
         
         case 6:  /* sum of value of all elements in block */
         {
            for (i=0; i < num_nodes; i++)
            {
               for (j=0; j < num_functions; j++)
               {
                  for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++)
                  {
                     k_map = map_to_map[A_offd_j[k]];
                     if (counter[k_map] < start_index)
                     {
                        counter[k_map] = index;
                        AN_offd_j[index] = k_map;
                        AN_offd_data[index] = (A_offd_data[k]);
                        index++;
                     }
                     else
                     {
                        AN_offd_data[counter[k_map]] += (A_offd_data[k]);
                     }
                  }
                  row++;
               }
               start_index = index;
            }
            
         }
         break;
      }
   
      hypre_TFree(map_to_map);
   }

   if (diag_option ==1 )
   {
      /* make the diag entry the negative of the sum of off-diag entries (here we are adding the 
         off_diag contribution)*/
      /* the diagonal is the first element listed in each row of AN_diag_data - */
      for (i=0; i < num_nodes; i++)
      {
         sum = 0.0;
         for (k = AN_offd_i[i]; k < AN_offd_i[i+1]; k++)
         {
            sum += AN_offd_data[k];
            
         }
         index = AN_diag_i[i];/* location of diag entry in data */ 
         AN_diag_data[index] -= sum; /* subtract from current value */
      }
      
   }

    
   AN = hypre_ParCSRMatrixCreate(comm, global_num_nodes, global_num_nodes,
		row_starts_AN, row_starts_AN, num_cols_offd_AN,
		AN_num_nonzeros_diag, AN_num_nonzeros_offd);

   /* we already created the diag and offd matrices - so we don't need the ones
      created above */
   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(AN));
   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(AN));
   hypre_ParCSRMatrixDiag(AN) = AN_diag;
   hypre_ParCSRMatrixOffd(AN) = AN_offd;


   hypre_ParCSRMatrixColMapOffd(AN) = col_map_offd_AN;
   hypre_ParCSRMatrixCommPkg(AN) = comm_pkg_AN;

   new_num_cols_offd = num_functions*num_cols_offd_AN;

   if (new_num_cols_offd > num_cols_offd)
   {
      new_col_map_offd = hypre_CTAlloc(HYPRE_Int, new_num_cols_offd);
      cnt = 0;
      for (i=0; i < num_cols_offd_AN; i++)
      {
	 for (j=0; j < num_functions; j++)
         {
 	    new_col_map_offd[cnt++] = num_functions*col_map_offd_AN[i]+j;
         }
      }
      cnt = 0;
      for (i=0; i < num_cols_offd; i++)
      {
         while (col_map_offd[i] >  new_col_map_offd[cnt])
            cnt++;
         col_map_offd[i] = cnt++;
      }
      for (i=0; i < num_recvs+1; i++)
      {
         recv_vec_starts[i] = num_functions*recv_vec_starts_AN[i];
      }

      for (i=0; i < num_nonzeros_offd; i++)
      {
         j = A_offd_j[i];
	 A_offd_j[i] = col_map_offd[j];
      }
      hypre_ParCSRMatrixColMapOffd(A) = new_col_map_offd;
      hypre_CSRMatrixNumCols(A_offd) = new_num_cols_offd;
      hypre_TFree(col_map_offd);
   }
 
   hypre_TFree(map_to_node);
   new_send_elmts_size = send_map_starts_AN[num_sends]*num_functions;

   if (new_send_elmts_size > send_map_starts[num_sends])
   {
      new_send_map_elmts = hypre_CTAlloc(HYPRE_Int,new_send_elmts_size);
      cnt = 0;
      send_map_starts[0] = 0;
      for (i=0; i < num_sends; i++)
      {
         send_map_starts[i+1] = send_map_starts_AN[i+1]*num_functions;
         for (j=send_map_starts_AN[i]; j < send_map_starts_AN[i+1]; j++)
	 {
            for (k=0; k < num_functions; k++)
	       new_send_map_elmts[cnt++] = send_map_elmts_AN[j]*num_functions+k;
	 }
      }
      hypre_TFree(send_map_elmts);
      hypre_ParCSRCommPkgSendMapElmts(comm_pkg) = new_send_map_elmts;
   }
 
   *AN_ptr        = AN;

   hypre_TFree(counter);

   return (ierr);
}

Пример #9

Показать файл

Файл: par_csr_matop_marked.c Проект: LLNL/COGENT

/*--------------------------------------------------------------------------
 * hypre_ParMatMinus_F
 *
 * Builds and returns a new matrix Pnew in which every "fine" row i
 * (CF_marker[i] < 0) holds P(i,k) - C(i,k) for all k.  The computation is
 * purely local (no communication is done here).
 *
 * Pnew is created with hypre_ParCSRMatrixUnion( C, P ), its fine rows are
 * cleared with hypre_ParCSRMatrixZero_F and its remaining rows are filled by
 * hypre_ParCSRMatrixCopy_C( Pnew, C, CF_marker ); those helpers are defined
 * elsewhere, so their exact semantics should be confirmed there.
 *
 * This is _not_ a general-purpose matrix subtraction routine.  It was written
 * for an interpolation problem where C(i,k) is known to exist whenever P(i,k)
 * does (because C = A*P where A has nonzero diagonal elements).  The diag
 * block relies on every column of row i of C and of P also being present in
 * row i of Pnew; this is only checked through hypre_assert.
 *
 * Parameters:
 *   P          first operand (minuend); not modified here
 *   C          second operand (subtrahend); not modified here
 *   CF_marker  per-row marker; rows with a negative value are updated
 *
 * Returns: the newly created matrix Pnew.
 *--------------------------------------------------------------------------*/
hypre_ParCSRMatrix * hypre_ParMatMinus_F(
   hypre_ParCSRMatrix * P, hypre_ParCSRMatrix * C, HYPRE_Int * CF_marker )
{
   hypre_ParCSRMatrix *Pnew;
   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
   hypre_CSRMatrix *C_diag = hypre_ParCSRMatrixDiag(C);
   hypre_CSRMatrix *C_offd = hypre_ParCSRMatrixOffd(C);
   hypre_CSRMatrix *Pnew_diag;
   hypre_CSRMatrix *Pnew_offd;

   double          *P_diag_data = hypre_CSRMatrixData(P_diag);
   HYPRE_Int             *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int             *P_diag_j = hypre_CSRMatrixJ(P_diag);
   double          *P_offd_data = hypre_CSRMatrixData(P_offd);
   HYPRE_Int             *P_offd_i = hypre_CSRMatrixI(P_offd);
   HYPRE_Int             *P_offd_j = hypre_CSRMatrixJ(P_offd);
   HYPRE_Int             *P_col_map_offd = hypre_ParCSRMatrixColMapOffd( P );
   double          *C_diag_data = hypre_CSRMatrixData(C_diag);
   HYPRE_Int             *C_diag_i = hypre_CSRMatrixI(C_diag);
   HYPRE_Int             *C_diag_j = hypre_CSRMatrixJ(C_diag);
   double          *C_offd_data = hypre_CSRMatrixData(C_offd);
   HYPRE_Int             *C_offd_i = hypre_CSRMatrixI(C_offd);
   HYPRE_Int             *C_offd_j = hypre_CSRMatrixJ(C_offd);
   HYPRE_Int             *C_col_map_offd = hypre_ParCSRMatrixColMapOffd( C );
   HYPRE_Int             *Pnew_diag_i;
   HYPRE_Int             *Pnew_diag_j;
   double          *Pnew_diag_data;
   HYPRE_Int             *Pnew_offd_i;
   HYPRE_Int             *Pnew_offd_j;
   double          *Pnew_offd_data;
   HYPRE_Int             *Pnew_j2m;
   HYPRE_Int             *Pnew_col_map_offd;

   HYPRE_Int	num_rows_diag_C = hypre_CSRMatrixNumRows(C_diag);
   HYPRE_Int	num_cols_offd_C = hypre_CSRMatrixNumCols(C_offd);
   HYPRE_Int	num_cols_offd_P = hypre_CSRMatrixNumCols(P_offd);
   HYPRE_Int  num_cols_offd_Pnew, num_rows_offd_Pnew;

   HYPRE_Int              i1, jmin, jmax, jrange;
   HYPRE_Int              j, m, mc, mp, jc, jp, jP, jC, jg, jCg, jPg;
   HYPRE_Int              diag_has_entries, offd_has_entries;
   double           dc, dp;

   Pnew = hypre_ParCSRMatrixUnion( C, P );
   hypre_ParCSRMatrixZero_F( Pnew, CF_marker );  /* fine rows of Pnew set to 0 */
   hypre_ParCSRMatrixCopy_C( Pnew, C, CF_marker ); /* coarse rows of Pnew copied from C (or P) */
   /* ...Zero_F may not be needed depending on how Pnew is made */
   Pnew_diag = hypre_ParCSRMatrixDiag(Pnew);
   Pnew_offd = hypre_ParCSRMatrixOffd(Pnew);
   Pnew_diag_i = hypre_CSRMatrixI(Pnew_diag);
   Pnew_diag_j = hypre_CSRMatrixJ(Pnew_diag);
   Pnew_offd_i = hypre_CSRMatrixI(Pnew_offd);
   Pnew_offd_j = hypre_CSRMatrixJ(Pnew_offd);
   Pnew_diag_data = hypre_CSRMatrixData(Pnew_diag);
   Pnew_offd_data = hypre_CSRMatrixData(Pnew_offd);
   Pnew_col_map_offd = hypre_ParCSRMatrixColMapOffd( Pnew );
   num_rows_offd_Pnew = hypre_CSRMatrixNumRows(Pnew_offd);
   num_cols_offd_Pnew = hypre_CSRMatrixNumCols(Pnew_offd);

   /* These tests do not depend on the row, so evaluate them once. */
   diag_has_entries = ( hypre_CSRMatrixNumNonzeros(Pnew_diag) > 0 );
   offd_has_entries = ( hypre_CSRMatrixNumNonzeros(Pnew_offd) > 0 );

   /*-----------------------------------------------------------------------
    *  Find the j-range needed to allocate a "reverse lookup" array: the
    *  largest (max j - min j + 1) over all fine rows, where j runs over the
    *  columns of that row in both Pnew_diag and P_diag.  No ordering of the
    *  columns within a row is assumed.
    *
    *  Only the diag block is handled this way.  For scalability reasons
    *  (jrange can get big) this won't work for the offd block, where
    *  indexing is also more complicated (c.f. col_map_offd).
    *-----------------------------------------------------------------------*/
   jrange = 0;
   if ( diag_has_entries )
   {
      for ( i1 = 0; i1 < num_rows_diag_C; i1++ )
      {
         if ( CF_marker[i1] >= 0 )
         {
            continue;   /* only Fine rows matter */
         }
         if ( Pnew_diag_i[i1] >= Pnew_diag_i[i1+1] )
         {
            /* Empty row: there is no first entry to seed jmin/jmax from, and
               reading one would index outside this row (possibly outside the
               array). */
            continue;
         }

         jmin = Pnew_diag_j[ Pnew_diag_i[i1] ];
         jmax = jmin;
         for ( m=Pnew_diag_i[i1]+1; m<Pnew_diag_i[i1+1]; ++m )
         {
            j = Pnew_diag_j[m];
            jmin = hypre_min( jmin, j );
            jmax = hypre_max( jmax, j );
         }
         for ( m=P_diag_i[i1]; m<P_diag_i[i1+1]; ++m )
         {
            j = P_diag_j[m];
            jmin = hypre_min( jmin, j );
            jmax = hypre_max( jmax, j );
         }
         jrange = hypre_max( jrange, jmax-jmin+1 );
      }
   }

   /*-----------------------------------------------------------------------
    *  Loop over Pnew_diag rows.  Construct a temporary reverse array:
    *  If j is a column number of the current row, Pnew_j2m[j-jmin] is the
    *  array index for j, i.e.  Pnew_diag_j[ Pnew_j2m[j-jmin] ] = j
    *  All other slots hold -1 (needed for the assertions below).
    *-----------------------------------------------------------------------*/

   Pnew_j2m = hypre_CTAlloc( HYPRE_Int, jrange );
   for ( j=0; j<jrange; ++j )
   {
      Pnew_j2m[j] = -1;
   }

   if ( diag_has_entries )
   {
      for ( i1 = 0; i1 < num_rows_diag_C; i1++ )
      {
         if ( CF_marker[i1] >= 0 )
         {
            continue;   /* Fine data only */
         }
         if ( Pnew_diag_i[i1] >= Pnew_diag_i[i1+1] )
         {
            continue;   /* no entries of Pnew to update in this row */
         }

         /* Same jmin as in the range computation above. */
         jmin = Pnew_diag_j[ Pnew_diag_i[i1] ];
         for ( m=Pnew_diag_i[i1]+1; m<Pnew_diag_i[i1+1]; ++m )
         {
            j = Pnew_diag_j[m];
            jmin = hypre_min( jmin, j );
         }
         for ( m=P_diag_i[i1]; m<P_diag_i[i1+1]; ++m )
         {
            j = P_diag_j[m];
            jmin = hypre_min( jmin, j );
         }

         for ( m = Pnew_diag_i[i1]; m<Pnew_diag_i[i1+1]; ++m )
         {
            j = Pnew_diag_j[m];
            hypre_assert( j-jmin>=0 );
            hypre_assert( j-jmin<jrange );
            Pnew_j2m[ j-jmin ] = m;
         }

         /*-----------------------------------------------------------------
          *  Loop over C_diag data for the current row.
          *  Subtract each C data entry from the corresponding Pnew entry.
          *-----------------------------------------------------------------*/

         for ( mc=C_diag_i[i1]; mc<C_diag_i[i1+1]; ++mc )
         {
            jc = C_diag_j[mc];
            dc = C_diag_data[mc];
            m = Pnew_j2m[jc-jmin];
            hypre_assert( m>=0 );
            Pnew_diag_data[m] -= dc;
         }

         /*-----------------------------------------------------------------
          *  Loop over P_diag data for the current row.
          *  Add each P data entry to the corresponding Pnew entry.
          *-----------------------------------------------------------------*/

         for ( mp=P_diag_i[i1]; mp<P_diag_i[i1+1]; ++mp )
         {
            jp = P_diag_j[mp];
            dp = P_diag_data[mp];
            m = Pnew_j2m[jp-jmin];
            hypre_assert( m>=0 );
            Pnew_diag_data[m] += dp;
         }

         /* Put back the -1 markers for just the slots this row used, so the
            whole array does not have to be refilled for every row. */
         for ( m = Pnew_diag_i[i1]; m<Pnew_diag_i[i1+1]; ++m )
         {
            Pnew_j2m[ Pnew_diag_j[m]-jmin ] = -1;
         }
      }
   }

   /*-----------------------------------------------------------------------
    * Repeat for the offd block.
    * This is a simple quadratic algorithm: each Pnew entry is matched against
    * the C and P entries of the same row by comparing the column numbers
    * obtained through the respective col_map_offd arrays.
    *-----------------------------------------------------------------------*/

   if ( offd_has_entries && num_cols_offd_Pnew )
   {
      for ( i1 = 0; i1 < num_rows_offd_Pnew; i1++ )
      {
         if ( CF_marker[i1] >= 0 )
         {
            continue;   /* Fine data only */
         }

         for ( m = Pnew_offd_i[i1]; m<Pnew_offd_i[i1+1]; ++m )
         {
            j = Pnew_offd_j[m];
            jg = Pnew_col_map_offd[j];
            Pnew_offd_data[m] = 0;
            if ( num_cols_offd_C )
            {
               for ( mc=C_offd_i[i1]; mc<C_offd_i[i1+1]; ++mc )
               {
                  jC = C_offd_j[mc];
                  jCg = C_col_map_offd[jC];
                  if ( jCg==jg ) Pnew_offd_data[m] -= C_offd_data[mc];
               }
            }
            if ( num_cols_offd_P )
            {
               for ( mp=P_offd_i[i1]; mp<P_offd_i[i1+1]; ++mp )
               {
                  jP = P_offd_j[mp];
                  jPg = P_col_map_offd[jP];
                  if ( jPg==jg ) Pnew_offd_data[m] += P_offd_data[mp];
               }
            }
         }
      }
   }

   hypre_TFree(Pnew_j2m);

   return Pnew;
}

Пример #10

Показать файл

Файл: hypre_merge_sort.c Проект: ducpdx/hypre

/**
 * Partition the input so that
 * a1[0:*out1) and a2[0:*out2) contain the smallest k elements
 *
 * out1, out2  receive the split positions in a1 and a2
 * a1, a2      input arrays of lengths n1 and n2
 * k           number of smallest elements requested
 *
 * If k >= n1 + n2 both arrays are taken completely (*out1 = n1, *out2 = n2),
 * so the reported split never exceeds either array's length.
 *
 * NOTE(review): the shortcut comparisons below only yield a correct split
 * when a1 and a2 are each sorted in ascending order - presumably guaranteed
 * by the caller; confirm there.
 */
static void kth_element(
   HYPRE_Int *out1, HYPRE_Int *out2,
   HYPRE_Int *a1, HYPRE_Int *a2, HYPRE_Int n1, HYPRE_Int n2, HYPRE_Int k)
{
   // everything is requested: take both inputs completely.
   // This is tested first so that the empty-input shortcuts below can never
   // report a split position past the end of the non-empty array.
   if (k >= n1 + n2)
   {
      *out1 = n1; *out2 = n2;
      return;
   }

   // either of the inputs is empty (here k < n1 + n2)
   if (n1 == 0)
   {
      *out1 = 0; *out2 = k;
      return;
   }
   if (n2 == 0)
   {
      *out1 = k; *out2 = 0;
      return;
   }

   // one is greater than the other
   if (k < n1 && a1[k] <= a2[0])
   {
      *out1 = k; *out2 = 0;
      return;
   }
   if (k - n1 >= 0 && a2[k - n1] >= a1[n1 - 1])
   {
      *out1 = n1; *out2 = k - n1;
      return;
   }
   if (k < n2 && a2[k] <= a1[0])
   {
      *out1 = 0; *out2 = k;
      return;
   }
   if (k - n2 >= 0 && a1[k - n2] >= a2[n2 - 1])
   {
      *out1 = k - n2; *out2 = n2;
      return;
   }
   // now k > 0: for k == 0 one of the two "a?[k] <= a?[0]" tests above
   // must have held, since both arrays are non-empty

   // faster to do binary search on the shorter sequence;
   // the outputs are swapped along with the inputs so each result still
   // lands in the slot belonging to its array
   if (n1 > n2)
   {
      SWAP(HYPRE_Int, n1, n2);
      SWAP(HYPRE_Int *, a1, a2);
      SWAP(HYPRE_Int *, out1, out2);
   }

   if (k < (n1 + n2)/2)
   {
      kth_element_(out1, out2, a1, a2, 0, hypre_min(n1 - 1, k), n1, n2, k);
   }
   else
   {
      // when k is big, faster to find (n1 + n2 - k)th biggest element
      // offset1/offset2 elements are skipped at the front of a1/a2 and
      // added back to the results afterwards
      HYPRE_Int offset1 = hypre_max(k - n2, 0), offset2 = hypre_max(k - n1, 0);
      HYPRE_Int new_k = k - offset1 - offset2;

      HYPRE_Int new_n1 = hypre_min(n1 - offset1, new_k + 1);
      HYPRE_Int new_n2 = hypre_min(n2 - offset2, new_k + 1);
      kth_element_(out1, out2, a1 + offset1, a2 + offset2, 0, new_n1 - 1, new_n1, new_n2, new_k);

      *out1 += offset1;
      *out2 += offset2;
   }
#ifdef DBG_MERGE_SORT
   assert(*out1 + *out2 == k);
#endif
}

Пример #11

Показать файл

Файл: new_assemble.c Проект: 8l/insieme

/*
 * hypre_StructGridAssembleWithAP
 *
 * NOTE(review): this excerpt is truncated -- the body shown here ends inside
 * the "neighbors == NULL" branch, so only the first two steps are visible.
 *
 * Visible behaviour:
 *   Step 1: read the grid's communicator, boxes, dimension, neighbor info and
 *           periodicity, derive the number of periodic images, and query the
 *           MPI size and rank.
 *   Step 2: when the grid has no neighbor structure yet, allocate scratch
 *           buffers, then walk the local boxes to accumulate the local size
 *           (sum of box volumes), the min/max box volume and the extents of
 *           a bounding box around the local boxes.
 *
 * grid  the structured grid being assembled
 *
 * The return value is not visible in this excerpt (presumably ierr -- TODO
 * confirm against the full source).
 */
int hypre_StructGridAssembleWithAP( hypre_StructGrid *grid )
{
 


   int                  ierr = 0;
   int                  tmp_i;
   
   int                  size, global_num_boxes, num_local_boxes;
   int                  i, j, d, k, index;
   int                  num_procs, myid;
   int                  *sendbuf8, *recvbuf8, *sendbuf2, *recvbuf2;
   int                  min_box_size, max_box_size;
   int                  global_min_box_size, global_max_box_size;
   int                 *ids;
   int                  max_regions, max_refinements, ologp;
   double               gamma;
   hypre_Index          min_index, max_index;
   
  
   int                  prune;
       
   hypre_Box           *box;
   

   /* fields read from the grid object */
   MPI_Comm             comm         = hypre_StructGridComm(grid);
   hypre_Box           *bounding_box = hypre_StructGridBoundingBox(grid);
   hypre_BoxArray      *local_boxes  = hypre_StructGridBoxes(grid);
   int                  dim          = hypre_StructGridDim(grid);
   hypre_BoxNeighbors  *neighbors    = hypre_StructGridNeighbors(grid);
   int                  max_distance = hypre_StructGridMaxDistance(grid);
   hypre_IndexRef       periodic     = hypre_StructGridPeriodic(grid);

   int                 *local_boxnums;

   double               dbl_global_size, tmp_dbl;
   
   hypre_BoxArray       *my_partition;
   int                  *part_ids, *part_boxnums;
     
   int                  *proc_array, proc_count, proc_alloc, count;
   int                  *tmp_proc_ids = NULL;
   
   int                  max_response_size;
   int                  *ap_proc_ids, *send_buf, *send_buf_starts;
   int                  *response_buf, *response_buf_starts;

   hypre_BoxArray      *neighbor_boxes, *n_boxes_copy;
   int                 *neighbor_proc_ids, *neighbor_boxnums;

   int                 *order_index, *delete_array;
   int                 tmp_id, start, first_local;
   
   /* grow_array holds two entries per dimension (indices 2*d and 2*d+1) */
   int                 grow, grow_array[6];
   hypre_Box           *grow_box;
   
   
   int                  *numghost;
   int                   ghostsize;
   hypre_Box            *ghostbox;

   hypre_StructAssumedPart     *assumed_part;
   hypre_DataExchangeResponse  response_obj;
 
   /* periodicity entries per direction; non-zero means periodic */
   int                  px = hypre_IndexX(periodic);
   int                  py = hypre_IndexY(periodic);
   int                  pz = hypre_IndexZ(periodic);

   int                  i_periodic = px ? 1 : 0;
   int                  j_periodic = py ? 1 : 0;
   int                  k_periodic = pz ? 1 : 0;

   int                  num_periods, multiple_ap, p;
   hypre_Box           *result_box, *period_box;
   hypre_Index         *pshifts;

   hypre_IndexRef       pshift;

#if NEIGH_PRINT
   double               start_time, end_time;
   
#endif



/*---------------------------------------------
  Step 1:  Initializations
  -----------------------------------------------*/

   prune = 1; /* default is to prune */ 
   
   num_local_boxes = hypre_BoxArraySize(local_boxes);
  
   /* each periodic direction contributes a factor of 3, others a factor of 1 */
   num_periods = (1+2*i_periodic) * (1+2*j_periodic) * (1+2*k_periodic);


   MPI_Comm_size(comm, &num_procs);
   MPI_Comm_rank(comm, &myid);


 
/*---------------------------------------------
  Step 2:  Determine the global size, total number of boxes,
           and global bounding box.
           Also get the min and max box sizes
           since it is convenient to do so.
  -----------------------------------------------*/

   if (neighbors == NULL) 
   {
    
      /*these may not be needed - check later */
      ids =   hypre_TAlloc(int, num_local_boxes);
    
      /* for the vol and number of boxes */
      sendbuf2 = hypre_CTAlloc(int, 2);
      recvbuf2 = hypre_CTAlloc(int, 2);
      size = 0;
     
      /* NOTE(review): this overwrites the bounding_box pointer read from the
         grid above with a freshly created box */
      bounding_box = hypre_BoxCreate();
      grow_box = hypre_BoxCreate();
      

      if (num_local_boxes) 
      {
         
         /* seed the running min/max volume with the first local box */
         min_box_size = hypre_BoxVolume( hypre_BoxArrayBox(local_boxes, 0));
         max_box_size = hypre_BoxVolume( hypre_BoxArrayBox(local_boxes, 0));


         /* initialize min and max */
         /* sentinels of +/-2^30; all 3 entries are set here, while the
            update loop below only touches the first dim entries */
         for (d=0; d<3; d++)
         {
            hypre_IndexD(min_index, d) = pow(2,30); 
            hypre_IndexD(max_index, d) = -pow(2,30);
         }
         

         hypre_ForBoxI(i, local_boxes)
         {
            box = hypre_BoxArrayBox(local_boxes, i);
            /* get global size and number of boxes */ 
            tmp_i = hypre_BoxVolume(box);
            size += tmp_i;
            min_box_size = hypre_min(min_box_size, tmp_i);
            max_box_size = hypre_max(max_box_size, tmp_i);


            /* set id */  
            ids[i] = i;


            /* 1/3/05 we need this for the case of holes in the domain. (I had
               commented
               it out on 12/04 - as I thought this was not necessary. */
            
            
            /* zero volume boxes - still look at for getting the bounding box */
            if (hypre_BoxVolume(box) == 0) /* zero volume boxes - still count */
            {
               /* work on a copy so the grid's own box is left untouched */
               hypre_CopyBox(box, grow_box);
               for (d = 0; d < 3; d++)
               {
                  if(!hypre_BoxSizeD(box, d))
                  {
                     /* grow amount derived from the imin - imax gap in this
                        direction; the same amount is stored for both
                        entries of dimension d */
                     grow = (hypre_BoxIMinD(box, d) - hypre_BoxIMaxD(box, d) + 1)/2;
                     grow_array[2*d] = grow;
                     grow_array[2*d+1] = grow;
                  }
                  else
                  {
                     grow_array[2*d] = 0;
                     grow_array[2*d+1] = 0;
                  }
               }   
               /* expand the box */
               hypre_BoxExpand(grow_box, grow_array);
               box = grow_box; /*pointer copy*/
            }     
            /*now we have a vol > 0 box */
    
         
            /* fold this box's extents into the running bounding box */
            for (d = 0; d < dim; d++) /* for each dimension */
            {
               hypre_IndexD(min_index, d) = hypre_min( hypre_IndexD(min_index, d), 
                                                       hypre_BoxIMinD(box, d));
               hypre_IndexD(max_index, d) = hypre_max( hypre_IndexD(max_index, d), 
                                                       hypre_BoxIMaxD(box, d));
            }
                        
         }/*end for each box loop */

         /* bounding box extents */ 
         hypre_BoxSetExtents(bounding_box, min_index, max_index);
   
      }

Пример #12

Показать файл

Файл: par_cycle.c Проект: Chang-Liu-0520/hypre

/*--------------------------------------------------------------------------
 * hypre_BoomerAMGCycle
 *
 * Runs one multigrid cycle over the level hierarchy stored in amg_vdata.
 * On every visited level it applies the configured smoother/relaxation,
 * then either restricts the residual to the next coarser level or
 * interpolates the correction back to the next finer level. The traversal
 * is steered by lev_counter[] (see the comment further down).
 *
 * amg_vdata  pointer to the hypre_ParAMGData holding operators, work
 *            vectors and cycle options
 * F_array    per-level right-hand sides; coarse entries are overwritten by
 *            the restricted residuals
 * U_array    per-level solution/correction vectors, updated in place
 *
 * Returns Solve_err_flag: 0 unless a relaxation routine reports a non-zero
 * code, in which case cycling stops immediately and that code is returned.
 * The temporary arrays allocated here (lev_counter, num_coeffs, Utemp) are
 * released on both the normal and the error path.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_BoomerAMGCycle( void              *amg_vdata, 
                   hypre_ParVector  **F_array,
                   hypre_ParVector  **U_array   )
{
   hypre_ParAMGData *amg_data = amg_vdata;

   HYPRE_Solver *smoother;
   /* Data Structure variables */

   hypre_ParCSRMatrix    **A_array;
   hypre_ParCSRMatrix    **P_array;
   hypre_ParCSRMatrix    **R_array;
   hypre_ParVector    *Utemp;
   hypre_ParVector    *Vtemp;
   hypre_ParVector    *Rtemp;
   hypre_ParVector    *Ptemp;
   hypre_ParVector    *Ztemp;
   hypre_ParVector    *Aux_U;
   hypre_ParVector    *Aux_F;

   hypre_ParCSRBlockMatrix    **A_block_array;
   hypre_ParCSRBlockMatrix    **P_block_array;
   hypre_ParCSRBlockMatrix    **R_block_array;

   HYPRE_Real   *Ztemp_data;
   HYPRE_Real   *Ptemp_data;
   HYPRE_Int     **CF_marker_array;
   /* HYPRE_Int     **unknown_map_array;
   HYPRE_Int     **point_map_array;
   HYPRE_Int     **v_at_point_array; */

   HYPRE_Real    cycle_op_count;   
   HYPRE_Int       cycle_type;
   HYPRE_Int       num_levels;
   HYPRE_Int       max_levels;

   HYPRE_Real   *num_coeffs;
   HYPRE_Int      *num_grid_sweeps;   
   HYPRE_Int      *grid_relax_type;   
   HYPRE_Int     **grid_relax_points;  

   HYPRE_Int     block_mode;
   
   HYPRE_Real  *max_eig_est;
   HYPRE_Real  *min_eig_est;
   HYPRE_Int      cheby_order;
   HYPRE_Real   cheby_fraction;

 /* Local variables  */ 
   HYPRE_Int      *lev_counter;
   HYPRE_Int       Solve_err_flag;
   HYPRE_Int       k;
   HYPRE_Int       i, j, jj;
   HYPRE_Int       level;
   HYPRE_Int       cycle_param;
   HYPRE_Int       coarse_grid;
   HYPRE_Int       fine_grid;
   HYPRE_Int       Not_Finished;
   HYPRE_Int       num_sweep;
   HYPRE_Int       cg_num_sweep = 1;
   HYPRE_Int       relax_type;
   HYPRE_Int       relax_points;
   HYPRE_Int       relax_order;
   HYPRE_Int       relax_local;
   HYPRE_Int       old_version = 0;
   HYPRE_Real   *relax_weight;
   HYPRE_Real   *omega;
   HYPRE_Real    alfa, beta, gammaold;
   HYPRE_Real    gamma = 1.0;
   HYPRE_Int       local_size;
/*   HYPRE_Int      *smooth_option; */
   HYPRE_Int       smooth_type;
   HYPRE_Int       smooth_num_levels;
   HYPRE_Int       num_threads, my_id;

   HYPRE_Real    alpha;
   HYPRE_Real  **l1_norms = NULL;
   HYPRE_Real   *l1_norms_level;

   HYPRE_Int seq_cg = 0;

   MPI_Comm comm;

#if 0
   HYPRE_Real   *D_mat;
   HYPRE_Real   *S_vec;
#endif
   
   /* Acquire data and allocate storage */

   num_threads = hypre_NumThreads();

   A_array           = hypre_ParAMGDataAArray(amg_data);
   P_array           = hypre_ParAMGDataPArray(amg_data);
   R_array           = hypre_ParAMGDataRArray(amg_data);
   CF_marker_array   = hypre_ParAMGDataCFMarkerArray(amg_data);
   Vtemp             = hypre_ParAMGDataVtemp(amg_data);
   Rtemp             = hypre_ParAMGDataRtemp(amg_data);
   Ptemp             = hypre_ParAMGDataPtemp(amg_data);
   Ztemp             = hypre_ParAMGDataZtemp(amg_data);
   num_levels        = hypre_ParAMGDataNumLevels(amg_data);
   max_levels        = hypre_ParAMGDataMaxLevels(amg_data);
   cycle_type        = hypre_ParAMGDataCycleType(amg_data);

   A_block_array     = hypre_ParAMGDataABlockArray(amg_data);
   P_block_array     = hypre_ParAMGDataPBlockArray(amg_data);
   R_block_array     = hypre_ParAMGDataRBlockArray(amg_data);
   block_mode        = hypre_ParAMGDataBlockMode(amg_data);

   num_grid_sweeps     = hypre_ParAMGDataNumGridSweeps(amg_data);
   grid_relax_type     = hypre_ParAMGDataGridRelaxType(amg_data);
   grid_relax_points   = hypre_ParAMGDataGridRelaxPoints(amg_data);
   relax_order         = hypre_ParAMGDataRelaxOrder(amg_data);
   relax_weight        = hypre_ParAMGDataRelaxWeight(amg_data); 
   omega               = hypre_ParAMGDataOmega(amg_data); 
   smooth_type         = hypre_ParAMGDataSmoothType(amg_data); 
   smooth_num_levels   = hypre_ParAMGDataSmoothNumLevels(amg_data); 
   l1_norms            = hypre_ParAMGDataL1Norms(amg_data); 
   /* smooth_option       = hypre_ParAMGDataSmoothOption(amg_data); */

   max_eig_est = hypre_ParAMGDataMaxEigEst(amg_data);
   min_eig_est = hypre_ParAMGDataMinEigEst(amg_data);
   cheby_order = hypre_ParAMGDataChebyOrder(amg_data);
   cheby_fraction = hypre_ParAMGDataChebyFraction(amg_data);

   cycle_op_count = hypre_ParAMGDataCycleOpCount(amg_data);

   lev_counter = hypre_CTAlloc(HYPRE_Int, num_levels);

   if (hypre_ParAMGDataParticipate(amg_data)) seq_cg = 1;

   /* Initialize */

   Solve_err_flag = 0;

   /* a non-NULL grid_relax_points array selects the older relaxation
      interface (hypre_BoomerAMGRelax with explicit relax_points) */
   if (grid_relax_points) old_version = 1;

   /* per-level nonzero counts, used only for the cycle complexity estimate */
   num_coeffs = hypre_CTAlloc(HYPRE_Real, num_levels);
   num_coeffs[0]    = hypre_ParCSRMatrixDNumNonzeros(A_array[0]);
   comm = hypre_ParCSRMatrixComm(A_array[0]);
   hypre_MPI_Comm_rank(comm,&my_id);

   if (block_mode)
   {
      for (j = 1; j < num_levels; j++)
         num_coeffs[j] = hypre_ParCSRBlockMatrixNumNonzeros(A_block_array[j]);
      
   }
   else 
   {
       for (j = 1; j < num_levels; j++)
         num_coeffs[j] = hypre_ParCSRMatrixDNumNonzeros(A_array[j]);
   }
   
   /*---------------------------------------------------------------------
    *    Initialize cycling control counter
    *
    *     Cycling is controlled using a level counter: lev_counter[k]
    *     
    *     Each time relaxation is performed on level k, the
    *     counter is decremented by 1. If the counter is then
    *     negative, we go to the next finer level. If non-
    *     negative, we go to the next coarser level. The
    *     following actions control cycling:
    *     
    *     a. lev_counter[0] is initialized to 1.
    *     b. lev_counter[k] is initialized to cycle_type for k>0.
    *     
    *     c. During cycling, when going down to level k, lev_counter[k]
    *        is set to the max of (lev_counter[k],cycle_type)
    *---------------------------------------------------------------------*/

   Not_Finished = 1;

   lev_counter[0] = 1;
   for (k = 1; k < num_levels; ++k) 
   {
      lev_counter[k] = cycle_type;
   }

   level = 0;
   cycle_param = 1;

   smoother = hypre_ParAMGDataSmoother(amg_data);

   /* These smoother types need an extra work vector Utemp shaped like Vtemp.
      The condition here must stay in sync with the one guarding
      hypre_ParVectorDestroy(Utemp) in the cleanup section. */
   if (smooth_num_levels > 0)
   {
      if (smooth_type == 7 || smooth_type == 8
          || smooth_type == 17 || smooth_type == 18
          || smooth_type == 9 || smooth_type == 19)
      {
         HYPRE_Int actual_local_size = hypre_ParVectorActualLocalSize(Vtemp);
         Utemp = hypre_ParVectorCreate(comm,hypre_ParVectorGlobalSize(Vtemp),
                        hypre_ParVectorPartitioning(Vtemp));
         /* the partitioning is borrowed from Vtemp, so Utemp must not free it */
         hypre_ParVectorOwnsPartitioning(Utemp) = 0;
         local_size 
            = hypre_VectorSize(hypre_ParVectorLocalVector(Vtemp));
         if (local_size < actual_local_size)
         {
            hypre_VectorData(hypre_ParVectorLocalVector(Utemp)) =
                hypre_CTAlloc(HYPRE_Complex, actual_local_size);
            hypre_ParVectorActualLocalSize(Utemp) = actual_local_size;
         }
         else
             hypre_ParVectorInitialize(Utemp);
      }
   }
   
  
   /*---------------------------------------------------------------------
    * Main loop of cycling
    *--------------------------------------------------------------------*/
  
   while (Not_Finished)
   {
      if (num_levels > 1) 
      {
        /* resize the shared work vector(s) to the current level */
        local_size 
            = hypre_VectorSize(hypre_ParVectorLocalVector(F_array[level]));
        hypre_VectorSize(hypre_ParVectorLocalVector(Vtemp)) = local_size;
        if (smooth_num_levels <= level)
        {
           /* no special smoother on this level: plain relaxation */
           cg_num_sweep = 1;
           num_sweep = num_grid_sweeps[cycle_param];
           Aux_U = U_array[level];
           Aux_F = F_array[level];
        }
        else if (smooth_type > 9)
        {
           /* smoother wrapped in the outer jj loop below: work on the
              residual equation with Ztemp/Rtemp as auxiliary vectors */
           hypre_VectorSize(hypre_ParVectorLocalVector(Ztemp)) = local_size;
           hypre_VectorSize(hypre_ParVectorLocalVector(Rtemp)) = local_size;
           hypre_VectorSize(hypre_ParVectorLocalVector(Ptemp)) = local_size;
           Ztemp_data = hypre_VectorData(hypre_ParVectorLocalVector(Ztemp));
           Ptemp_data = hypre_VectorData(hypre_ParVectorLocalVector(Ptemp));
           hypre_ParVectorSetConstantValues(Ztemp,0);
           alpha = -1.0;
           beta = 1.0;
           /* Rtemp = F - A*U */
           hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A_array[level], 
                                U_array[level], beta, F_array[level], Rtemp);
           cg_num_sweep = hypre_ParAMGDataSmoothNumSweeps(amg_data);
           num_sweep = num_grid_sweeps[cycle_param];
           Aux_U = Ztemp;
           Aux_F = Rtemp;
        }
        else 
        {
           cg_num_sweep = 1;
           num_sweep = hypre_ParAMGDataSmoothNumSweeps(amg_data);
           Aux_U = U_array[level];
           Aux_F = F_array[level];
        }
        relax_type = grid_relax_type[cycle_param];
      }
      else /* AB: 4/08: removed the max_levels > 1 check - should do this when max-levels = 1 also */
      {
        /* If no coarsening occurred, apply a simple smoother once */
        Aux_U = U_array[level];
        Aux_F = F_array[level];
        num_sweep = 1;
        /* TK: Use the user relax type (instead of 0) to allow for setting a
           convergent smoother (e.g. in the solution of singular problems). */
        relax_type = hypre_ParAMGDataUserRelaxType(amg_data);
      }

      if (l1_norms != NULL)
         l1_norms_level = l1_norms[level];
      else
         l1_norms_level = NULL;

      if (cycle_param == 3 && seq_cg)
      {
         hypre_seqAMGCycle(amg_data, level, F_array, U_array);
      }
      else
      {
         
        /*------------------------------------------------------------------
         * Do the relaxation num_sweep times
         *-----------------------------------------------------------------*/
         for (jj = 0; jj < cg_num_sweep; jj++)
         {
           if (smooth_num_levels > level && smooth_type > 9)
              hypre_ParVectorSetConstantValues(Aux_U,0);

           for (j = 0; j < num_sweep; j++)
           {
              if (num_levels == 1 && max_levels > 1)
              {
                 relax_points = 0;
                 relax_local = 0;
              }
              else
              {
                 /* relax_points is only assigned (and only read below)
                    when the old interface is active */
                 if (old_version)
                    relax_points = grid_relax_points[cycle_param][j];
                 relax_local = relax_order;
              }

              /*-----------------------------------------------
               * VERY sloppy approximation to cycle complexity
               *-----------------------------------------------*/
              if (old_version && level < num_levels -1)
              {
                 switch (relax_points)
                 {
                    case 1:
                    cycle_op_count += num_coeffs[level+1];
                    break;
  
                    case -1: 
                    cycle_op_count += (num_coeffs[level]-num_coeffs[level+1]); 
                    break;
                 }
              }
              else
              {
                 cycle_op_count += num_coeffs[level]; 
              }
              /*-----------------------------------------------
                Choose Smoother
                -----------------------------------------------*/

              if (smooth_num_levels > level && 
                        (smooth_type == 7 || smooth_type == 8 ||
                        smooth_type == 9 || smooth_type == 19 ||
                        smooth_type == 17 || smooth_type == 18))
              {
                 hypre_VectorSize(hypre_ParVectorLocalVector(Utemp)) = local_size;
                 alpha = -1.0;
                 beta = 1.0;
                 /* Vtemp = Aux_F - A*U, then solve for the update in Utemp */
                 hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A_array[level], 
                                U_array[level], beta, Aux_F, Vtemp);
                 if (smooth_type == 8 || smooth_type == 18)
                    HYPRE_ParCSRParaSailsSolve(smoother[level],
                                 (HYPRE_ParCSRMatrix) A_array[level],
                                 (HYPRE_ParVector) Vtemp,
                                 (HYPRE_ParVector) Utemp);
                 else if (smooth_type == 7 || smooth_type == 17)
                    HYPRE_ParCSRPilutSolve(smoother[level],
                                 (HYPRE_ParCSRMatrix) A_array[level],
                                 (HYPRE_ParVector) Vtemp,
                                 (HYPRE_ParVector) Utemp);
                 else if (smooth_type == 9 || smooth_type == 19)
                    HYPRE_EuclidSolve(smoother[level],
                                 (HYPRE_ParCSRMatrix) A_array[level],
                                 (HYPRE_ParVector) Vtemp,
                                 (HYPRE_ParVector) Utemp);
                 hypre_ParVectorAxpy(relax_weight[level],Utemp,Aux_U);
              }
              else if (smooth_num_levels > level &&
                        (smooth_type == 6 || smooth_type == 16))
              {
                 HYPRE_SchwarzSolve(smoother[level],
                                 (HYPRE_ParCSRMatrix) A_array[level],
                                 (HYPRE_ParVector) Aux_F,
                                  (HYPRE_ParVector) Aux_U);
              }
              /*else if (relax_type == 99)*/
              else if (relax_type == 9 || relax_type == 99)
              { /* Gaussian elimination */
                 hypre_GaussElimSolve(amg_data, level, relax_type);
              }
              else if (relax_type == 18)
              {   /* L1 - Jacobi*/
                 if (relax_order == 1 && cycle_param < 3)
                 {
                    /* need to do CF - so can't use the AMS one */
                    HYPRE_Int i;
                    HYPRE_Int loc_relax_points[2];
                    if (cycle_type < 2)
                    {
                       loc_relax_points[0] = 1;
                       loc_relax_points[1] = -1;
                    }
                    else
                    {
                       loc_relax_points[0] = -1;
                       loc_relax_points[1] = 1;
                    }
                    /* NOTE(review): l1_norms is dereferenced here without the
                       NULL check used for l1_norms_level -- confirm that
                       l1_norms is always set when relax_type == 18 */
                    for (i=0; i < 2; i++)
                       hypre_ParCSRRelax_L1_Jacobi(A_array[level],
                                                 Aux_F,
                                                 CF_marker_array[level],
                                                 loc_relax_points[i],
                                                 relax_weight[level],
                                                 l1_norms[level],
                                                 Aux_U,
                                                 Vtemp);
                 }
                 else /* not CF - so use through AMS */
                 {
                    if (num_threads == 1)
                       hypre_ParCSRRelax(A_array[level], 
                                       Aux_F,
                                       1,
                                       1,
                                       l1_norms_level,
                                       relax_weight[level],
                                       omega[level],0,0,0,0,
                                       Aux_U,
                                       Vtemp, 
                                       Ztemp);

                    else
                       hypre_ParCSRRelaxThreads(A_array[level], 
                                              Aux_F,
                                              1,
                                              1,
                                              l1_norms_level,
                                              relax_weight[level],
                                              omega[level],
                                              Aux_U,
                                              Vtemp,
                                              Ztemp);
                 }
              }
              else if (relax_type == 15)
              {  /* CG */
                 if (j ==0) /* do num sweep iterations of CG */
                    hypre_ParCSRRelax_CG( smoother[level],
                                        A_array[level], 
                                        Aux_F,      
                                        Aux_U,
                                        num_sweep);
              }
              else if (relax_type == 16)
              { /* scaled Chebyshev */
                 HYPRE_Int scale = 1;
                 HYPRE_Int variant = 0;
                 hypre_ParCSRRelax_Cheby(A_array[level], 
                                       Aux_F,
                                       max_eig_est[level],     
                                       min_eig_est[level],     
                                       cheby_fraction, cheby_order, scale,
                                       variant, Aux_U, Vtemp, Ztemp );
              }
              else if (relax_type ==17)
              {
                 hypre_BoomerAMGRelax_FCFJacobi(A_array[level], 
                                              Aux_F,
                                              CF_marker_array[level],
                                              relax_weight[level],
                                              Aux_U,
                                              Vtemp);
              }
              else if (old_version)
              {
                 Solve_err_flag = hypre_BoomerAMGRelax(A_array[level], 
                                                     Aux_F,
                                                     CF_marker_array[level],
                                                     relax_type, relax_points,
                                                     relax_weight[level],
                                                     omega[level],
                                                     l1_norms_level,
                                                     Aux_U,
                                                     Vtemp, 
                                                     Ztemp);
              }
              else 
              {
                 /* smoother than can have CF ordering */
                 if (block_mode)
                 {
                     Solve_err_flag = hypre_BoomerAMGBlockRelaxIF(A_block_array[level], 
                                                                  Aux_F,
                                                                  CF_marker_array[level],
                                                                  relax_type,
                                                                  relax_local,
                                                                  cycle_param,
                                                                  relax_weight[level],
                                                                  omega[level],
                                                                  Aux_U,
                                                                  Vtemp);
                 }
                 else
                 {
                    Solve_err_flag = hypre_BoomerAMGRelaxIF(A_array[level], 
                                                          Aux_F,
                                                          CF_marker_array[level],
                                                          relax_type,
                                                          relax_local,
                                                          cycle_param,
                                                          relax_weight[level],
                                                          omega[level],
                                                          l1_norms_level,
                                                          Aux_U,
                                                          Vtemp, 
                                                          Ztemp);
                 }
              }
 
              /* A failed relaxation aborts the cycle. Leave through the
                 shared cleanup code so the temporaries are not leaked. */
              if (Solve_err_flag != 0)
                 goto cleanup;
           }
           if  (smooth_num_levels > level && smooth_type > 9)
           {
              /* outer update using the smoothed residual Ztemp: new search
                 direction in Ptemp, step length alfa, then update U and
                 the residual Rtemp */
              gammaold = gamma;
              gamma = hypre_ParVectorInnerProd(Rtemp,Ztemp);
              if (jj == 0)
                 hypre_ParVectorCopy(Ztemp,Ptemp);
              else
              {
                 beta = gamma/gammaold;
                 for (i=0; i < local_size; i++)
                    Ptemp_data[i] = Ztemp_data[i] + beta*Ptemp_data[i];
              }
              hypre_ParCSRMatrixMatvec(1.0,A_array[level],Ptemp,0.0,Vtemp);
              alfa = gamma /hypre_ParVectorInnerProd(Ptemp,Vtemp);
              hypre_ParVectorAxpy(alfa,Ptemp,U_array[level]);
              hypre_ParVectorAxpy(-alfa,Vtemp,Rtemp);
           }
        }
      }

      /*------------------------------------------------------------------
       * Decrement the control counter and determine which grid to visit next
       *-----------------------------------------------------------------*/

      --lev_counter[level];
       
      if (lev_counter[level] >= 0 && level != num_levels-1)
      {
                               
         /*---------------------------------------------------------------
          * Visit coarser level next.  
          * Compute residual using hypre_ParCSRMatrixMatvec.
          * Perform restriction using hypre_ParCSRMatrixMatvecT.
          * Reset counters and cycling parameters for coarse level
          *--------------------------------------------------------------*/

         fine_grid = level;
         coarse_grid = level + 1;

         hypre_ParVectorSetConstantValues(U_array[coarse_grid], 0.0); 
          
         alpha = -1.0;
         beta = 1.0;

         /* Vtemp = F - A*U on the fine grid */
         if (block_mode)
         {
            hypre_ParVectorCopy(F_array[fine_grid],Vtemp);
            hypre_ParCSRBlockMatrixMatvec(alpha, A_block_array[fine_grid], U_array[fine_grid],
                                          beta, Vtemp);
         }
         else 
         {
            // JSP: avoid unnecessary copy using out-of-place version of SpMV
            hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A_array[fine_grid], U_array[fine_grid],
                                               beta, F_array[fine_grid], Vtemp);
         }

         alpha = 1.0;
         beta = 0.0;

         /* F_coarse = R^T * Vtemp */
         if (block_mode)
         {
            hypre_ParCSRBlockMatrixMatvecT(alpha,R_block_array[fine_grid],Vtemp,
                                      beta,F_array[coarse_grid]);
         }
         else
         {
            hypre_ParCSRMatrixMatvecT(alpha,R_array[fine_grid],Vtemp,
                                      beta,F_array[coarse_grid]);
         }

         ++level;
         lev_counter[level] = hypre_max(lev_counter[level],cycle_type);
         cycle_param = 1;
         if (level == num_levels-1) cycle_param = 3;
      }

      else if (level != 0)
      {
         /*---------------------------------------------------------------
          * Visit finer level next.
          * Interpolate and add correction using hypre_ParCSRMatrixMatvec.
          * Reset counters and cycling parameters for finer level.
          *--------------------------------------------------------------*/

         fine_grid = level - 1;
         coarse_grid = level;
         alpha = 1.0;
         beta = 1.0;
         /* U_fine += P * U_coarse */
         if (block_mode)
         {
            hypre_ParCSRBlockMatrixMatvec(alpha, P_block_array[fine_grid], 
                                     U_array[coarse_grid],
                                     beta, U_array[fine_grid]);   
         }
         else 
         {
            hypre_ParCSRMatrixMatvec(alpha, P_array[fine_grid], 
                                     U_array[coarse_grid],
                                     beta, U_array[fine_grid]);            
         }
         
         --level;
         cycle_param = 2;
      }
      else
      {
         Not_Finished = 0;
      }
   }

   /* Only a cycle that ran to completion publishes its operation count;
      the error path jumps past this store. */
   hypre_ParAMGDataCycleOpCount(amg_data) = cycle_op_count;

cleanup:
   /* Release the temporaries on both the normal and the error path. */
   hypre_TFree(lev_counter);
   hypre_TFree(num_coeffs);
   if (smooth_num_levels > 0)
   {
     /* same condition as the one under which Utemp was created above */
     if (smooth_type == 7 || smooth_type == 8 || smooth_type == 9 || 
        smooth_type == 17 || smooth_type == 18 || smooth_type == 19 )
        hypre_ParVectorDestroy(Utemp);
   }
   return(Solve_err_flag);
}

Пример #13

Показать файл

Файл: par_add_cycle.c Проект: IanLee1521/hypre

/*--------------------------------------------------------------------------
 * hypre_BoomerAMGAdditiveCycle
 *
 * Performs one V-cycle in which the levels below `addlvl` are treated
 * multiplicatively (smooth, form residual, restrict / interpolate, smooth)
 * and the levels from `addlvl` on are treated additively through a single
 * combined correction applied to Xtilde/Rtilde.
 *
 *   addlvl = max(additive, mult_additive, simple)
 *
 * Parameters:
 *   amg_vdata - pointer to the hypre_ParAMGData object holding the hierarchy
 *               (A/P/R/F/U arrays, relaxation parameters, Lambda or D_inv,
 *               Xtilde, Rtilde, work vectors Vtemp/Ztemp).
 *
 * Returns:
 *   Solve_err_flag, which this routine never changes from 0.
 *
 * Relaxation type codes handled explicitly on the multiplicative levels:
 *   0  - diagonal scaling with relax_weight[level] on the down cycle
 *   18 - l1-scaled diagonal update on the down cycle; on the up cycle either
 *        a two-pass hypre_ParCSRRelax_L1_Jacobi (when rlx_order is nonzero)
 *        or hypre_ParCSRRelax
 *   any other value is forwarded to hypre_BoomerAMGRelaxIF.
 *--------------------------------------------------------------------------*/

HYPRE_Int
hypre_BoomerAMGAdditiveCycle( void              *amg_vdata)
{
   hypre_ParAMGData *amg_data = amg_vdata;

   /* Data Structure variables */

   hypre_ParCSRMatrix    **A_array;
   hypre_ParCSRMatrix    **P_array;
   hypre_ParCSRMatrix    **R_array;
   hypre_ParCSRMatrix    *Lambda;
   hypre_ParVector    **F_array;
   hypre_ParVector    **U_array;
   hypre_ParVector    *Vtemp;
   hypre_ParVector    *Ztemp;
   hypre_ParVector    *Xtilde, *Rtilde;
   HYPRE_Int      **CF_marker_array;

   HYPRE_Int       num_levels;
   HYPRE_Int       addlvl;
   HYPRE_Int       additive;
   HYPRE_Int       mult_additive;
   HYPRE_Int       simple;
   HYPRE_Int       i, num_rows;
   HYPRE_Int       n_global;
   HYPRE_Int       rlx_order;

   /* Local variables  */
   HYPRE_Int       Solve_err_flag = 0;
   HYPRE_Int       level;
   HYPRE_Int       coarse_grid;
   HYPRE_Int       fine_grid;
   HYPRE_Int       rlx_down;
   HYPRE_Int       rlx_up;
   HYPRE_Int      *grid_relax_type;
   HYPRE_Real    **l1_norms;
   HYPRE_Real      alpha, beta;
   HYPRE_Real     *u_data;
   HYPRE_Real     *v_data;
   HYPRE_Real     *l1_norms_lvl;
   HYPRE_Real     *D_inv;
   HYPRE_Real     *x_global;
   HYPRE_Real     *r_global;
   HYPRE_Real     *relax_weight;
   HYPRE_Real     *omega;

   /* Acquire data */

   A_array           = hypre_ParAMGDataAArray(amg_data);
   F_array           = hypre_ParAMGDataFArray(amg_data);
   U_array           = hypre_ParAMGDataUArray(amg_data);
   P_array           = hypre_ParAMGDataPArray(amg_data);
   R_array           = hypre_ParAMGDataRArray(amg_data);
   CF_marker_array   = hypre_ParAMGDataCFMarkerArray(amg_data);
   Vtemp             = hypre_ParAMGDataVtemp(amg_data);
   Ztemp             = hypre_ParAMGDataZtemp(amg_data);
   num_levels        = hypre_ParAMGDataNumLevels(amg_data);
   additive          = hypre_ParAMGDataAdditive(amg_data);
   mult_additive     = hypre_ParAMGDataMultAdditive(amg_data);
   simple            = hypre_ParAMGDataSimple(amg_data);
   grid_relax_type   = hypre_ParAMGDataGridRelaxType(amg_data);
   Lambda            = hypre_ParAMGDataLambda(amg_data);
   Xtilde            = hypre_ParAMGDataXtilde(amg_data);
   Rtilde            = hypre_ParAMGDataRtilde(amg_data);
   l1_norms          = hypre_ParAMGDataL1Norms(amg_data);
   D_inv             = hypre_ParAMGDataDinv(amg_data);
   relax_weight      = hypre_ParAMGDataRelaxWeight(amg_data);
   omega             = hypre_ParAMGDataOmega(amg_data);
   rlx_order         = hypre_ParAMGDataRelaxOrder(amg_data);

   /* Initialize: first level that is handled additively */

   addlvl = hypre_max(additive, mult_additive);
   addlvl = hypre_max(addlvl, simple);

   /*---------------------------------------------------------------------
    * Main loop of cycling --- V-cycle
    *--------------------------------------------------------------------*/

   /* down cycle */
   rlx_down = grid_relax_type[1];
   rlx_up = grid_relax_type[2];
   for (level = 0; level < num_levels-1; level++)
   {
      fine_grid = level;
      coarse_grid = level + 1;

      u_data = hypre_VectorData(hypre_ParVectorLocalVector(U_array[fine_grid]));
      v_data = hypre_VectorData(hypre_ParVectorLocalVector(Vtemp));
      l1_norms_lvl = l1_norms[level];

      /* the coarse correction always starts from zero */
      hypre_ParVectorSetConstantValues(U_array[coarse_grid], 0.0);

      if (level < addlvl) /* multiplicative version */
      {
         /* smoothing step; every branch leaves a copy of F in Vtemp */

         if (rlx_down == 0)
         {
            /* NOTE(review): reads A_data[A_i[i]] as the diagonal entry, i.e.
               assumes the diagonal is stored first in each row -- confirm */
            HYPRE_Real *A_data = hypre_CSRMatrixData(hypre_ParCSRMatrixDiag(A_array[fine_grid]));
            HYPRE_Int *A_i = hypre_CSRMatrixI(hypre_ParCSRMatrixDiag(A_array[fine_grid]));
            hypre_ParVectorCopy(F_array[fine_grid],Vtemp);
            num_rows = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A_array[fine_grid]));
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
            for (i = 0; i < num_rows; i++)
            {
               u_data[i] = relax_weight[level]*v_data[i] / A_data[A_i[i]];
            }
         }
         else if (rlx_down != 18)
         {
            hypre_BoomerAMGRelaxIF(A_array[fine_grid],F_array[fine_grid],
                                   CF_marker_array[fine_grid], rlx_down,rlx_order,1,
                                   relax_weight[fine_grid], omega[fine_grid],
                                   l1_norms_lvl, U_array[fine_grid], Vtemp, Ztemp);
            hypre_ParVectorCopy(F_array[fine_grid],Vtemp);
         }
         else
         {
            hypre_ParVectorCopy(F_array[fine_grid],Vtemp);
            num_rows = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A_array[fine_grid]));
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
            for (i = 0; i < num_rows; i++)
            {
               u_data[i] += v_data[i] / l1_norms_lvl[i];
            }
         }

         /* Vtemp holds F here; the call is issued as (-1, A, U, 1, Vtemp),
            presumably producing the residual F - A*U in Vtemp */
         alpha = -1.0;
         beta = 1.0;
         hypre_ParCSRMatrixMatvec(alpha, A_array[fine_grid], U_array[fine_grid],
                                  beta, Vtemp);

         /* restrict Vtemp into the coarse right-hand side */
         alpha = 1.0;
         beta = 0.0;
         hypre_ParCSRMatrixMatvecT(alpha,R_array[fine_grid],Vtemp,
                                   beta,F_array[coarse_grid]);
      }
      else /* additive version */
      {
         hypre_ParVectorCopy(F_array[fine_grid],Vtemp);
         if (level == 0)
         {
            /* seed the combined vectors with the finest-level data */
            hypre_ParVectorCopy(Vtemp, Rtilde);
            hypre_ParVectorCopy(U_array[fine_grid],Xtilde);
         }
         alpha = 1.0;
         beta = 0.0;
         hypre_ParCSRMatrixMatvecT(alpha,R_array[fine_grid],Vtemp,
                                   beta,F_array[coarse_grid]);
      }
   }

   /* solve coarse grid */
   if (addlvl < num_levels)
   {
      if (simple > -1)
      {
         /* diagonal combined smoother: Xtilde += D_inv .* Rtilde */
         x_global = hypre_VectorData(hypre_ParVectorLocalVector(Xtilde));
         r_global = hypre_VectorData(hypre_ParVectorLocalVector(Rtilde));
         n_global = hypre_VectorSize(hypre_ParVectorLocalVector(Xtilde));
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
         for (i = 0; i < n_global; i++)
         {
            x_global[i] += D_inv[i]*r_global[i];
         }
      }
      else
      {
         /* combined smoother stored as the matrix Lambda */
         hypre_ParCSRMatrixMatvec(1.0, Lambda, Rtilde, 1.0, Xtilde);
      }
      if (addlvl == 0)
      {
         hypre_ParVectorCopy(Xtilde, U_array[0]);
      }
   }
   else
   {
      /* no additive levels: relax on the coarsest level only */
      fine_grid = num_levels -1;
      hypre_ParCSRRelax(A_array[fine_grid], F_array[fine_grid],
                        1, 1, l1_norms[fine_grid],
                        1.0, 1.0 ,0,0,0,0,
                        U_array[fine_grid], Vtemp, Ztemp);
   }

   /* up cycle */
   for (level = num_levels-1; level > 0; level--)
   {
      fine_grid = level - 1;
      coarse_grid = level;

      /* interpolate the coarse correction and add it to the fine iterate */
      alpha = 1.0;
      beta = 1.0;
      hypre_ParCSRMatrixMatvec(alpha, P_array[fine_grid],
                               U_array[coarse_grid],
                               beta, U_array[fine_grid]);

      if (level <= addlvl) /* multiplicative version: post-smooth */
      {
         if (rlx_up != 18)
         {
            hypre_BoomerAMGRelaxIF(A_array[fine_grid],F_array[fine_grid],
                                   CF_marker_array[fine_grid],
                                   rlx_up,rlx_order,2,
                                   relax_weight[fine_grid], omega[fine_grid],
                                   l1_norms[fine_grid], U_array[fine_grid], Vtemp, Ztemp);
         }
         else if (rlx_order)
         {
            /* two passes over the CF_marker classes -1 and 1, in that order */
            HYPRE_Int loc_relax_points[2];
            loc_relax_points[0] = -1;
            loc_relax_points[1] = 1;
            for (i = 0; i < 2; i++)
            {
               hypre_ParCSRRelax_L1_Jacobi(A_array[fine_grid],F_array[fine_grid],
                                           CF_marker_array[fine_grid],
                                           loc_relax_points[i],
                                           1.0, l1_norms[fine_grid],
                                           U_array[fine_grid], Vtemp);
            }
         }
         else
         {
            hypre_ParCSRRelax(A_array[fine_grid], F_array[fine_grid],
                              1, 1, l1_norms[fine_grid],
                              1.0, 1.0 ,0,0,0,0,
                              U_array[fine_grid], Vtemp, Ztemp);
         }
      }
      /* additive version: interpolation only, no post-smoothing */
   }

   return(Solve_err_flag);
}

Пример #14

Показать файл

Файл: par_add_cycle.c Проект: IanLee1521/hypre

/*--------------------------------------------------------------------------
 * hypre_CreateLambda
 *
 * Builds the combined smoother matrix Lambda and the combined vectors
 * Xtilde / Rtilde used by the additive cycle, and stores them in amg_data.
 *
 * All levels from addlvl = max(additive, mult_additive) up to
 * num_levels-1 are stacked one after another: level l occupies the local
 * rows [level_start[l-addlvl], level_start[l-addlvl+1]) of Lambda.
 * For a row i of level matrix A with scaling d (d_i = relax_weight/a_ii when
 * the down-cycle relax type is 0, otherwise d_i = 1/l1_norm_i) the entries
 * written are
 *      diagonal     : (2 - a_ii*d_i) * d_i
 *      off-diagonal : (-a_ij*d_j) * d_i
 * NOTE(review): a_ii is read as the first stored entry of each diag row;
 * this assumes the diagonal is stored first -- confirm.
 *
 * Side effects visible in this routine:
 *   - for every stacked level other than level 0, the data arrays of
 *     F_array[level] and U_array[level] are freed and redirected to the
 *     matching slice of Rtilde / Xtilde (OwnsData set to 0);
 *   - a communication package merging the per-level packages is created and
 *     attached to Lambda;
 *   - Lambda, Rtilde and Xtilde are stored in amg_data.
 *
 * Parameters:
 *   amg_vdata - pointer to the hypre_ParAMGData object.
 *
 * Returns:
 *   Solve_err_flag, which this routine never changes from 0.
 *--------------------------------------------------------------------------*/

HYPRE_Int hypre_CreateLambda(void *amg_vdata)
{
   hypre_ParAMGData *amg_data = amg_vdata;

   /* Data Structure variables */
   MPI_Comm comm;
   hypre_ParCSRMatrix **A_array;
   hypre_ParVector    **F_array;
   hypre_ParVector    **U_array;

   hypre_ParCSRMatrix *A_tmp;
   hypre_ParCSRMatrix *Lambda;
   hypre_CSRMatrix *L_diag;
   hypre_CSRMatrix *L_offd;
   hypre_CSRMatrix *A_tmp_diag;
   hypre_CSRMatrix *A_tmp_offd;
   hypre_ParVector *Xtilde;
   hypre_ParVector *Rtilde;
   hypre_Vector *Xtilde_local;
   hypre_Vector *Rtilde_local;
   hypre_ParCSRCommPkg *comm_pkg;
   hypre_ParCSRCommPkg *L_comm_pkg = NULL;
   hypre_ParCSRCommHandle *comm_handle;
   HYPRE_Real    *L_diag_data = NULL;
   HYPRE_Real    *L_offd_data = NULL;
   HYPRE_Real    *buf_data = NULL;
   HYPRE_Real    *tmp_data;
   HYPRE_Real    *x_data;
   HYPRE_Real    *r_data;
   HYPRE_Real    *l1_norms;
   HYPRE_Real    *A_tmp_diag_data;
   HYPRE_Real    *A_tmp_offd_data;
   HYPRE_Real    *D_data = NULL;
   HYPRE_Real    *D_data_offd = NULL;
   HYPRE_Int *L_diag_i;
   HYPRE_Int *L_diag_j = NULL;
   HYPRE_Int *L_offd_i;
   HYPRE_Int *L_offd_j = NULL;
   HYPRE_Int *A_tmp_diag_i;
   HYPRE_Int *A_tmp_offd_i;
   HYPRE_Int *A_tmp_diag_j;
   HYPRE_Int *A_tmp_offd_j;
   HYPRE_Int *L_recv_ptr = NULL;
   HYPRE_Int *L_send_ptr = NULL;
   HYPRE_Int *L_recv_procs = NULL;
   HYPRE_Int *L_send_procs = NULL;
   HYPRE_Int *L_send_map_elmts = NULL;
   HYPRE_Int *recv_procs = NULL;
   HYPRE_Int *send_procs = NULL;
   HYPRE_Int *send_map_elmts = NULL;
   HYPRE_Int *send_map_starts = NULL;
   HYPRE_Int *recv_vec_starts = NULL;
   HYPRE_Int *all_send_procs = NULL;
   HYPRE_Int *all_recv_procs = NULL;
   HYPRE_Int *remap = NULL;
   HYPRE_Int *level_start;

   HYPRE_Int       addlvl;
   HYPRE_Int       additive;
   HYPRE_Int       mult_additive;
   HYPRE_Int       num_levels;
   HYPRE_Int       num_add_lvls;
   HYPRE_Int       num_procs;
   HYPRE_Int       num_sends, num_recvs;
   HYPRE_Int       num_sends_L = 0;
   HYPRE_Int       num_recvs_L = 0;
   HYPRE_Int       send_data_L = 0;
   HYPRE_Int       num_rows_L = 0;
   HYPRE_Int       num_rows_tmp = 0;
   HYPRE_Int       num_cols_offd_L = 0;
   HYPRE_Int       num_cols_offd = 0;
   HYPRE_Int       level, i, j, k;
   HYPRE_Int       this_proc, cnt, cnt_diag, cnt_offd;
   HYPRE_Int       cnt_recv, cnt_send, cnt_row, row_start;
   HYPRE_Int       start_diag, start_offd, indx, cnt_map;
   HYPRE_Int       start, j_indx, index, cnt_level;
   HYPRE_Int       max_sends, max_recvs;

   /* Local variables  */
   HYPRE_Int       Solve_err_flag = 0;
   HYPRE_Int       num_nonzeros_diag;
   HYPRE_Int       num_nonzeros_offd;

   HYPRE_Real  **l1_norms_ptr = NULL;
   HYPRE_Real   *relax_weight = NULL;
   HYPRE_Int     relax_type;   /* integer relaxation-type code */

   /* Acquire data and allocate storage */

   A_array           = hypre_ParAMGDataAArray(amg_data);
   F_array           = hypre_ParAMGDataFArray(amg_data);
   U_array           = hypre_ParAMGDataUArray(amg_data);
   additive          = hypre_ParAMGDataAdditive(amg_data);
   mult_additive     = hypre_ParAMGDataMultAdditive(amg_data);
   num_levels        = hypre_ParAMGDataNumLevels(amg_data);
   relax_weight      = hypre_ParAMGDataRelaxWeight(amg_data);
   relax_type        = hypre_ParAMGDataGridRelaxType(amg_data)[1];
   comm              = hypre_ParCSRMatrixComm(A_array[0]);

   hypre_MPI_Comm_size(comm,&num_procs);

   l1_norms_ptr      = hypre_ParAMGDataL1Norms(amg_data);

   addlvl = hypre_max(additive, mult_additive);
   num_add_lvls = num_levels+1-addlvl;

   /*---------------------------------------------------------------------
    * Pass 1: accumulate sizes over all stacked levels
    *--------------------------------------------------------------------*/

   level_start = hypre_CTAlloc(HYPRE_Int, num_add_lvls+1);
   num_nonzeros_diag = 0;
   num_nonzeros_offd = 0;
   level_start[0] = 0;
   cnt = 1;
   max_sends = 0;
   max_recvs = 0;
   for (i=addlvl; i < num_levels; i++)
   {
      A_tmp = A_array[i];
      A_tmp_diag = hypre_ParCSRMatrixDiag(A_tmp);
      A_tmp_offd = hypre_ParCSRMatrixOffd(A_tmp);
      A_tmp_diag_i = hypre_CSRMatrixI(A_tmp_diag);
      A_tmp_offd_i = hypre_CSRMatrixI(A_tmp_offd);
      num_rows_tmp = hypre_CSRMatrixNumRows(A_tmp_diag);
      num_cols_offd = hypre_CSRMatrixNumCols(A_tmp_offd);
      num_rows_L += num_rows_tmp;
      level_start[cnt] = level_start[cnt-1] + num_rows_tmp;
      cnt++;
      num_cols_offd_L += num_cols_offd;
      num_nonzeros_diag += A_tmp_diag_i[num_rows_tmp];
      num_nonzeros_offd += A_tmp_offd_i[num_rows_tmp];
      comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp);
      if (comm_pkg)
      {
         num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
         max_sends += num_sends;
         if (num_sends)
         {
            send_data_L += hypre_ParCSRCommPkgSendMapStart(comm_pkg,num_sends);
         }
         max_recvs += hypre_ParCSRCommPkgNumRecvs(comm_pkg);
      }
   }

   /* If the summed neighbour counts reach num_procs, switch to arrays
      indexed directly by process id (second branch below). */
   if (max_sends >= num_procs ||max_recvs >= num_procs)
   {
      max_sends = num_procs;
      max_recvs = num_procs;
   }
   if (max_sends) all_send_procs = hypre_CTAlloc(HYPRE_Int, max_sends);
   if (max_recvs) all_recv_procs = hypre_CTAlloc(HYPRE_Int, max_recvs);

   /*---------------------------------------------------------------------
    * Build the merged list of send/recv processes and the start pointers
    *--------------------------------------------------------------------*/

   cnt_send = 0;
   cnt_recv = 0;
   if (max_sends || max_recvs)
   {
      if (max_sends < num_procs && max_recvs < num_procs)
      {
         /* gather every per-level neighbour, then sort and remove duplicates */
         for (i=addlvl; i < num_levels; i++)
         {
            A_tmp = A_array[i];
            comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp);
            if (comm_pkg)
            {
               num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
               num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
               send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg);
               recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
               for (j = 0; j < num_sends; j++)
               {
                  all_send_procs[cnt_send++] = send_procs[j];
               }
               for (j = 0; j < num_recvs; j++)
               {
                  all_recv_procs[cnt_recv++] = recv_procs[j];
               }
            }
         }
         if (max_sends)
         {
            qsort0(all_send_procs, 0, max_sends-1);
            num_sends_L = 1;
            this_proc = all_send_procs[0];
            for (i=1; i < max_sends; i++)
            {
               if (all_send_procs[i] > this_proc)
               {
                  this_proc = all_send_procs[i];
                  all_send_procs[num_sends_L++] = this_proc;
               }
            }
            L_send_procs = hypre_CTAlloc(HYPRE_Int, num_sends_L);
            for (j=0; j < num_sends_L; j++)
            {
               L_send_procs[j] = all_send_procs[j];
            }
            /* all_send_procs is released once, after this if/else */
         }
         if (max_recvs)
         {
            qsort0(all_recv_procs, 0, max_recvs-1);
            num_recvs_L = 1;
            this_proc = all_recv_procs[0];
            for (i=1; i < max_recvs; i++)
            {
               if (all_recv_procs[i] > this_proc)
               {
                  this_proc = all_recv_procs[i];
                  all_recv_procs[num_recvs_L++] = this_proc;
               }
            }
            L_recv_procs = hypre_CTAlloc(HYPRE_Int, num_recvs_L);
            for (j=0; j < num_recvs_L; j++)
            {
               L_recv_procs[j] = all_recv_procs[j];
            }
            /* all_recv_procs is released once, after this if/else */
         }

         L_recv_ptr = hypre_CTAlloc(HYPRE_Int, num_recvs_L+1);
         L_send_ptr = hypre_CTAlloc(HYPRE_Int, num_sends_L+1);

         /* count the data exchanged with each merged neighbour */
         for (i=addlvl; i < num_levels; i++)
         {
            A_tmp = A_array[i];
            comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp);
            if (comm_pkg)
            {
               num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
               num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
               send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg);
               recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
               send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
               recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
            }
            else
            {
               num_sends = 0;
               num_recvs = 0;
            }
            for (k = 0; k < num_sends; k++)
            {
               this_proc = hypre_BinarySearch(L_send_procs,send_procs[k],num_sends_L);
               L_send_ptr[this_proc+1] += send_map_starts[k+1]-send_map_starts[k];
            }
            for (k = 0; k < num_recvs; k++)
            {
               this_proc = hypre_BinarySearch(L_recv_procs,recv_procs[k],num_recvs_L);
               L_recv_ptr[this_proc+1] += recv_vec_starts[k+1]-recv_vec_starts[k];
            }
         }

         /* turn the per-neighbour counts into running start offsets */
         L_recv_ptr[0] = 0;
         for (i=1; i < num_recvs_L; i++)
         {
            L_recv_ptr[i+1] += L_recv_ptr[i];
         }

         L_send_ptr[0] = 0;
         for (i=1; i < num_sends_L; i++)
         {
            L_send_ptr[i+1] += L_send_ptr[i];
         }
      }
      else
      {
         /* all_*_procs[p] accumulates the amount of data exchanged with
            process p; a zero entry means p is not a neighbour */
         num_recvs_L = 0;
         num_sends_L = 0;
         for (i=addlvl; i < num_levels; i++)
         {
            A_tmp = A_array[i];
            comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp);
            if (comm_pkg)
            {
               num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
               num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
               send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg);
               recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
               send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
               recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
               for (j = 0; j < num_sends; j++)
               {
                  this_proc = send_procs[j];
                  if (all_send_procs[this_proc] == 0)
                  {
                     num_sends_L++;
                  }
                  all_send_procs[this_proc] += send_map_starts[j+1]-send_map_starts[j];
               }
               for (j = 0; j < num_recvs; j++)
               {
                  this_proc = recv_procs[j];
                  if (all_recv_procs[this_proc] == 0)
                  {
                     num_recvs_L++;
                  }
                  all_recv_procs[this_proc] += recv_vec_starts[j+1]-recv_vec_starts[j];
               }
            }
         }
         if (max_sends)
         {
            L_send_procs = hypre_CTAlloc(HYPRE_Int, num_sends_L);
            L_send_ptr = hypre_CTAlloc(HYPRE_Int, num_sends_L+1);
            num_sends_L = 0;
            for (j=0; j < num_procs; j++)
            {
               this_proc = all_send_procs[j];   /* data count for process j */
               if (this_proc)
               {
                  L_send_procs[num_sends_L++] = j;
                  L_send_ptr[num_sends_L] = this_proc + L_send_ptr[num_sends_L-1];
               }
            }
         }
         if (max_recvs)
         {
            L_recv_procs = hypre_CTAlloc(HYPRE_Int, num_recvs_L);
            L_recv_ptr = hypre_CTAlloc(HYPRE_Int, num_recvs_L+1);
            num_recvs_L = 0;
            for (j=0; j < num_procs; j++)
            {
               this_proc = all_recv_procs[j];   /* data count for process j */
               if (this_proc)
               {
                  L_recv_procs[num_recvs_L++] = j;
                  L_recv_ptr[num_recvs_L] = this_proc + L_recv_ptr[num_recvs_L-1];
               }
            }
         }
      }
   }
   /* single release point for the scratch process lists */
   if (max_sends) hypre_TFree(all_send_procs);
   if (max_recvs) hypre_TFree(all_recv_procs);

   /*---------------------------------------------------------------------
    * Allocate Lambda, the scaling buffers and the combined vectors
    *--------------------------------------------------------------------*/

   L_diag = hypre_CSRMatrixCreate(num_rows_L, num_rows_L, num_nonzeros_diag);
   L_offd = hypre_CSRMatrixCreate(num_rows_L, num_cols_offd_L, num_nonzeros_offd);
   hypre_CSRMatrixInitialize(L_diag);
   hypre_CSRMatrixInitialize(L_offd);
   if (num_nonzeros_diag)
   {
      L_diag_data = hypre_CSRMatrixData(L_diag);
      L_diag_j = hypre_CSRMatrixJ(L_diag);
   }
   L_diag_i = hypre_CSRMatrixI(L_diag);
   if (num_nonzeros_offd)
   {
      L_offd_data = hypre_CSRMatrixData(L_offd);
      L_offd_j = hypre_CSRMatrixJ(L_offd);
   }
   L_offd_i = hypre_CSRMatrixI(L_offd);

   if (num_rows_L) D_data = hypre_CTAlloc(HYPRE_Real,num_rows_L);
   if (send_data_L)
   {
      L_send_map_elmts = hypre_CTAlloc(HYPRE_Int, send_data_L);
      buf_data = hypre_CTAlloc(HYPRE_Real,send_data_L);
   }
   if (num_cols_offd_L)
   {
      D_data_offd = hypre_CTAlloc(HYPRE_Real,num_cols_offd_L);
      remap = hypre_CTAlloc(HYPRE_Int, num_cols_offd_L);
   }

   Rtilde = hypre_CTAlloc(hypre_ParVector, 1);
   Rtilde_local = hypre_SeqVectorCreate(num_rows_L);
   hypre_SeqVectorInitialize(Rtilde_local);
   hypre_ParVectorLocalVector(Rtilde) = Rtilde_local;
   hypre_ParVectorOwnsData(Rtilde) = 1;

   Xtilde = hypre_CTAlloc(hypre_ParVector, 1);
   Xtilde_local = hypre_SeqVectorCreate(num_rows_L);
   hypre_SeqVectorInitialize(Xtilde_local);
   hypre_ParVectorLocalVector(Xtilde) = Xtilde_local;
   hypre_ParVectorOwnsData(Xtilde) = 1;

   x_data = hypre_VectorData(hypre_ParVectorLocalVector(Xtilde));
   r_data = hypre_VectorData(hypre_ParVectorLocalVector(Rtilde));

   /*---------------------------------------------------------------------
    * Pass 2: fill Lambda level by level
    *--------------------------------------------------------------------*/

   cnt_level = 0;
   cnt_diag = 0;
   cnt_offd = 0;
   cnt_row = 1;
   L_diag_i[0] = 0;
   L_offd_i[0] = 0;
   for (level=addlvl; level < num_levels; level++)
   {
      row_start = level_start[cnt_level];
      if (level != 0)
      {
         /* make F/U of this level alias their slice of Rtilde/Xtilde */
         tmp_data = hypre_VectorData(hypre_ParVectorLocalVector(F_array[level]));
         if (tmp_data) hypre_TFree(tmp_data);
         hypre_VectorData(hypre_ParVectorLocalVector(F_array[level])) = &r_data[row_start];
         hypre_VectorOwnsData(hypre_ParVectorLocalVector(F_array[level])) = 0;
         tmp_data = hypre_VectorData(hypre_ParVectorLocalVector(U_array[level]));
         if (tmp_data) hypre_TFree(tmp_data);
         hypre_VectorData(hypre_ParVectorLocalVector(U_array[level])) = &x_data[row_start];
         hypre_VectorOwnsData(hypre_ParVectorLocalVector(U_array[level])) = 0;
      }
      cnt_level++;

      start_diag = L_diag_i[cnt_row-1];
      start_offd = L_offd_i[cnt_row-1];
      A_tmp = A_array[level];
      A_tmp_diag = hypre_ParCSRMatrixDiag(A_tmp);
      A_tmp_offd = hypre_ParCSRMatrixOffd(A_tmp);
      comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp);
      A_tmp_diag_i = hypre_CSRMatrixI(A_tmp_diag);
      A_tmp_offd_i = hypre_CSRMatrixI(A_tmp_offd);
      A_tmp_diag_j = hypre_CSRMatrixJ(A_tmp_diag);
      A_tmp_offd_j = hypre_CSRMatrixJ(A_tmp_offd);
      A_tmp_diag_data = hypre_CSRMatrixData(A_tmp_diag);
      A_tmp_offd_data = hypre_CSRMatrixData(A_tmp_offd);
      num_rows_tmp = hypre_CSRMatrixNumRows(A_tmp_diag);
      if (comm_pkg)
      {
         num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
         num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
         send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg);
         recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
         send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
         send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
         recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
      }
      else
      {
         num_sends = 0;
         num_recvs = 0;
      }

      /* Compute new combined communication package.  L_send_ptr[p] and
         L_recv_ptr[p] are advanced while filling and are shifted back to
         start offsets after the level loop. */
      for (i=0; i < num_sends; i++)
      {
         this_proc = hypre_BinarySearch(L_send_procs,send_procs[i],num_sends_L);
         indx = L_send_ptr[this_proc];
         for (j=send_map_starts[i]; j < send_map_starts[i+1]; j++)
         {
            L_send_map_elmts[indx++] = row_start + send_map_elmts[j];
         }
         L_send_ptr[this_proc] = indx;
      }

      /* remap[c]: column of Lambda's offd part for this level's offd column c */
      cnt_map = 0;
      for (i = 0; i < num_recvs; i++)
      {
         this_proc = hypre_BinarySearch(L_recv_procs,recv_procs[i],num_recvs_L);
         indx = L_recv_ptr[this_proc];
         for (j=recv_vec_starts[i]; j < recv_vec_starts[i+1]; j++)
         {
            remap[cnt_map++] = indx++;
         }
         L_recv_ptr[this_proc] = indx;
      }

      /* Compute the row scaling D for this level (D_data is reused as
         per-level scratch, indexed from 0) and the row pointers of Lambda */
      if (relax_type == 0)
      {
         HYPRE_Real rlx_wt = relax_weight[level];
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
         for (i=0; i < num_rows_tmp; i++)
         {
            D_data[i] = rlx_wt/A_tmp_diag_data[A_tmp_diag_i[i]];
            L_diag_i[cnt_row+i] = start_diag + A_tmp_diag_i[i+1];
            L_offd_i[cnt_row+i] = start_offd + A_tmp_offd_i[i+1];
         }
      }
      else
      {
         l1_norms = l1_norms_ptr[level];
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
         for (i=0; i < num_rows_tmp; i++)
         {
            D_data[i] = 1.0/l1_norms[i];
            L_diag_i[cnt_row+i] = start_diag + A_tmp_diag_i[i+1];
            L_offd_i[cnt_row+i] = start_offd + A_tmp_offd_i[i+1];
         }
      }

      if (num_procs > 1)
      {
         /* pack the local scalings and exchange them into D_data_offd.
            NOTE(review): job code 1 is presumably the standard real-valued
            exchange over comm_pkg -- confirm. */
         index = 0;
         for (i=0; i < num_sends; i++)
         {
            start = send_map_starts[i];
            for (j=start; j < send_map_starts[i+1]; j++)
            {
               buf_data[index++] = D_data[send_map_elmts[j]];
            }
         }

         comm_handle = hypre_ParCSRCommHandleCreate(1, comm_pkg,
                                                    buf_data, D_data_offd);
         hypre_ParCSRCommHandleDestroy(comm_handle);
      }

      for (i = 0; i < num_rows_tmp; i++)
      {
         /* first stored entry of the row is treated as the diagonal */
         j_indx = A_tmp_diag_i[i];
         L_diag_data[cnt_diag] = (2.0 - A_tmp_diag_data[j_indx]*D_data[i])*D_data[i];
         L_diag_j[cnt_diag++] = i+row_start;
         for (j=A_tmp_diag_i[i]+1; j < A_tmp_diag_i[i+1]; j++)
         {
            j_indx = A_tmp_diag_j[j];
            L_diag_data[cnt_diag] = (- A_tmp_diag_data[j]*D_data[j_indx])*D_data[i];
            L_diag_j[cnt_diag++] = j_indx+row_start;
         }
         for (j=A_tmp_offd_i[i]; j < A_tmp_offd_i[i+1]; j++)
         {
            j_indx = A_tmp_offd_j[j];
            L_offd_data[cnt_offd] = (- A_tmp_offd_data[j]*D_data_offd[j_indx])*D_data[i];
            L_offd_j[cnt_offd++] = remap[j_indx];
         }
      }
      cnt_row += num_rows_tmp;
   }

   /* The fill loops advanced each pointer to the start of the next
      neighbour; shift by one to restore start offsets. */
   if (L_send_ptr)
   {
      for (i=num_sends_L-1; i > 0; i--)
      {
         L_send_ptr[i] = L_send_ptr[i-1];
      }
      L_send_ptr[0] = 0;
   }
   else
   {
      L_send_ptr = hypre_CTAlloc(HYPRE_Int,1);
   }

   if (L_recv_ptr)
   {
      for (i=num_recvs_L-1; i > 0; i--)
      {
         L_recv_ptr[i] = L_recv_ptr[i-1];
      }
      L_recv_ptr[0] = 0;
   }
   else
   {
      L_recv_ptr = hypre_CTAlloc(HYPRE_Int,1);
   }

   L_comm_pkg = hypre_CTAlloc(hypre_ParCSRCommPkg,1);

   hypre_ParCSRCommPkgNumRecvs(L_comm_pkg) = num_recvs_L;
   hypre_ParCSRCommPkgNumSends(L_comm_pkg) = num_sends_L;
   hypre_ParCSRCommPkgRecvProcs(L_comm_pkg) = L_recv_procs;
   hypre_ParCSRCommPkgSendProcs(L_comm_pkg) = L_send_procs;
   hypre_ParCSRCommPkgRecvVecStarts(L_comm_pkg) = L_recv_ptr;
   hypre_ParCSRCommPkgSendMapStarts(L_comm_pkg) = L_send_ptr;
   hypre_ParCSRCommPkgSendMapElmts(L_comm_pkg) = L_send_map_elmts;
   hypre_ParCSRCommPkgComm(L_comm_pkg) = comm;

   Lambda = hypre_CTAlloc(hypre_ParCSRMatrix, 1);
   hypre_ParCSRMatrixDiag(Lambda) = L_diag;
   hypre_ParCSRMatrixOffd(Lambda) = L_offd;
   hypre_ParCSRMatrixCommPkg(Lambda) = L_comm_pkg;
   hypre_ParCSRMatrixComm(Lambda) = comm;
   hypre_ParCSRMatrixOwnsData(Lambda) = 1;

   hypre_ParAMGDataLambda(amg_data) = Lambda;
   hypre_ParAMGDataRtilde(amg_data) = Rtilde;
   hypre_ParAMGDataXtilde(amg_data) = Xtilde;

   /* release scratch storage; each buffer is freed exactly once */
   hypre_TFree(D_data_offd);
   hypre_TFree(D_data);
   hypre_TFree(remap);
   hypre_TFree(buf_data);
   hypre_TFree(level_start);

   return Solve_err_flag;
}

Пример #15

Показать файл

Файл: gmres.c Проект: LLNL/COGENT

HYPRE_Int
hypre_GMRESSolve(void  *gmres_vdata,
                 void  *A,
                 void  *b,
		 void  *x)
{
   hypre_GMRESData  *gmres_data   = gmres_vdata;
   hypre_GMRESFunctions *gmres_functions = gmres_data->functions;
   HYPRE_Int 		     k_dim        = (gmres_data -> k_dim);
   HYPRE_Int               min_iter     = (gmres_data -> min_iter);
   HYPRE_Int 		     max_iter     = (gmres_data -> max_iter);
   HYPRE_Int               rel_change   = (gmres_data -> rel_change);
   HYPRE_Int         skip_real_r_check  = (gmres_data -> skip_real_r_check);
   double 	     r_tol        = (gmres_data -> tol);
   double 	     cf_tol       = (gmres_data -> cf_tol);
   double            a_tol        = (gmres_data -> a_tol);
   void             *matvec_data  = (gmres_data -> matvec_data);

   void             *r            = (gmres_data -> r);
   void             *w            = (gmres_data -> w);
   /* note: w_2 is only allocated if rel_change = 1 */
   void             *w_2          = (gmres_data -> w_2); 

   void            **p            = (gmres_data -> p);


   HYPRE_Int 	           (*precond)()   = (gmres_functions -> precond);
   HYPRE_Int 	            *precond_data = (gmres_data -> precond_data);

   HYPRE_Int             print_level    = (gmres_data -> print_level);
   HYPRE_Int             logging        = (gmres_data -> logging);

   double         *norms          = (gmres_data -> norms);
/* not used yet   char           *log_file_name  = (gmres_data -> log_file_name);*/
/*   FILE           *fp; */
   
   HYPRE_Int        break_value = 0;
   HYPRE_Int	      i, j, k;
   double     *rs, **hh, *c, *s, *rs_2; 
   HYPRE_Int        iter; 
   HYPRE_Int        my_id, num_procs;
   double     epsilon, gamma, t, r_norm, b_norm, den_norm, x_norm;
   double     w_norm;
   
   double     epsmac = 1.e-16; 
   double     ieee_check = 0.;

   double     guard_zero_residual; 
   double     cf_ave_0 = 0.0;
   double     cf_ave_1 = 0.0;
   double     weight;
   double     r_norm_0;
   double     relative_error = 1.0;

   HYPRE_Int        rel_change_passed = 0, num_rel_change_check = 0;

   double     real_r_norm_old, real_r_norm_new;

   (gmres_data -> converged) = 0;
   /*-----------------------------------------------------------------------
    * With relative change convergence test on, it is possible to attempt
    * another iteration with a zero residual. This causes the parameter
    * alpha to go NaN. The guard_zero_residual parameter is to circumvent
    * this. Perhaps it should be set to something non-zero (but small).
    *-----------------------------------------------------------------------*/
   guard_zero_residual = 0.0;

   (*(gmres_functions->CommInfo))(A,&my_id,&num_procs);
   if ( logging>0 || print_level>0 )
   {
      norms          = (gmres_data -> norms);
   }

   /* initialize work arrays */
   rs = hypre_CTAllocF(double,k_dim+1,gmres_functions); 
   c = hypre_CTAllocF(double,k_dim,gmres_functions); 
   s = hypre_CTAllocF(double,k_dim,gmres_functions); 
   if (rel_change) rs_2 = hypre_CTAllocF(double,k_dim+1,gmres_functions); 
   


   hh = hypre_CTAllocF(double*,k_dim+1,gmres_functions); 
   for (i=0; i < k_dim+1; i++)
   {	
   	hh[i] = hypre_CTAllocF(double,k_dim,gmres_functions); 
   }

   (*(gmres_functions->CopyVector))(b,p[0]);

   /* compute initial residual */
   (*(gmres_functions->Matvec))(matvec_data,-1.0, A, x, 1.0, p[0]);

   b_norm = sqrt((*(gmres_functions->InnerProd))(b,b));
   real_r_norm_old = b_norm;

   /* Since it is does not diminish performance, attempt to return an error flag
      and notify users when they supply bad input. */
   if (b_norm != 0.) ieee_check = b_norm/b_norm; /* INF -> NaN conversion */
   if (ieee_check != ieee_check)
   {
      /* ...INFs or NaNs in input can make ieee_check a NaN.  This test
         for ieee_check self-equality works on all IEEE-compliant compilers/
         machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754"
         by W. Kahan, May 31, 1996.  Currently (July 2002) this paper may be
         found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */
      if (logging > 0 || print_level > 0)
      {
        hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n");
        hypre_printf("ERROR -- hypre_GMRESSolve: INFs and/or NaNs detected in input.\n");
        hypre_printf("User probably placed non-numerics in supplied b.\n");
        hypre_printf("Returning error flag += 101.  Program not terminated.\n");
        hypre_printf("ERROR detected by Hypre ... END\n\n\n");
      }
      hypre_error(HYPRE_ERROR_GENERIC);
      return hypre_error_flag;
   }

   r_norm = sqrt((*(gmres_functions->InnerProd))(p[0],p[0]));
   r_norm_0 = r_norm;

   /* Since it is does not diminish performance, attempt to return an error flag
      and notify users when they supply bad input. */
   if (r_norm != 0.) ieee_check = r_norm/r_norm; /* INF -> NaN conversion */
   if (ieee_check != ieee_check)
   {
      /* ...INFs or NaNs in input can make ieee_check a NaN.  This test
         for ieee_check self-equality works on all IEEE-compliant compilers/
         machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754"
         by W. Kahan, May 31, 1996.  Currently (July 2002) this paper may be
         found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */
      if (logging > 0 || print_level > 0)
      {
        hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n");
        hypre_printf("ERROR -- hypre_GMRESSolve: INFs and/or NaNs detected in input.\n");
        hypre_printf("User probably placed non-numerics in supplied A or x_0.\n");
        hypre_printf("Returning error flag += 101.  Program not terminated.\n");
        hypre_printf("ERROR detected by Hypre ... END\n\n\n");
      }
      hypre_error(HYPRE_ERROR_GENERIC);
      return hypre_error_flag;
   }

   if ( logging>0 || print_level > 0)
   {
      norms[0] = r_norm;
      if ( print_level>1 && my_id == 0 )
      {
  	 hypre_printf("L2 norm of b: %e\n", b_norm);
         if (b_norm == 0.0)
            hypre_printf("Rel_resid_norm actually contains the residual norm\n");
         hypre_printf("Initial L2 norm of residual: %e\n", r_norm);
      
      }
   }
   iter = 0;

   if (b_norm > 0.0)
   {
     /* convergence criterion |r_i|/|b| <= accuracy if |b| > 0 */
     den_norm= b_norm;
   }
   else
   {
     /* convergence criterion |r_i|/|r0| <= accuracy if |b| = 0 */
     den_norm= r_norm;
   };


   /* convergence criteria: |r_i| <= max( a_tol, r_tol * den_norm)
      den_norm = |r_0| or |b|
      note: default for a_tol is 0.0, so relative residual criteria is used unless
            user specifies a_tol, or sets r_tol = 0.0, which means absolute
            tol only is checked  */
      
   epsilon = hypre_max(a_tol,r_tol*den_norm);
   
   /* so now our stop criteria is |r_i| <= epsilon */

   if ( print_level>1 && my_id == 0 )
   {
      if (b_norm > 0.0)
         {hypre_printf("=============================================\n\n");
          hypre_printf("Iters     resid.norm     conv.rate  rel.res.norm\n");
          hypre_printf("-----    ------------    ---------- ------------\n");
      
          }

      else
         {hypre_printf("=============================================\n\n");
          hypre_printf("Iters     resid.norm     conv.rate\n");
          hypre_printf("-----    ------------    ----------\n");
      
          };
   }


   /* once the rel. change check has passed, we do not want to check it again */
   rel_change_passed = 0;


   /* outer iteration cycle */
   while (iter < max_iter)
   {
   /* initialize first term of hessenberg system */

	rs[0] = r_norm;
        if (r_norm == 0.0)
        {
           hypre_TFreeF(c,gmres_functions); 
           hypre_TFreeF(s,gmres_functions); 
           hypre_TFreeF(rs,gmres_functions);
           if (rel_change)  hypre_TFreeF(rs_2,gmres_functions);
           for (i=0; i < k_dim+1; i++) hypre_TFreeF(hh[i],gmres_functions);
           hypre_TFreeF(hh,gmres_functions); 
	   return hypre_error_flag;
           
	}

        /* see if we are already converged and 
           should print the final norm and exit */
	if (r_norm  <= epsilon && iter >= min_iter) 
        {
           if (!rel_change) /* shouldn't exit after no iterations if
                             * relative change is on*/
           {
              (*(gmres_functions->CopyVector))(b,r);
              (*(gmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r);
              r_norm = sqrt((*(gmres_functions->InnerProd))(r,r));
              if (r_norm  <= epsilon)
              {
                 if ( print_level>1 && my_id == 0)
                 {
                    hypre_printf("\n\n");
                    hypre_printf("Final L2 norm of residual: %e\n\n", r_norm);
                 }
                 break;
              }
              else
                 if ( print_level>0 && my_id == 0)
                    hypre_printf("false convergence 1\n");
           }
	}

      
        
      	t = 1.0 / r_norm;
	(*(gmres_functions->ScaleVector))(t,p[0]);
	i = 0;

        /***RESTART CYCLE (right-preconditioning) ***/
        while (i < k_dim && iter < max_iter)
	{
           i++;
           iter++;
           (*(gmres_functions->ClearVector))(r);
           precond(precond_data, A, p[i-1], r);
           (*(gmres_functions->Matvec))(matvec_data, 1.0, A, r, 0.0, p[i]);
           /* modified Gram_Schmidt */
           for (j=0; j < i; j++)
           {
              hh[j][i-1] = (*(gmres_functions->InnerProd))(p[j],p[i]);
              (*(gmres_functions->Axpy))(-hh[j][i-1],p[j],p[i]);
           }
           t = sqrt((*(gmres_functions->InnerProd))(p[i],p[i]));
           hh[i][i-1] = t;	
           if (t != 0.0)
           {
              t = 1.0/t;
              (*(gmres_functions->ScaleVector))(t,p[i]);
           }
           /* done with modified Gram_schmidt and Arnoldi step.
              update factorization of hh */
           for (j = 1; j < i; j++)
           {
              t = hh[j-1][i-1];
              hh[j-1][i-1] = s[j-1]*hh[j][i-1] + c[j-1]*t;
              hh[j][i-1] = -s[j-1]*t + c[j-1]*hh[j][i-1];
           }
           t= hh[i][i-1]*hh[i][i-1];
           t+= hh[i-1][i-1]*hh[i-1][i-1];
           gamma = sqrt(t);
           if (gamma == 0.0) gamma = epsmac;
           c[i-1] = hh[i-1][i-1]/gamma;
           s[i-1] = hh[i][i-1]/gamma;
           rs[i] = -hh[i][i-1]*rs[i-1];
           rs[i]/=  gamma;
           rs[i-1] = c[i-1]*rs[i-1];
           /* determine residual norm */
           hh[i-1][i-1] = s[i-1]*hh[i][i-1] + c[i-1]*hh[i-1][i-1];
           r_norm = fabs(rs[i]);

           /* print ? */
           if ( print_level>0 )
           {
              norms[iter] = r_norm;
              if ( print_level>1 && my_id == 0 )
              {
                 if (b_norm > 0.0)
                    hypre_printf("% 5d    %e    %f   %e\n", iter, 
                           norms[iter],norms[iter]/norms[iter-1],
                           norms[iter]/b_norm);
                 else
                    hypre_printf("% 5d    %e    %f\n", iter, norms[iter],
                           norms[iter]/norms[iter-1]);
              }
           }
           /*convergence factor tolerance */
           if (cf_tol > 0.0)
           {
              cf_ave_0 = cf_ave_1;
              cf_ave_1 = pow( r_norm / r_norm_0, 1.0/(2.0*iter));
              
              weight   = fabs(cf_ave_1 - cf_ave_0);
              weight   = weight / hypre_max(cf_ave_1, cf_ave_0);
              weight   = 1.0 - weight;
#if 0
              hypre_printf("I = %d: cf_new = %e, cf_old = %e, weight = %e\n",
                     i, cf_ave_1, cf_ave_0, weight );
#endif
              if (weight * cf_ave_1 > cf_tol) 
              {
                 break_value = 1;
                 break;
              }
           }
           /* should we exit the restart cycle? (conv. check) */
           if (r_norm <= epsilon && iter >= min_iter)
           {
              if (rel_change && !rel_change_passed)
              {
                 
                 /* To decide whether to break here: to actually
                  determine the relative change requires the approx
                  solution (so a triangular solve) and a
                  precond. solve - so if we have to do this many
                  times, it will be expensive...(unlike cg where it
                  is relatively straightforward)

                  previously, the intent (there was a bug), was to
                  exit the restart cycle based on the residual norm
                  and check the relative change outside the cycle.
                  Here we will check the relative change here as we
                  don't want to exit the restart cycle prematurely */
                 
                 for (k=0; k<i; k++) /* extra copy of rs so we don't need
                                        to change the later solve */
                    rs_2[k] = rs[k];

                 /* solve tri. system*/
                 rs_2[i-1] = rs_2[i-1]/hh[i-1][i-1];
                 for (k = i-2; k >= 0; k--)
                 {
                    t = 0.0;
                    for (j = k+1; j < i; j++)
                    {
                       t -= hh[k][j]*rs_2[j];
                    }
                    t+= rs_2[k];
                    rs_2[k] = t/hh[k][k];
                 }
                 
                 (*(gmres_functions->CopyVector))(p[i-1],w);
                 (*(gmres_functions->ScaleVector))(rs_2[i-1],w);
                 for (j = i-2; j >=0; j--)
                    (*(gmres_functions->Axpy))(rs_2[j], p[j], w);
                    
                 (*(gmres_functions->ClearVector))(r);
                 /* find correction (in r) */
                 precond(precond_data, A, w, r);
                 /* copy current solution (x) to w (don't want to over-write x)*/
                 (*(gmres_functions->CopyVector))(x,w);

                 /* add the correction */
                 (*(gmres_functions->Axpy))(1.0,r,w);

                 /* now w is the approx solution  - get the norm*/
                 x_norm = sqrt( (*(gmres_functions->InnerProd))(w,w) );

                 if ( !(x_norm <= guard_zero_residual ))
                    /* don't divide by zero */
                 {  /* now get  x_i - x_i-1 */
                    
                    if (num_rel_change_check)
                    {
                       /* have already checked once so we can avoid another precond.
                          solve */
                       (*(gmres_functions->CopyVector))(w, r);
                       (*(gmres_functions->Axpy))(-1.0, w_2, r);
                       /* now r contains x_i - x_i-1*/

                       /* save current soln w in w_2 for next time */
                       (*(gmres_functions->CopyVector))(w, w_2);
                    }
                    else
                    {
                       /* first time to check rel change*/

                       /* first save current soln w in w_2 for next time */
                       (*(gmres_functions->CopyVector))(w, w_2);

                       /* for relative change take x_(i-1) to be 
                          x + M^{-1}[sum{j=0..i-2} rs_j p_j ]. 
                          Now
                          x_i - x_{i-1}= {x + M^{-1}[sum{j=0..i-1} rs_j p_j ]}
                          - {x + M^{-1}[sum{j=0..i-2} rs_j p_j ]}
                          = M^{-1} rs_{i-1}{p_{i-1}} */
                       
                       (*(gmres_functions->ClearVector))(w);
                       (*(gmres_functions->Axpy))(rs_2[i-1], p[i-1], w);
                       (*(gmres_functions->ClearVector))(r);
                       /* apply the preconditioner */
                       precond(precond_data, A, w, r);
                       /* now r contains x_i - x_i-1 */          
                    }
                    /* find the norm of x_i - x_i-1 */          
                    w_norm = sqrt( (*(gmres_functions->InnerProd))(r,r) );
                    relative_error = w_norm/x_norm;
                    if (relative_error <= r_tol)
                    {
                       rel_change_passed = 1;
                       break;
                    }
                 }
                 else
                 {
                    rel_change_passed = 1;
                    break;

                 }
                 num_rel_change_check++;
              }
           else /* no relative change */
              {
                 break;
              }
           }
           

	} /*** end of restart cycle ***/

	/* now compute solution, first solve upper triangular system */

	if (break_value) break;
	
	rs[i-1] = rs[i-1]/hh[i-1][i-1];
	for (k = i-2; k >= 0; k--)
	{
           t = 0.0;
           for (j = k+1; j < i; j++)
           {
              t -= hh[k][j]*rs[j];
           }
           t+= rs[k];
           rs[k] = t/hh[k][k];
	}

        (*(gmres_functions->CopyVector))(p[i-1],w);
        (*(gmres_functions->ScaleVector))(rs[i-1],w);
        for (j = i-2; j >=0; j--)
                (*(gmres_functions->Axpy))(rs[j], p[j], w);

	(*(gmres_functions->ClearVector))(r);
	/* find correction (in r) */
        precond(precond_data, A, w, r);

        /* update current solution x (in x) */
	(*(gmres_functions->Axpy))(1.0,r,x);
         

        /* check for convergence by evaluating the actual residual */
	if (r_norm  <= epsilon && iter >= min_iter)
        {
           if (skip_real_r_check)
           {
              (gmres_data -> converged) = 1;
              break;
           }

           /* calculate actual residual norm*/
           (*(gmres_functions->CopyVector))(b,r);
           (*(gmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r);
           real_r_norm_new = r_norm = sqrt( (*(gmres_functions->InnerProd))(r,r) );

           if (r_norm <= epsilon)
           {
              if (rel_change && !rel_change_passed) /* calculate the relative change */
              {

                 /* calculate the norm of the solution */
                 x_norm = sqrt( (*(gmres_functions->InnerProd))(x,x) );
               
                 if ( !(x_norm <= guard_zero_residual ))
                    /* don't divide by zero */
                 {
                    
                    /* for relative change take x_(i-1) to be 
                       x + M^{-1}[sum{j=0..i-2} rs_j p_j ]. 
                       Now
                       x_i - x_{i-1}= {x + M^{-1}[sum{j=0..i-1} rs_j p_j ]}
                       - {x + M^{-1}[sum{j=0..i-2} rs_j p_j ]}
                       = M^{-1} rs_{i-1}{p_{i-1}} */
                    (*(gmres_functions->ClearVector))(w);
                    (*(gmres_functions->Axpy))(rs[i-1], p[i-1], w);
                    (*(gmres_functions->ClearVector))(r);
                    /* apply the preconditioner */
                    precond(precond_data, A, w, r);
                    /* find the norm of x_i - x_i-1 */          
                    w_norm = sqrt( (*(gmres_functions->InnerProd))(r,r) );
                    relative_error= w_norm/x_norm;
                    if ( relative_error < r_tol )
                    {
                       (gmres_data -> converged) = 1;
                       if ( print_level>1 && my_id == 0 )
                       {
                          hypre_printf("\n\n");
                          hypre_printf("Final L2 norm of residual: %e\n\n", r_norm);
                       }
                       break;
                    }
                 }
                 else
                 {
                    (gmres_data -> converged) = 1;
                    if ( print_level>1 && my_id == 0 )
                    {
                       hypre_printf("\n\n");
                       hypre_printf("Final L2 norm of residual: %e\n\n", r_norm);
                    }
                    break;
                 }

              }
              else /* don't need to check rel. change */
              {
                 if ( print_level>1 && my_id == 0 )
                 {
                    hypre_printf("\n\n");
                    hypre_printf("Final L2 norm of residual: %e\n\n", r_norm);
                 }
                 (gmres_data -> converged) = 1;
                 break;
              }
           }
           else /* conv. has not occurred, according to true residual */
           {
              /* exit if the real residual norm has not decreased */
              if (real_r_norm_new >= real_r_norm_old)
              {
                 if (print_level > 1 && my_id == 0)
                 {
                    hypre_printf("\n\n");
                    hypre_printf("Final L2 norm of residual: %e\n\n", r_norm);
                 }
                 (gmres_data -> converged) = 1;
                 break;
              }

              /* report discrepancy between real/GMRES residuals and restart */
              if ( print_level>0 && my_id == 0)
                 hypre_printf("false convergence 2, L2 norm of residual: %e\n", r_norm);
              (*(gmres_functions->CopyVector))(r,p[0]);
              i = 0;
              real_r_norm_old = real_r_norm_new;
           }
	} /* end of convergence check */

        /* compute residual vector and continue loop */
	for (j=i ; j > 0; j--)
	{
           rs[j-1] = -s[j-1]*rs[j];
           rs[j] = c[j-1]*rs[j];
	}
        
        if (i) (*(gmres_functions->Axpy))(rs[i]-1.0,p[i],p[i]);
        for (j=i-1 ; j > 0; j--)
           (*(gmres_functions->Axpy))(rs[j],p[j],p[i]);
        
        if (i)
        {
           (*(gmres_functions->Axpy))(rs[0]-1.0,p[0],p[0]);
           (*(gmres_functions->Axpy))(1.0,p[i],p[0]);
        }
   } /* END of iteration while loop */


   if ( print_level>1 && my_id == 0 )
          hypre_printf("\n\n"); 

   (gmres_data -> num_iterations) = iter;
   if (b_norm > 0.0)
      (gmres_data -> rel_residual_norm) = r_norm/b_norm;
   if (b_norm == 0.0)
      (gmres_data -> rel_residual_norm) = r_norm;

   if (iter >= max_iter && r_norm > epsilon) hypre_error(HYPRE_ERROR_CONV);
   

   hypre_TFreeF(c,gmres_functions); 
   hypre_TFreeF(s,gmres_functions); 
   hypre_TFreeF(rs,gmres_functions);
   if (rel_change)  hypre_TFreeF(rs_2,gmres_functions);

   for (i=0; i < k_dim+1; i++)
   {	
   	hypre_TFreeF(hh[i],gmres_functions);
   }
   hypre_TFreeF(hh,gmres_functions); 

   return hypre_error_flag;
}

Пример #16

Показать файл

Файл: par_stats.c Проект: Kyushick/sight

int
hypre_BoomerAMGSetupStats( void               *amg_vdata,
                        hypre_ParCSRMatrix *A         )
{
   MPI_Comm 	      comm = hypre_ParCSRMatrixComm(A);   

   hypre_ParAMGData *amg_data = (hypre_ParAMGData*)amg_vdata;

   /*hypre_SeqAMGData *seq_data = hypre_ParAMGDataSeqData(amg_data);*/

   /* Data Structure variables */

   hypre_ParCSRMatrix **A_array;
   hypre_ParCSRMatrix **P_array;

   hypre_CSRMatrix *A_diag;
   double          *A_diag_data;
   int             *A_diag_i;

   hypre_CSRMatrix *A_offd;   
   double          *A_offd_data;
   int             *A_offd_i;

   hypre_CSRMatrix *P_diag;
   double          *P_diag_data;
   int             *P_diag_i;

   hypre_CSRMatrix *P_offd;   
   double          *P_offd_data;
   int             *P_offd_i;


   int	    numrows;

   HYPRE_BigInt	    *row_starts;

 
   int      num_levels; 
   int      coarsen_type;
   int      interp_type;
   int      measure_type;
   double   global_nonzeros;

   double  *send_buff;
   double  *gather_buff;
 
   /* Local variables */

   int       level;
   int       j;
   HYPRE_BigInt fine_size;
 
   int       min_entries;
   int       max_entries;

   int       num_procs,my_id, num_threads;


   double    min_rowsum;
   double    max_rowsum;
   double    sparse;


   int       i;
   

   HYPRE_BigInt coarse_size;
   int       entries;

   double    avg_entries;
   double    rowsum;

   double    min_weight;
   double    max_weight;

   int       global_min_e;
   int       global_max_e;
   double    global_min_rsum;
   double    global_max_rsum;
   double    global_min_wt;
   double    global_max_wt;

   double  *num_coeffs;
   double  *num_variables;
   double   total_variables; 
   double   operat_cmplxty;
   double   grid_cmplxty;

   /* amg solve params */
   int      max_iter;
   int      cycle_type;    
   int     *num_grid_sweeps;  
   int     *grid_relax_type;   
   int      relax_order;
   int    **grid_relax_points; 
   double  *relax_weight;
   double  *omega;
   double   tol;


   int one = 1;
   int minus_one = -1;
   int zero = 0;
   int smooth_type;
   int smooth_num_levels;
   int agg_num_levels;
   /*int seq_cg = 0;*/
   
   /*if (seq_data)
      seq_cg = 1;*/


   MPI_Comm_size(comm, &num_procs);   
   MPI_Comm_rank(comm,&my_id);
   num_threads = hypre_NumThreads();

   if (my_id == 0)
      printf("\nNumber of MPI processes: %d , Number of OpenMP threads: %d\n", num_procs, num_threads);
   A_array = hypre_ParAMGDataAArray(amg_data);
   P_array = hypre_ParAMGDataPArray(amg_data);
   num_levels = hypre_ParAMGDataNumLevels(amg_data);
   coarsen_type = hypre_ParAMGDataCoarsenType(amg_data);
   interp_type = hypre_ParAMGDataInterpType(amg_data);
   measure_type = hypre_ParAMGDataMeasureType(amg_data);
   smooth_type = hypre_ParAMGDataSmoothType(amg_data);
   smooth_num_levels = hypre_ParAMGDataSmoothNumLevels(amg_data);
   agg_num_levels = hypre_ParAMGDataAggNumLevels(amg_data);


   /*----------------------------------------------------------
    * Get the amg_data data
    *----------------------------------------------------------*/

   num_levels = hypre_ParAMGDataNumLevels(amg_data);
   max_iter   = hypre_ParAMGDataMaxIter(amg_data);
   cycle_type = hypre_ParAMGDataCycleType(amg_data);    
   num_grid_sweeps = hypre_ParAMGDataNumGridSweeps(amg_data);  
   grid_relax_type = hypre_ParAMGDataGridRelaxType(amg_data);
   grid_relax_points = hypre_ParAMGDataGridRelaxPoints(amg_data);
   relax_weight = hypre_ParAMGDataRelaxWeight(amg_data); 
   relax_order = hypre_ParAMGDataRelaxOrder(amg_data); 
   omega = hypre_ParAMGDataOmega(amg_data); 
   tol = hypre_ParAMGDataTol(amg_data);

   /*block_mode = hypre_ParAMGDataBlockMode(amg_data);*/

   send_buff     = hypre_CTAlloc(double, 6);
#ifdef HYPRE_NO_GLOBAL_PARTITION
   gather_buff = hypre_CTAlloc(double,6);    
#else
   gather_buff = hypre_CTAlloc(double,6*num_procs);    
#endif

   if (my_id==0)
   {
      printf("\nBoomerAMG SETUP PARAMETERS:\n\n");
      printf(" Max levels = %d\n",hypre_ParAMGDataMaxLevels(amg_data));
      printf(" Num levels = %d\n\n",num_levels);
      printf(" Strength Threshold = %f\n", 
                         hypre_ParAMGDataStrongThreshold(amg_data));
      printf(" Interpolation Truncation Factor = %f\n", 
                         hypre_ParAMGDataTruncFactor(amg_data));
      printf(" Maximum Row Sum Threshold for Dependency Weakening = %f\n\n", 
                         hypre_ParAMGDataMaxRowSum(amg_data));

      if (coarsen_type == 0)
      {
	printf(" Coarsening Type = Cleary-Luby-Jones-Plassman\n");
      }
      else if (abs(coarsen_type) == 1) 
      {
	printf(" Coarsening Type = Ruge\n");
      }
      else if (abs(coarsen_type) == 2) 
      {
	printf(" Coarsening Type = Ruge2B\n");
      }
      else if (abs(coarsen_type) == 3) 
      {
	printf(" Coarsening Type = Ruge3\n");
      }
      else if (abs(coarsen_type) == 4) 
      {
	printf(" Coarsening Type = Ruge 3c \n");
      }
      else if (abs(coarsen_type) == 5) 
      {
	printf(" Coarsening Type = Ruge relax special points \n");
      }
      else if (abs(coarsen_type) == 6) 
      {
	printf(" Coarsening Type = Falgout-CLJP \n");
      }
      else if (abs(coarsen_type) == 8) 
      {
	printf(" Coarsening Type = PMIS \n");
      }
      else if (abs(coarsen_type) == 10) 
      {
	printf(" Coarsening Type = HMIS \n");
      }
      else if (abs(coarsen_type) == 11) 
      {
	printf(" Coarsening Type = Ruge 1st pass only \n");
      }
      else if (abs(coarsen_type) == 9) 
      {
	printf(" Coarsening Type = PMIS fixed random \n");
      }
      else if (abs(coarsen_type) == 7) 
      {
	printf(" Coarsening Type = CLJP, fixed random \n");
      }
      if (coarsen_type > 0) 
      {
	printf(" Hybrid Coarsening (switch to CLJP when coarsening slows)\n");
      }
      

      if (coarsen_type)
      	printf(" measures are determined %s\n\n", 
                  (measure_type ? "globally" : "locally"));

      if (agg_num_levels)
	printf(" no. of levels of aggressive coarsening: %d\n\n", agg_num_levels);

#ifdef HYPRE_NO_GLOBAL_PARTITION
      printf( "\n No global partition option chosen.\n\n");
#endif

      if (interp_type == 0)
      {
	printf(" Interpolation = modified classical interpolation\n");
      }
      else if (interp_type == 1) 
      {
	printf(" Interpolation = LS interpolation \n");
      }
      else if (interp_type == 2) 
      {
	printf(" Interpolation = modified classical interpolation for hyperbolic PDEs\n");
      }
      else if (interp_type == 3) 
      {
	printf(" Interpolation = direct interpolation with separation of weights\n");
      }
      else if (interp_type == 4) 
      {
	printf(" Interpolation = multipass interpolation\n");
      }
      else if (interp_type == 5) 
      {
	printf(" Interpolation = multipass interpolation with separation of weights\n");
      }
      else if (interp_type == 6) 
      {
	printf(" Interpolation = extended+i interpolation\n");
      }
      else if (interp_type == 7) 
      {
	printf(" Interpolation = extended+i interpolation (only when needed)\n");
      }
      else if (interp_type == 8) 
      {
	printf(" Interpolation = standard interpolation\n");
      }
      else if (interp_type == 9) 
      {
	printf(" Interpolation = standard interpolation with separation of weights\n");
      }
      else if (interp_type == 12) 
      {
	printf(" FF interpolation \n");
      }
      else if (interp_type == 13) 
      {
	printf(" FF1 interpolation \n");
      }

      {
         printf( "\nOperator Matrix Information:\n\n");
      }
#if HYPRE_LONG_LONG
      printf("                  nonzero         entries p");
      printf("er row        row sums\n");
      printf("lev        rows   entries  sparse  min  max   ");
      printf("avg       min         max\n");
      printf("=======================================");
      printf("==================================\n");
#else      
      printf("            nonzero         entries p");
      printf("er row        row sums\n");
      printf("lev   rows  entries  sparse  min  max   ");
      printf("avg       min         max\n");
      printf("=======================================");
      printf("============================\n");
#endif
   }
  
   /*-----------------------------------------------------
    *  Enter Statistics Loop
    *-----------------------------------------------------*/

   num_coeffs = hypre_CTAlloc(double,num_levels);

   num_variables = hypre_CTAlloc(double,num_levels);

   for (level = 0; level < num_levels; level++)
   { 

      {
         A_diag = hypre_ParCSRMatrixDiag(A_array[level]);
         A_diag_data = hypre_CSRMatrixData(A_diag);
         A_diag_i = hypre_CSRMatrixI(A_diag);
         
         A_offd = hypre_ParCSRMatrixOffd(A_array[level]);   
         A_offd_data = hypre_CSRMatrixData(A_offd);
         A_offd_i = hypre_CSRMatrixI(A_offd);
         
         row_starts = hypre_ParCSRMatrixRowStarts(A_array[level]);
         
         fine_size = hypre_ParCSRMatrixGlobalNumRows(A_array[level]);
         global_nonzeros = hypre_ParCSRMatrixDNumNonzeros(A_array[level]);
         num_coeffs[level] = global_nonzeros;
         num_variables[level] = (double) fine_size;
         
         sparse = global_nonzeros /((double) fine_size * (double) fine_size);

         min_entries = 0;
         max_entries = 0;
         min_rowsum = 0.0;
         max_rowsum = 0.0;
         
         if (hypre_CSRMatrixNumRows(A_diag))
         {
            min_entries = (A_diag_i[1]-A_diag_i[0])+(A_offd_i[1]-A_offd_i[0]);
            for (j = A_diag_i[0]; j < A_diag_i[1]; j++)
               min_rowsum += A_diag_data[j];
            for (j = A_offd_i[0]; j < A_offd_i[1]; j++)
               min_rowsum += A_offd_data[j];
            
            max_rowsum = min_rowsum;
            
            for (j = 0; j < hypre_CSRMatrixNumRows(A_diag); j++)
            {
               entries = (A_diag_i[j+1]-A_diag_i[j])+(A_offd_i[j+1]-A_offd_i[j]);
               min_entries = hypre_min(entries, min_entries);
               max_entries = hypre_max(entries, max_entries);
               
               rowsum = 0.0;
               for (i = A_diag_i[j]; i < A_diag_i[j+1]; i++)
                  rowsum += A_diag_data[i];
               
               for (i = A_offd_i[j]; i < A_offd_i[j+1]; i++)
                  rowsum += A_offd_data[i];
               
               min_rowsum = hypre_min(rowsum, min_rowsum);
               max_rowsum = hypre_max(rowsum, max_rowsum);
            }
         }
         avg_entries = global_nonzeros / ((double) fine_size);
      }
      
#ifdef HYPRE_NO_GLOBAL_PARTITION       

       numrows = (int)(row_starts[1]-row_starts[0]);
       if (!numrows) /* if we don't have any rows, then don't have this count toward
                         min row sum or min num entries */
       {
          min_entries = 1000000;
          min_rowsum =  1.0e7;
       }
       
       send_buff[0] = - (double) min_entries;
       send_buff[1] = (double) max_entries;
       send_buff[2] = - min_rowsum;
       send_buff[3] = max_rowsum;

       MPI_Reduce(send_buff, gather_buff, 4, MPI_DOUBLE, MPI_MAX, 0, comm);
       
       if (my_id ==0)
       {
          global_min_e = - gather_buff[0];
          global_max_e = gather_buff[1];
          global_min_rsum = - gather_buff[2];
          global_max_rsum = gather_buff[3];
#ifdef HYPRE_LONG_LONG
          printf( "%2d %12lld %8.0f  %0.3f  %4d %4d",
                  level, fine_size, global_nonzeros, sparse, global_min_e, 
                  global_max_e);
#else          
          printf( "%2d %7d %8.0f  %0.3f  %4d %4d",
                  level, fine_size, global_nonzeros, sparse, global_min_e, 
                  global_max_e);
#endif          
          printf("  %4.1f  %10.3e  %10.3e\n", avg_entries,
                 global_min_rsum, global_max_rsum);
       }
       
#else

       send_buff[0] = (double) min_entries;
       send_buff[1] = (double) max_entries;
       send_buff[2] = min_rowsum;
       send_buff[3] = max_rowsum;
       
       MPI_Gather(send_buff,4,MPI_DOUBLE,gather_buff,4,MPI_DOUBLE,0,comm);

       if (my_id == 0)
       {
          global_min_e = 1000000;
          global_max_e = 0;
          global_min_rsum = 1.0e7;
          global_max_rsum = 0.0;
          for (j = 0; j < num_procs; j++)
          {
             numrows = row_starts[j+1]-row_starts[j];
             if (numrows)
             {
                global_min_e = hypre_min(global_min_e, (int) gather_buff[j*4]);
                global_min_rsum = hypre_min(global_min_rsum, gather_buff[j*4 +2]);
             }
             global_max_e = hypre_max(global_max_e, (int) gather_buff[j*4 +1]);
             global_max_rsum = hypre_max(global_max_rsum, gather_buff[j*4 +3]);
          }

#ifdef HYPRE_LONG_LONG
          printf( "%2d %12lld %8.0f  %0.3f  %4d %4d",
                  level, fine_size, global_nonzeros, sparse, global_min_e, 
                  global_max_e);
#else          
          printf( "%2d %7d %8.0f  %0.3f  %4d %4d",
                  level, fine_size, global_nonzeros, sparse, global_min_e, 
                  global_max_e);
#endif          
          printf("  %4.1f  %10.3e  %10.3e\n", avg_entries,
                 global_min_rsum, global_max_rsum);
       }

#endif

        
   }

       
   if (my_id == 0)
   {
      {
         printf( "\n\nInterpolation Matrix Information:\n\n");
      }
#if HYPRE_LONG_LONG
      printf("                             entries/row    min     max");
      printf("         row sums\n");
      printf("lev        rows x cols          min max  ");
      printf("   weight   weight     min       max \n");
      printf("=======================================");
      printf("======================================\n");
#else      
      printf("                 entries/row    min     max");
      printf("         row sums\n");
      printf("lev  rows cols    min max  ");
      printf("   weight   weight     min       max \n");
      printf("=======================================");
      printf("==========================\n");
#endif
   }
  
   /*-----------------------------------------------------
    *  Enter Statistics Loop
    *-----------------------------------------------------*/


   for (level = 0; level < num_levels-1; level++)
   {
    
      {
         P_diag = hypre_ParCSRMatrixDiag(P_array[level]);
         P_diag_data = hypre_CSRMatrixData(P_diag);
         P_diag_i = hypre_CSRMatrixI(P_diag);
         
         P_offd = hypre_ParCSRMatrixOffd(P_array[level]);   
         P_offd_data = hypre_CSRMatrixData(P_offd);
         P_offd_i = hypre_CSRMatrixI(P_offd);
         
         row_starts = hypre_ParCSRMatrixRowStarts(P_array[level]);
         
         fine_size = hypre_ParCSRMatrixGlobalNumRows(P_array[level]);
         coarse_size = hypre_ParCSRMatrixGlobalNumCols(P_array[level]);
         global_nonzeros = hypre_ParCSRMatrixNumNonzeros(P_array[level]);
         
         min_weight = 1.0;
         max_weight = 0.0;
         max_rowsum = 0.0;
         min_rowsum = 0.0;
         min_entries = 0;
         max_entries = 0;
         
         if (hypre_CSRMatrixNumRows(P_diag))
         {
            if (hypre_CSRMatrixNumCols(P_diag)) min_weight = P_diag_data[0];
            for (j = P_diag_i[0]; j < P_diag_i[1]; j++)
            {
               min_weight = hypre_min(min_weight, P_diag_data[j]);
               if (P_diag_data[j] != 1.0)
                  max_weight = hypre_max(max_weight, P_diag_data[j]);
               min_rowsum += P_diag_data[j];
            }
            for (j = P_offd_i[0]; j < P_offd_i[1]; j++)
            {        
               min_weight = hypre_min(min_weight, P_offd_data[j]); 
               if (P_offd_data[j] != 1.0)
                  max_weight = hypre_max(max_weight, P_offd_data[j]);     
               min_rowsum += P_offd_data[j];
            }
            
            max_rowsum = min_rowsum;
            
            min_entries = (P_diag_i[1]-P_diag_i[0])+(P_offd_i[1]-P_offd_i[0]); 
            max_entries = 0;
            
            for (j = 0; j < hypre_CSRMatrixNumRows(P_diag); j++)
            {
               entries = (P_diag_i[j+1]-P_diag_i[j])+(P_offd_i[j+1]-P_offd_i[j]);
               min_entries = hypre_min(entries, min_entries);
               max_entries = hypre_max(entries, max_entries);
               
               rowsum = 0.0;
               for (i = P_diag_i[j]; i < P_diag_i[j+1]; i++)
               {
                  min_weight = hypre_min(min_weight, P_diag_data[i]);
                  if (P_diag_data[i] != 1.0)
                     max_weight = hypre_max(max_weight, P_diag_data[i]);
                  rowsum += P_diag_data[i];
               }
               
               for (i = P_offd_i[j]; i < P_offd_i[j+1]; i++)
               {
                  min_weight = hypre_min(min_weight, P_offd_data[i]);
                  if (P_offd_data[i] != 1.0) 
                     max_weight = hypre_max(max_weight, P_offd_data[i]);
                  rowsum += P_offd_data[i];
               }
               
               min_rowsum = hypre_min(rowsum, min_rowsum);
               max_rowsum = hypre_max(rowsum, max_rowsum);
            }
         
         }
         avg_entries = ((double) global_nonzeros) / ((double) fine_size);
      }

#ifdef HYPRE_NO_GLOBAL_PARTITION

      numrows = (int)(row_starts[1]-row_starts[0]);
      if (!numrows) /* if we don't have any rows, then don't have this count toward
                       min row sum or min num entries */
      {
         min_entries = 1000000;
         min_rowsum =  1.0e7;
         min_weight = 1.0e7;
       }
       
      send_buff[0] = - (double) min_entries;
      send_buff[1] = (double) max_entries;
      send_buff[2] = - min_rowsum;
      send_buff[3] = max_rowsum;
      send_buff[4] = - min_weight;
      send_buff[5] = max_weight;

      MPI_Reduce(send_buff, gather_buff, 6, MPI_DOUBLE, MPI_MAX, 0, comm);

      if (my_id == 0)
      {
         global_min_e = - gather_buff[0];
         global_max_e = gather_buff[1];
         global_min_rsum = -gather_buff[2];
         global_max_rsum = gather_buff[3];
         global_min_wt = -gather_buff[4];
         global_max_wt = gather_buff[5];

#ifdef HYPRE_LONG_LONG
          printf( "%2d %12lld x %-12lld %3d %3d",
                 level, fine_size, coarse_size,  global_min_e, global_max_e);
#else          
          printf( "%2d %5d x %-5d %3d %3d",
                 level, fine_size, coarse_size,  global_min_e, global_max_e);
#endif          
         printf("  %10.3e %9.3e %9.3e %9.3e\n",
                global_min_wt, global_max_wt, 
                global_min_rsum, global_max_rsum);
      }


#else
      
      send_buff[0] = (double) min_entries;
      send_buff[1] = (double) max_entries;
      send_buff[2] = min_rowsum;
      send_buff[3] = max_rowsum;
      send_buff[4] = min_weight;
      send_buff[5] = max_weight;
      
      MPI_Gather(send_buff,6,MPI_DOUBLE,gather_buff,6,MPI_DOUBLE,0,comm);
      
      if (my_id == 0)
      {
         global_min_e = 1000000;
         global_max_e = 0;
         global_min_rsum = 1.0e7;
         global_max_rsum = 0.0;
         global_min_wt = 1.0e7;
         global_max_wt = 0.0;
         
         for (j = 0; j < num_procs; j++)
         {
            numrows = row_starts[j+1] - row_starts[j];
            if (numrows)
            {
               global_min_e = hypre_min(global_min_e, (int) gather_buff[j*6]);
               global_min_rsum = hypre_min(global_min_rsum, gather_buff[j*6+2]);
               global_min_wt = hypre_min(global_min_wt, gather_buff[j*6+4]);
            }
            global_max_e = hypre_max(global_max_e, (int) gather_buff[j*6+1]);
            global_max_rsum = hypre_max(global_max_rsum, gather_buff[j*6+3]);
            global_max_wt = hypre_max(global_max_wt, gather_buff[j*6+5]);
         }
         
#ifdef HYPRE_LONG_LONG
         printf( "%2d %12lld x %-12lld %3d %3d",
                 level, fine_size, coarse_size,  global_min_e, global_max_e);
#else          
         printf( "%2d %5d x %-5d %3d %3d",
                 level, fine_size, coarse_size,  global_min_e, global_max_e);
#endif          
         printf("  %10.3e %9.3e %9.3e %9.3e\n",
                global_min_wt, global_max_wt, 
                global_min_rsum, global_max_rsum);
      }

#endif

   }


   total_variables = 0;
   operat_cmplxty = 0;
   for (j=0;j<hypre_ParAMGDataNumLevels(amg_data);j++)
   {
      operat_cmplxty +=  num_coeffs[j] / num_coeffs[0];
      total_variables += num_variables[j];
   }
   if (num_variables[0] != 0)
      grid_cmplxty = total_variables / num_variables[0];
 
   if (my_id == 0 )
   {
      printf("\n\n     Complexity:    grid = %f\n",grid_cmplxty);
      printf("                operator = %f\n",operat_cmplxty);
   }

   if (my_id == 0) printf("\n\n");

   if (my_id == 0)
   { 
      printf("\n\nBoomerAMG SOLVER PARAMETERS:\n\n");
      printf( "  Maximum number of cycles:         %d \n",max_iter);
      printf( "  Stopping Tolerance:               %e \n",tol); 
      printf( "  Cycle type (1 = V, 2 = W, etc.):  %d\n\n", cycle_type);
      printf( "  Relaxation Parameters:\n");
      printf( "   Visiting Grid:                     down   up  coarse\n");
      printf( "            Number of partial sweeps: %4d   %2d  %4d \n",
              num_grid_sweeps[1],
              num_grid_sweeps[2],num_grid_sweeps[3]);
      printf( "   Type 0=Jac, 3=hGS, 6=hSGS, 9=GE:   %4d   %2d  %4d \n",
              grid_relax_type[1],
              grid_relax_type[2],grid_relax_type[3]);
#if 1 /* TO DO: may not want this to print if CG in the coarse grid */
      printf( "   Point types, partial sweeps (1=C, -1=F):\n");
      if (grid_relax_points)
      {
         printf( "                  Pre-CG relaxation (down):");
         for (j = 0; j < num_grid_sweeps[1]; j++)
              printf("  %2d", grid_relax_points[1][j]);
         printf( "\n");
         printf( "                   Post-CG relaxation (up):");
         for (j = 0; j < num_grid_sweeps[2]; j++)
              printf("  %2d", grid_relax_points[2][j]);
         printf( "\n");
         printf( "                             Coarsest grid:");
         for (j = 0; j < num_grid_sweeps[3]; j++)
              printf("  %2d", grid_relax_points[3][j]);
         printf( "\n\n");
      }
      else if (relax_order == 1)
      {
         printf( "                  Pre-CG relaxation (down):");
         for (j = 0; j < num_grid_sweeps[1]; j++)
              printf("  %2d  %2d", one, minus_one);
         printf( "\n");
         printf( "                   Post-CG relaxation (up):");
         for (j = 0; j < num_grid_sweeps[2]; j++)
              printf("  %2d  %2d", minus_one, one);
         printf( "\n");
         printf( "                             Coarsest grid:");
         for (j = 0; j < num_grid_sweeps[3]; j++)
              printf("  %2d", zero);
         printf( "\n\n");
      }
      else 
      {
         printf( "                  Pre-CG relaxation (down):");
         for (j = 0; j < num_grid_sweeps[1]; j++)
              printf("  %2d", zero);
         printf( "\n");
         printf( "                   Post-CG relaxation (up):");
         for (j = 0; j < num_grid_sweeps[2]; j++)
              printf("  %2d", zero);
         printf( "\n");
         printf( "                             Coarsest grid:");
         for (j = 0; j < num_grid_sweeps[3]; j++)
              printf("  %2d", zero);
         printf( "\n\n");
      }
#endif
      if (smooth_type == 6)
         for (j=0; j < smooth_num_levels; j++)
            printf( " Schwarz Relaxation Weight %f level %d\n",
			hypre_ParAMGDataSchwarzRlxWeight(amg_data),j);
      for (j=0; j < num_levels; j++)
         if (relax_weight[j] != 1)
	       printf( " Relaxation Weight %f level %d\n",relax_weight[j],j);
      for (j=0; j < num_levels; j++)
         if (omega[j] != 1)
               printf( " Outer relaxation weight %f level %d\n",omega[j],j);
   }


   /*if (seq_cg) 
   {
      hypre_seqAMGSetupStats(amg_data,num_coeffs[0],num_variables[0], 
                             operat_cmplxty, grid_cmplxty );
   }*/
   




   hypre_TFree(num_coeffs);
   hypre_TFree(num_variables);
   hypre_TFree(send_buff);
   hypre_TFree(gather_buff);
   
   return(0);
}

Пример #17

Показать файл

Файл: lgmres.c Проект: ducpdx/hypre

/*--------------------------------------------------------------------------
 * hypre_LGMRESSolve
 *
 * Restarted GMRES with right preconditioning, augmented ("lgmres mod") with
 * vectors saved from previous restart cycles.  All linear-algebra work is
 * done through the callbacks in lgmres_data->functions (Matvec, InnerProd,
 * Axpy, CopyVector, ScaleVector, ClearVector, precond, CommInfo).
 *
 * Parameters:
 *   lgmres_vdata  hypre_LGMRESData object (passed as void *); supplies the
 *                 tolerances, iteration limits, Krylov/augmentation
 *                 dimensions and the work vectors r, w, p[], aug_vecs[],
 *                 a_aug_vecs[].
 *   A, b, x       opaque matrix / right-hand side / solution handles that
 *                 are only forwarded to the callbacks.  x is the initial
 *                 guess on entry and is updated in place.
 *
 * Stopping test: |r_i| <= max(a_tol, r_tol * den_norm) with den_norm = |b|
 * when |b| > 0 and |r_0| otherwise; an optional convergence-factor test
 * (cf_tol > 0) can also terminate the iteration.
 *
 * Returns hypre_error_flag.  HYPRE_ERROR_GENERIC is raised when Inf/NaN is
 * detected in |b| or in the initial residual norm; HYPRE_ERROR_CONV is
 * raised when max_iter is reached with |r| > epsilon.
 *
 * On normal exit the fields converged, num_iterations and
 * rel_residual_norm of lgmres_data are updated.  On the early-exit paths
 * (bad input, or an exactly zero residual at the start of a cycle) only
 * `converged = 0` has been written, as in the original implementation.
 *
 * The temporary arrays rs, c, s and hh are released on every exit path
 * through the single `cleanup` label (the Inf/NaN exits used to leak them).
 *--------------------------------------------------------------------------*/

HYPRE_Int
hypre_LGMRESSolve(void  *lgmres_vdata,
                  void  *A,
                  void  *b,
                  void  *x)
{
   hypre_LGMRESData      *lgmres_data      = (hypre_LGMRESData *)lgmres_vdata;
   hypre_LGMRESFunctions *lgmres_functions = lgmres_data->functions;
   HYPRE_Int              k_dim            = (lgmres_data -> k_dim);
   HYPRE_Int              min_iter         = (lgmres_data -> min_iter);
   HYPRE_Int              max_iter         = (lgmres_data -> max_iter);
   HYPRE_Real             r_tol            = (lgmres_data -> tol);
   HYPRE_Real             cf_tol           = (lgmres_data -> cf_tol);
   HYPRE_Real             a_tol            = (lgmres_data -> a_tol);
   void                  *matvec_data      = (lgmres_data -> matvec_data);

   void                  *r                = (lgmres_data -> r);
   void                  *w                = (lgmres_data -> w);

   void                 **p                = (lgmres_data -> p);

   /* lgmres mod: augmentation vectors and their images under A */
   void                 **aug_vecs         = (lgmres_data -> aug_vecs);
   void                 **a_aug_vecs       = (lgmres_data -> a_aug_vecs);
   HYPRE_Int             *aug_order        = (lgmres_data -> aug_order);
   HYPRE_Int              aug_dim          = (lgmres_data -> aug_dim);
   HYPRE_Int              approx_constant  = (lgmres_data -> approx_constant);
   HYPRE_Int              it_arnoldi, aug_ct, it_total, ii, order, it_aug;
   HYPRE_Int              spot = 0;
   HYPRE_Real             tmp_norm, r_norm_last;

   HYPRE_Int            (*precond)(void*,void*,void*,void*) = (lgmres_functions -> precond);
   HYPRE_Int             *precond_data     = (HYPRE_Int*)(lgmres_data -> precond_data);

   HYPRE_Int              print_level      = (lgmres_data -> print_level);
   HYPRE_Int              logging          = (lgmres_data -> logging);

   HYPRE_Real            *norms            = (lgmres_data -> norms);

   HYPRE_Int              break_value = 0;
   HYPRE_Int              i, j, k;
   HYPRE_Real            *rs, **hh, *c, *s;
   HYPRE_Int              iter;
   HYPRE_Int              my_id, num_procs;
   HYPRE_Real             epsilon, gamma, t, r_norm, b_norm, den_norm;

   HYPRE_Real             epsmac = 1.e-16;
   HYPRE_Real             ieee_check = 0.;

   HYPRE_Real             cf_ave_0 = 0.0;
   HYPRE_Real             cf_ave_1 = 0.0;
   HYPRE_Real             weight;
   HYPRE_Real             r_norm_0;

   /* We are not checking rel. change for now... */

   (lgmres_data -> converged) = 0;

   (*(lgmres_functions->CommInfo))(A,&my_id,&num_procs);

   /* initialize work arrays - lgmres includes aug_dim */
   rs = hypre_CTAllocF(HYPRE_Real,k_dim+1+aug_dim,lgmres_functions);
   c = hypre_CTAllocF(HYPRE_Real,k_dim+aug_dim,lgmres_functions);
   s = hypre_CTAllocF(HYPRE_Real,k_dim+aug_dim,lgmres_functions);

   /* lgmres mod. - need non-modified hessenberg to avoid aug_dim matvecs */
   hh = hypre_CTAllocF(HYPRE_Real*,k_dim+aug_dim+1,lgmres_functions);
   for (i=0; i < k_dim+aug_dim+1; i++)
   {
      hh[i] = hypre_CTAllocF(HYPRE_Real,k_dim+aug_dim,lgmres_functions);
   }

   (*(lgmres_functions->CopyVector))(b,p[0]);

   /* compute initial residual: p[0] = b - A*x */
   (*(lgmres_functions->Matvec))(matvec_data,-1.0, A, x, 1.0, p[0]);

   b_norm = sqrt((*(lgmres_functions->InnerProd))(b,b));

   /* Since it is does not diminish performance, attempt to return an error flag
      and notify users when they supply bad input. */
   if (b_norm != 0.) ieee_check = b_norm/b_norm; /* INF -> NaN conversion */
   if (ieee_check != ieee_check)
   {
      /* ...INFs or NaNs in input can make ieee_check a NaN.  This test
         for ieee_check self-equality works on all IEEE-compliant compilers/
         machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754"
         by W. Kahan, May 31, 1996.  Currently (July 2002) this paper may be
         found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */
      if (logging > 0 || print_level > 0)
      {
         hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n");
         hypre_printf("ERROR -- hypre_LGMRESSolve: INFs and/or NaNs detected in input.\n");
         hypre_printf("User probably placed non-numerics in supplied b.\n");
         hypre_printf("Returning error flag += 101.  Program not terminated.\n");
         hypre_printf("ERROR detected by Hypre ... END\n\n\n");
      }
      hypre_error(HYPRE_ERROR_GENERIC);
      goto cleanup; /* release rs, c, s, hh before returning */
   }

   r_norm = sqrt((*(lgmres_functions->InnerProd))(p[0],p[0]));
   r_norm_0 = r_norm;

   /* Same Inf/NaN screening for the initial residual (bad A or x_0). */
   if (r_norm != 0.) ieee_check = r_norm/r_norm; /* INF -> NaN conversion */
   if (ieee_check != ieee_check)
   {
      if (logging > 0 || print_level > 0)
      {
         hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n");
         hypre_printf("ERROR -- hypre_LGMRESSolve: INFs and/or NaNs detected in input.\n");
         hypre_printf("User probably placed non-numerics in supplied A or x_0.\n");
         hypre_printf("Returning error flag += 101.  Program not terminated.\n");
         hypre_printf("ERROR detected by Hypre ... END\n\n\n");
      }
      hypre_error(HYPRE_ERROR_GENERIC);
      goto cleanup; /* release rs, c, s, hh before returning */
   }

   if ( logging>0 || print_level > 0)
   {
      norms[0] = r_norm;
      if ( print_level>1 && my_id == 0 )
      {
         hypre_printf("L2 norm of b: %e\n", b_norm);
         if (b_norm == 0.0)
            hypre_printf("Rel_resid_norm actually contains the residual norm\n");
         hypre_printf("Initial L2 norm of residual: %e\n", r_norm);
      }
   }
   iter = 0;

   if (b_norm > 0.0)
   {
      /* convergence criterion |r_i|/|b| <= accuracy if |b| > 0 */
      den_norm = b_norm;
   }
   else
   {
      /* convergence criterion |r_i|/|r0| <= accuracy if |b| = 0 */
      den_norm = r_norm;
   }

   /* convergence criteria: |r_i| <= max( a_tol, r_tol * den_norm)
      den_norm = |r_0| or |b|
      note: default for a_tol is 0.0, so relative residual criteria is used unless
            user specifies a_tol, or sets r_tol = 0.0, which means absolute
            tol only is checked  */

   epsilon = hypre_max(a_tol,r_tol*den_norm);

   /* so now our stop criteria is |r_i| <= epsilon */

   if ( print_level>1 && my_id == 0 )
   {
      if (b_norm > 0.0)
      {
         hypre_printf("=============================================\n\n");
         hypre_printf("Iters     resid.norm     conv.rate  rel.res.norm\n");
         hypre_printf("-----    ------------    ---------- ------------\n");
      }
      else
      {
         hypre_printf("=============================================\n\n");
         hypre_printf("Iters     resid.norm     conv.rate\n");
         hypre_printf("-----    ------------    ----------\n");
      }
   }

   /* lgmres initialization */
   for (ii=0; ii<aug_dim; ii++)
   {
      aug_order[ii] = 0;
   }
   aug_ct = 0; /* number of aug. vectors available */

   /* outer iteration cycle */
   while (iter < max_iter)
   {
      /* initialize first term of hessenberg system */
      rs[0] = r_norm;
      if (r_norm == 0.0)
      {
         /* Exactly zero residual: nothing left to do.  As in the original
            code, this path returns without recording num_iterations or
            rel_residual_norm. */
         goto cleanup;
      }

      /* see if we are already converged and
         should print the final norm and exit */
      if (r_norm <= epsilon && iter >= min_iter)
      {
         (*(lgmres_functions->CopyVector))(b,r);
         (*(lgmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r);
         r_norm = sqrt((*(lgmres_functions->InnerProd))(r,r));
         if (r_norm <= epsilon)
         {
            if ( print_level>1 && my_id == 0)
            {
               hypre_printf("\n\n");
               hypre_printf("Final L2 norm of residual: %e\n\n", r_norm);
            }
            break;
         }
         else
         {
            if ( print_level>0 && my_id == 0)
               hypre_printf("false convergence 1\n");
         }
      }

      t = 1.0 / r_norm;
      r_norm_last = r_norm;

      (*(lgmres_functions->ScaleVector))(t,p[0]);
      i = 0;

      /* lgmres mod: determine number of arnoldi steps to take */
      /* if approx_constant then we keep the space the same size
         even if we don't have the full number of aug vectors yet*/
      if (approx_constant)
      {
         it_arnoldi = k_dim - aug_ct;
      }
      else
      {
         it_arnoldi = k_dim - aug_dim;
      }
      it_total = it_arnoldi + aug_ct;
      it_aug = 0; /* keep track of augmented iterations */

      /***RESTART CYCLE (right-preconditioning) ***/
      while (i < it_total && iter < max_iter)
      {
         i++;
         iter++;
         (*(lgmres_functions->ClearVector))(r);

         /*LGMRES_MOD: decide whether this is an arnoldi step or an aug step */
         if ( i <= it_arnoldi)
         {
            /* Arnoldi */
            precond(precond_data, A, p[i-1], r);
            (*(lgmres_functions->Matvec))(matvec_data, 1.0, A, r, 0.0, p[i]);
         }
         else
         {
            /* lgmres aug step */
            it_aug ++;
            /* which aug step (note i starts at 1) - aug order number at 0 */
            order = i - it_arnoldi - 1;
            for (ii=0; ii<aug_dim; ii++)
            {
               if (aug_order[ii] == order)
               {
                  spot = ii;
                  break; /* must have this because there will be duplicates before aug_ct = aug_dim */
               }
            }
            /* copy a_aug_vecs[spot] to p[i] */
            (*(lgmres_functions->CopyVector))(a_aug_vecs[spot],p[i]);

            /*note: an alternate implementation choice would be to only save the AUGVECS and
              not A_AUGVEC and then apply the PC here to the augvec */
         }

         /* modified Gram_Schmidt */
         for (j=0; j < i; j++)
         {
            hh[j][i-1] = (*(lgmres_functions->InnerProd))(p[j],p[i]);
            (*(lgmres_functions->Axpy))(-hh[j][i-1],p[j],p[i]);
         }
         t = sqrt((*(lgmres_functions->InnerProd))(p[i],p[i]));
         hh[i][i-1] = t;
         if (t != 0.0)
         {
            t = 1.0/t;
            (*(lgmres_functions->ScaleVector))(t,p[i]);
         }

         /* done with modified Gram_schmidt and Arnoldi step.
            update factorization of hh: apply the previously stored
            rotations (c, s) to the new column */
         for (j = 1; j < i; j++)
         {
            t = hh[j-1][i-1];
            hh[j-1][i-1] = s[j-1]*hh[j][i-1] + c[j-1]*t;
            hh[j][i-1] = -s[j-1]*t + c[j-1]*hh[j][i-1];
         }
         t = hh[i][i-1]*hh[i][i-1];
         t += hh[i-1][i-1]*hh[i-1][i-1];
         gamma = sqrt(t);
         if (gamma == 0.0) gamma = epsmac; /* avoid dividing by zero below */
         c[i-1] = hh[i-1][i-1]/gamma;
         s[i-1] = hh[i][i-1]/gamma;
         rs[i] = -hh[i][i-1]*rs[i-1];
         rs[i] /= gamma;
         rs[i-1] = c[i-1]*rs[i-1];
         /* determine residual norm */
         hh[i-1][i-1] = s[i-1]*hh[i][i-1] + c[i-1]*hh[i-1][i-1];
         r_norm = fabs(rs[i]);

         /* print ? */
         if ( print_level>0 )
         {
            norms[iter] = r_norm;
            if ( print_level>1 && my_id == 0 )
            {
               if (b_norm > 0.0)
                  hypre_printf("% 5d    %e    %f   %e\n", iter,
                               norms[iter],norms[iter]/norms[iter-1],
                               norms[iter]/b_norm);
               else
                  hypre_printf("% 5d    %e    %f\n", iter, norms[iter],
                               norms[iter]/norms[iter-1]);
            }
         }

         /*convergence factor tolerance */
         if (cf_tol > 0.0)
         {
            cf_ave_0 = cf_ave_1;
            cf_ave_1 = pow( r_norm / r_norm_0, 1.0/(2.0*iter));

            weight   = fabs(cf_ave_1 - cf_ave_0);
            weight   = weight / hypre_max(cf_ave_1, cf_ave_0);
            weight   = 1.0 - weight;
#if 0
            hypre_printf("I = %d: cf_new = %e, cf_old = %e, weight = %e\n",
                         i, cf_ave_1, cf_ave_0, weight );
#endif
            if (weight * cf_ave_1 > cf_tol)
            {
               break_value = 1;
               break;
            }
         }

         /* should we exit the restart cycle? (conv. check) */
         if (r_norm <= epsilon && iter >= min_iter)
         {
            break;
         }

      } /*** end of restart cycle ***/

      /* now compute solution, first solve upper triangular system */

      if (break_value) break;

      rs[i-1] = rs[i-1]/hh[i-1][i-1];
      for (k = i-2; k >= 0; k--)
      {
         t = 0.0;
         for (j = k+1; j < i; j++)
         {
            t -= hh[k][j]*rs[j];
         }
         t += rs[k];
         rs[k] = t/hh[k][k];
      }

      /* form linear combination of p's to get solution */
      /* put the new aug_vector in aug_vecs[aug_dim]  - a temp position*/
      /* i = number of iterations */
      /* it_aug = number of augmented iterations */
      /* it_arnoldi = number of arnoldi iterations */

      /*check if exited early before all arnoldi its */
      if (it_arnoldi > i) it_arnoldi = i;

      if (!it_aug)
      {
         (*(lgmres_functions->CopyVector))(p[i-1],w);
         (*(lgmres_functions->ScaleVector))(rs[i-1],w);
         for (j = i-2; j >= 0; j--)
            (*(lgmres_functions->Axpy))(rs[j], p[j], w);
      }
      else /* need some of the augvecs */
      {
         (*(lgmres_functions->CopyVector))(p[0],w);
         (*(lgmres_functions->ScaleVector))(rs[0],w);

         /* reg. arnoldi directions */
         for (j = 1; j < it_arnoldi; j++) /*first one already done */
         {
            (*(lgmres_functions->Axpy))(rs[j], p[j], w);
         }

         /* augment directions */
         for (ii=0; ii<it_aug; ii++)
         {
            for (j=0; j<aug_dim; j++)
            {
               if (aug_order[j] == ii)
               {
                  spot = j;
                  break; /* must have this because there will be
                          * duplicates before aug_ct = aug_dim */
               }
            }
            (*(lgmres_functions->Axpy))(rs[it_arnoldi+ii], aug_vecs[spot], w);
         }
      }

      /* grab the new aug vector before the prec*/
      (*(lgmres_functions->CopyVector))(w,aug_vecs[aug_dim]);

      (*(lgmres_functions->ClearVector))(r);
      /* find correction (in r) (un-wind precond.)*/
      precond(precond_data, A, w, r);

      /* update current solution x (in x) */
      (*(lgmres_functions->Axpy))(1.0,r,x);

      /* check for convergence by evaluating the actual residual */
      if (r_norm <= epsilon && iter >= min_iter)
      {
         /* calculate actual residual norm*/
         (*(lgmres_functions->CopyVector))(b,r);
         (*(lgmres_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r);
         r_norm = sqrt( (*(lgmres_functions->InnerProd))(r,r) );

         if (r_norm <= epsilon)
         {
            if ( print_level>1 && my_id == 0 )
            {
               hypre_printf("\n\n");
               hypre_printf("Final L2 norm of residual: %e\n\n", r_norm);
            }
            (lgmres_data -> converged) = 1;
            break;
         }
         else /* conv. has not occurred, according to true residual */
         {
            if ( print_level>0 && my_id == 0)
               hypre_printf("false convergence 2\n");
            (*(lgmres_functions->CopyVector))(r,p[0]);
            i = 0;
         }
      } /* end of convergence check */

      /* compute residual vector and continue loop */

      /* copy r0 (not scaled) to w*/
      (*(lgmres_functions->CopyVector))(p[0],w);
      (*(lgmres_functions->ScaleVector))(r_norm_last,w);

      for (j=i ; j > 0; j--)
      {
         rs[j-1] = -s[j-1]*rs[j];
         rs[j] = c[j-1]*rs[j];
      }

      if (i) (*(lgmres_functions->Axpy))(rs[i]-1.0,p[i],p[i]);
      for (j=i-1 ; j > 0; j--)
         (*(lgmres_functions->Axpy))(rs[j],p[j],p[i]);

      if (i)
      {
         (*(lgmres_functions->Axpy))(rs[0]-1.0,p[0],p[0]);
         (*(lgmres_functions->Axpy))(1.0,p[i],p[0]);
      }

      /* lgmres mod */
      /* collect aug vector and A*augvector for future restarts -
         only if we will be restarting (i.e. this cycle performed it_total
         iterations). ordering starts at 0.*/
      if (aug_dim > 0)
      {
         if (!aug_ct)
         {
            spot = 0;
            aug_ct++;
         }
         else if (aug_ct < aug_dim)
         {
            spot = aug_ct;
            aug_ct++;
         }
         else
         {
            /* truncate - already have aug_dim number of vectors*/
            for (ii=0; ii<aug_dim; ii++)
            {
               if (aug_order[ii] == (aug_dim-1))
               {
                  spot = ii;
               }
            }
         }
         /* aug_vecs[aug_dim] contains new aug vector */
         (*(lgmres_functions->CopyVector))(aug_vecs[aug_dim], aug_vecs[spot]);
         /*need to normalize */
         tmp_norm = sqrt((*(lgmres_functions->InnerProd))(aug_vecs[spot], aug_vecs[spot]));

         tmp_norm = 1.0/tmp_norm;
         (*(lgmres_functions->ScaleVector))(tmp_norm ,aug_vecs[spot]);

         /*set new aug vector to order 0  - move all others back one */
         for (ii=0; ii < aug_dim; ii++)
         {
            aug_order[ii]++;
         }
         aug_order[spot] = 0;

         /*now add the A*aug vector to A_AUGVEC(spot) - this is
          * independ. of preconditioning type*/
         /* A*augvec = V*H*y  = r0-rm   (r0 is in w and rm is in p[0])*/
         (*(lgmres_functions->CopyVector))( w, a_aug_vecs[spot]);
         (*(lgmres_functions->ScaleVector))(- 1.0, a_aug_vecs[spot]); /* -r0*/
         (*(lgmres_functions->Axpy))(1.0, p[0],a_aug_vecs[spot]); /* rm - r0 */
         (*(lgmres_functions->ScaleVector))(-tmp_norm, a_aug_vecs[spot]); /* r0-rm /norm */
      }

   } /* END of iteration while loop */

   if ( print_level>1 && my_id == 0 )
      hypre_printf("\n\n");

   (lgmres_data -> num_iterations) = iter;
   if (b_norm > 0.0)
      (lgmres_data -> rel_residual_norm) = r_norm/b_norm;
   if (b_norm == 0.0)
      (lgmres_data -> rel_residual_norm) = r_norm;

   if (iter >= max_iter && r_norm > epsilon) hypre_error(HYPRE_ERROR_CONV);

cleanup:
   /* Single release point for the temporary work arrays; reached both on
      normal completion and from the early-exit paths above. */
   hypre_TFreeF(c,lgmres_functions);
   hypre_TFreeF(s,lgmres_functions);
   hypre_TFreeF(rs,lgmres_functions);

   for (i=0; i < k_dim+1+aug_dim; i++)
   {
      hypre_TFreeF(hh[i],lgmres_functions);
   }
   hypre_TFreeF(hh,lgmres_functions);

   return hypre_error_flag;
}

Пример #18

Показать файл

Файл: communication_info.c Проект: LLNL/COGENT

/*--------------------------------------------------------------------------
 * hypre_CreateCommInfoFromStencil
 *
 * For every local box of `grid`, determines the regions that have to be
 * received from and sent to neighboring boxes for the given `stencil`.
 *
 * Steps visible in this listing:
 *   1. Reduce the stencil to per-dimension growth amounts (grow[d][0] on the
 *      low side, grow[d][1] on the high side) and to a 3x3x3 direction mask
 *      (stencil_grid) that records which relative directions the stencil
 *      can reach.
 *   2. For each local box, query the box manager once per period for entries
 *      intersecting the extended box; the box itself is dropped in the
 *      k == 0 (unshifted) pass.
 *   3. Receive regions: grown local box intersected with each neighbor box.
 *   4. Send regions: local box intersected with each grown neighbor box.
 *   For both, the owning process and the neighbor's box id are recorded, and
 *   a second ("rbox") copy of the region is stored after a positive periodic
 *   shift when the neighbor came from a periodic pass.
 *
 * Parameters:
 *   grid          - struct grid; its boxes, ids, number of periods, period
 *                   shifts, box manager and communicator are read.
 *   stencil       - stencil whose shape and size drive the growth amounts.
 *   comm_info_ptr - not referenced in the visible portion; presumably
 *                   receives the assembled hypre_CommInfo (TODO confirm).
 *
 * NOTE(review): this listing stops right after the loop over local boxes.
 * The code that packages the arrays into *comm_info_ptr, releases the
 * temporaries and returns is not part of the excerpt.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_CreateCommInfoFromStencil( hypre_StructGrid      *grid,
                                 hypre_StructStencil   *stencil,
                                 hypre_CommInfo       **comm_info_ptr )
{
   HYPRE_Int              i,j,k, d, m, s;   

   hypre_BoxArrayArray   *send_boxes;
   hypre_BoxArrayArray   *recv_boxes;

   HYPRE_Int            **send_procs;
   HYPRE_Int            **recv_procs;
   HYPRE_Int            **send_rboxnums;
   HYPRE_Int            **recv_rboxnums;
   hypre_BoxArrayArray   *send_rboxes;
   hypre_BoxArrayArray   *recv_rboxes;

   hypre_BoxArray        *local_boxes;
   HYPRE_Int              num_boxes;

   /* fetched below but not read anywhere in the visible portion */
   HYPRE_Int             *local_ids;

   hypre_BoxManager      *boxman;
                       
   hypre_Index           *stencil_shape;
   hypre_IndexRef         stencil_offset;
   hypre_IndexRef         pshift;
                          
   hypre_Box             *box;
   hypre_Box             *hood_box;
   hypre_Box             *grow_box;
   hypre_Box             *extend_box;
   hypre_Box             *int_box;
   hypre_Box             *periodic_box;
   
   /* stencil_grid: per dimension, index 0 = negative direction, 1 = no
      offset, 2 = positive direction; an entry is 1 when some stencil offset
      covers that combination of directions */
   HYPRE_Int              stencil_grid[3][3][3];
   /* grow[d][0] / grow[d][1]: largest stencil reach in the negative /
      positive direction of dimension d */
   HYPRE_Int              grow[3][2];
                       
   hypre_BoxManEntry    **entries;
   hypre_BoxManEntry     *entry;
   
   HYPRE_Int              num_entries;
   hypre_BoxArray        *neighbor_boxes = NULL;
   HYPRE_Int             *neighbor_procs = NULL;
   HYPRE_Int             *neighbor_ids = NULL;
   HYPRE_Int             *neighbor_shifts = NULL;
   HYPRE_Int              neighbor_count;
   HYPRE_Int              neighbor_alloc;

   hypre_Index            ilower, iupper;

   hypre_BoxArray        *send_box_array;
   hypre_BoxArray        *recv_box_array;
   hypre_BoxArray        *send_rbox_array;
   hypre_BoxArray        *recv_rbox_array;
                       
   /* scratch storage for intersections; reused for the recv pass and then
      the send pass of every local box */
   hypre_Box            **cboxes;
   hypre_Box             *cboxes_mem;
   HYPRE_Int             *cboxes_neighbor_location;
   HYPRE_Int              num_cboxes, cbox_alloc;
                       
   HYPRE_Int              istart[3], istop[3];
   HYPRE_Int              sgindex[3];               

   HYPRE_Int              num_periods, loc, box_id, id, proc_id;
   HYPRE_Int              myid;
   
   MPI_Comm               comm;

   /*------------------------------------------------------
    * Initializations
    *------------------------------------------------------*/

   local_boxes  = hypre_StructGridBoxes(grid);
   local_ids    = hypre_StructGridIDs(grid);
   num_boxes    = hypre_BoxArraySize(local_boxes);
   num_periods  = hypre_StructGridNumPeriods(grid);
   
   boxman    = hypre_StructGridBoxMan(grid);
   comm      =  hypre_StructGridComm(grid);
   
   hypre_MPI_Comm_rank(comm, &myid);
  
   for (k = 0; k < 3; k++)
   {
      for (j = 0; j < 3; j++)
      {
         for (i = 0; i < 3; i++)
         {
            stencil_grid[i][j][k] = 0;
         }
      }
   }

   /*------------------------------------------------------
    * Compute the "grow" information from the stencil
    *------------------------------------------------------*/

   stencil_shape = hypre_StructStencilShape(stencil);

   for (d = 0; d < 3; d++)
   {
      grow[d][0] = 0;
      grow[d][1] = 0;
   }

   for (s = 0; s < hypre_StructStencilSize(stencil); s++)
   {
      stencil_offset = stencil_shape[s];

      for (d = 0; d < 3; d++)
      {
         m = stencil_offset[d];

         /* default range [1,1] = "no offset"; widened toward 0 or 2 below
            depending on the sign of the offset in this dimension */
         istart[d] = 1;
         istop[d]  = 1;

         if (m < 0)
         {
            istart[d] = 0;
            grow[d][0] = hypre_max(grow[d][0], -m);
         }
         else if (m > 0)
         {
            istop[d] = 2;
            grow[d][1] = hypre_max(grow[d][1],  m);
         }
      }

      /* update stencil grid from the grow_stencil */
      for (k = istart[2]; k <= istop[2]; k++)
      {
         for (j = istart[1]; j <= istop[1]; j++)
         {
            for (i = istart[0]; i <= istop[0]; i++)
            {
               stencil_grid[i][j][k] = 1;
            }
         }
      }
   }

   /*------------------------------------------------------
    * Compute send/recv boxes and procs for each local box
    *------------------------------------------------------*/

   /* initialize: for each local box, we create an array of send/recv info */

   send_boxes = hypre_BoxArrayArrayCreate(num_boxes);
   recv_boxes = hypre_BoxArrayArrayCreate(num_boxes);
   send_procs = hypre_CTAlloc(HYPRE_Int *, num_boxes);
   recv_procs = hypre_CTAlloc(HYPRE_Int *, num_boxes);

   /* Remote boxnums and boxes describe data on the opposing processor, so some
      shifting of boxes is needed below for periodic neighbor boxes.  Remote box
      info is also needed for receives to allow for reverse communication. */
   send_rboxnums = hypre_CTAlloc(HYPRE_Int *, num_boxes);
   send_rboxes   = hypre_BoxArrayArrayCreate(num_boxes);
   recv_rboxnums = hypre_CTAlloc(HYPRE_Int *, num_boxes);
   recv_rboxes   = hypre_BoxArrayArrayCreate(num_boxes);

   grow_box = hypre_BoxCreate();
   extend_box = hypre_BoxCreate();
   int_box  = hypre_BoxCreate();
   periodic_box =  hypre_BoxCreate();
 
   /* storage we will use and keep track of the neighbors */
   neighbor_alloc = 30; /* initial guess at max size */
   neighbor_boxes = hypre_BoxArrayCreate(neighbor_alloc);
   neighbor_procs = hypre_CTAlloc(HYPRE_Int, neighbor_alloc);
   neighbor_ids = hypre_CTAlloc(HYPRE_Int, neighbor_alloc);
   neighbor_shifts = hypre_CTAlloc(HYPRE_Int, neighbor_alloc);

   /* storage we will use to collect all of the intersected boxes (the send and
      recv regions for box i (this may not be enough in the case of periodic
      boxes, so we will have to check) */
   cbox_alloc =  hypre_BoxManNEntries(boxman);

   cboxes_neighbor_location = hypre_CTAlloc(HYPRE_Int, cbox_alloc);
   cboxes = hypre_CTAlloc(hypre_Box *, cbox_alloc);
   cboxes_mem = hypre_CTAlloc(hypre_Box, cbox_alloc);

   /******* loop through each local box **************/

   for (i = 0; i < num_boxes; i++)
   {
      /* get the box */
      box = hypre_BoxArrayBox(local_boxes, i);
      /* box_id = local_ids[i]; the box id in the Box Manager is the box number,
       * and we use this to find out if a box has intersected with itself */
      box_id = i;
      
      /* grow box local i according to the stencil*/
      hypre_CopyBox(box, grow_box);
      for (d = 0; d < 3; d++)
      {
         hypre_BoxIMinD(grow_box, d) -= grow[d][0];
         hypre_BoxIMaxD(grow_box, d) += grow[d][1];
      }

      /* extend_box - to find the list of potential neighbors, we need to grow
         the local box a bit differently in case, for example, the stencil grows
         in one dimension [0] and not the other [1] */
      hypre_CopyBox(box, extend_box);
      for (d = 0; d < 3; d++)
      { 
         hypre_BoxIMinD(extend_box, d) -= hypre_max(grow[d][0],grow[d][1]);
         hypre_BoxIMaxD(extend_box, d) += hypre_max(grow[d][0],grow[d][1]);
      }

      /*------------------------------------------------
       * Determine the neighbors of box i
       *------------------------------------------------*/
     
      /* Do this by intersecting the extend box with the BoxManager. 
         We must also check for periodic neighbors. */

      neighbor_count = 0;
      hypre_BoxArraySetSize(neighbor_boxes, 0);
      /* shift the box by each period (k=0 is original box) */
      for (k = 0; k < num_periods; k++)
      {
         hypre_CopyBox(extend_box, periodic_box);
         pshift = hypre_StructGridPShift(grid, k);
         hypre_BoxShiftPos(periodic_box, pshift);
         
         /* get the intersections */
         hypre_BoxManIntersect(boxman, hypre_BoxIMin(periodic_box) , 
                               hypre_BoxIMax(periodic_box) , 
                               &entries , &num_entries);
      
         /* note: do we need to remove the intersection with our original box?
            no if periodic, yes if non-periodic (k=0) */ 

         /* unpack entries (first check storage) */
         if (neighbor_count + num_entries > neighbor_alloc)
         {
            neighbor_alloc = neighbor_count + num_entries + 5;
            neighbor_procs = hypre_TReAlloc(neighbor_procs, HYPRE_Int,
                                            neighbor_alloc);
            neighbor_ids = hypre_TReAlloc(neighbor_ids, HYPRE_Int, neighbor_alloc);
            neighbor_shifts = hypre_TReAlloc(neighbor_shifts, HYPRE_Int,
                                             neighbor_alloc);
         }
         /* check storage for the array */
         hypre_BoxArraySetSize(neighbor_boxes, neighbor_count + num_entries);
         /* now unpack */
         for (j = 0; j < num_entries; j++)
         {
            entry = entries[j];
            proc_id = hypre_BoxManEntryProc(entry);        
            id = hypre_BoxManEntryId(entry); 
            /* don't keep box i in the non-periodic case*/  
            if (!k)
            {
               if((myid == proc_id) && (box_id == id))
               {
                  continue;
               }
            }

            hypre_BoxManEntryGetExtents(entry, ilower, iupper);        
            hypre_BoxSetExtents(hypre_BoxArrayBox(neighbor_boxes, neighbor_count),
                                ilower, iupper);
            /* shift the periodic boxes (needs to be the opposite of above) */
            if (k)
            {
               hypre_BoxShiftNeg(
                  hypre_BoxArrayBox(neighbor_boxes, neighbor_count), pshift);
            }
            
            /* remember the period index k so the region can be shifted back
               when the remote ("rbox") copy is built further down */
            neighbor_procs[neighbor_count] = proc_id;
            neighbor_ids[neighbor_count] = id;
            neighbor_shifts[neighbor_count] = k;
            neighbor_count++;
         }
         /* shrink back to the number actually kept (self entry may have been
            skipped above) */
         hypre_BoxArraySetSize(neighbor_boxes, neighbor_count);

         hypre_TFree(entries);
  
      } /* end of loop through periods k */

      /* Now we have a list of all of the neighbors for box i! */

      /* note: we don't want/need to remove duplicates - they should have
         different intersections (TO DO: put more thought into if there are ever
         any exceptions to this? - the intersection routine already eliminates
         duplicates - so what i mean is eliminating duplicates from multiple
         intersection calls in periodic case)  */  
    
      /*------------------------------------------------
       * Compute recv_box_array for box i
       *------------------------------------------------*/

      /* check size of storage for cboxes */
      /* let's make sure that we have enough storage in case each neighbor
         produces a send/recv region */
      if (neighbor_count > cbox_alloc)
      {
         cbox_alloc = neighbor_count;
         cboxes_neighbor_location = hypre_TReAlloc(cboxes_neighbor_location, 
                                                   HYPRE_Int, cbox_alloc);
         cboxes = hypre_TReAlloc(cboxes, hypre_Box *, cbox_alloc);
         cboxes_mem = hypre_TReAlloc(cboxes_mem, hypre_Box, cbox_alloc);
      }

      /* Loop through each neighbor box.  If the neighbor box intersects the
         grown box i (grown according to our stencil), then the intersection is
         a recv region.  If the neighbor box was shifted to handle periodicity,
         we need to (positive) shift it back. */

      num_cboxes = 0;
      
      for (k = 0; k < neighbor_count; k++)
      {
         hood_box = hypre_BoxArrayBox(neighbor_boxes, k);
         /* check the stencil grid to see if it makes sense to intersect */
         for (d = 0; d < 3; d++)
         {
            sgindex[d] = 1;
               
            /* neighbor lies strictly above box in dimension d -> index 2 */
            s = hypre_BoxIMinD(hood_box, d) - hypre_BoxIMaxD(box, d);
            if (s > 0)
            {
               sgindex[d] = 2;
            }
            /* neighbor lies strictly below box in dimension d -> index 0 */
            s = hypre_BoxIMinD(box, d) - hypre_BoxIMaxD(hood_box, d);
            if (s > 0)
            {
               sgindex[d] = 0;
            }
         }
         /* it makes sense only if we have at least one non-zero entry */   
         if (stencil_grid[sgindex[0]][sgindex[1]][sgindex[2]])
         {
            /* intersect - result is int_box */
            hypre_IntersectBoxes(grow_box, hood_box, int_box);
            /* if we have a positive volume box, this is a recv region */
            if (hypre_BoxVolume(int_box))
            {
               /* keep track of which neighbor: k... */
               cboxes_neighbor_location[num_cboxes] = k;
               cboxes[num_cboxes] = &cboxes_mem[num_cboxes];
               /* keep the intersected box */
               hypre_CopyBox(int_box, cboxes[num_cboxes]);
               num_cboxes++;
            }
         }
      } /* end of loop through each neighbor */

      /* create recv_box_array and recv_procs for box i */
      recv_box_array = hypre_BoxArrayArrayBoxArray(recv_boxes, i);
      hypre_BoxArraySetSize(recv_box_array, num_cboxes);
      recv_procs[i] = hypre_CTAlloc(HYPRE_Int, num_cboxes);
      recv_rboxnums[i] = hypre_CTAlloc(HYPRE_Int, num_cboxes);
      recv_rbox_array = hypre_BoxArrayArrayBoxArray(recv_rboxes, i);
      hypre_BoxArraySetSize(recv_rbox_array, num_cboxes);

      for (m = 0; m < num_cboxes; m++)
      {
         loc = cboxes_neighbor_location[m];
         recv_procs[i][m] = neighbor_procs[loc];
         recv_rboxnums[i][m] = neighbor_ids[loc];
         hypre_CopyBox(cboxes[m], hypre_BoxArrayBox(recv_box_array, m));

         /* if periodic, positive shift before copying to the rbox_array */
         if (neighbor_shifts[loc]) /* periodic if shift != 0 */
         {
            pshift = hypre_StructGridPShift(grid, neighbor_shifts[loc]);
            hypre_BoxShiftPos(cboxes[m], pshift);
         }
         hypre_CopyBox(cboxes[m], hypre_BoxArrayBox(recv_rbox_array, m));

         cboxes[m] = NULL;
      }

      /*------------------------------------------------
       * Compute send_box_array for box i
       *------------------------------------------------*/

      /* Loop through each neighbor box.  If the grown neighbor box intersects
         box i, then the intersection is a send region.  If the neighbor box was
         shifted to handle periodicity, we need to (positive) shift it back. */

      num_cboxes = 0;

      for (k = 0; k < neighbor_count; k++)
      {
         hood_box = hypre_BoxArrayBox(neighbor_boxes, k);
         /* check the stencil grid to see if it makes sense to intersect */
         /* roles of box and hood_box are swapped relative to the recv pass,
            so the direction index is taken from the neighbor's point of view */
         for (d = 0; d < 3; d++)
         {
            sgindex[d] = 1;
            
            s = hypre_BoxIMinD(box, d) - hypre_BoxIMaxD(hood_box, d);
            if (s > 0)
            {
               sgindex[d] = 2;
            }
            s = hypre_BoxIMinD(hood_box, d) - hypre_BoxIMaxD(box, d);
            if (s > 0)
            {
               sgindex[d] = 0;
            }
         }
         /* it makes sense only if we have at least one non-zero entry */   
         if (stencil_grid[sgindex[0]][sgindex[1]][sgindex[2]])
         {
            /* grow the neighbor box and intersect */
            /* grow_box is overwritten here; it no longer holds the grown
               local box after the first neighbor that passes the test */
            hypre_CopyBox(hood_box, grow_box);
            for (d = 0; d < 3; d++)
            {
               hypre_BoxIMinD(grow_box, d) -= grow[d][0];
               hypre_BoxIMaxD(grow_box, d) += grow[d][1];
            }
            hypre_IntersectBoxes(box, grow_box, int_box);
            /* if we have a positive volume box, this is a send region */
            if (hypre_BoxVolume(int_box))
            {
               /* keep track of which neighbor: k... */
               cboxes_neighbor_location[num_cboxes] = k;
               cboxes[num_cboxes] = &cboxes_mem[num_cboxes];
               /* keep the intersected box */
               hypre_CopyBox(int_box, cboxes[num_cboxes]);
               num_cboxes++;
            }
         }
      }/* end of loop through neighbors */

      /* create send_box_array and send_procs for box i */
      send_box_array = hypre_BoxArrayArrayBoxArray(send_boxes, i);
      hypre_BoxArraySetSize(send_box_array, num_cboxes);
      send_procs[i] = hypre_CTAlloc(HYPRE_Int, num_cboxes);
      send_rboxnums[i] = hypre_CTAlloc(HYPRE_Int, num_cboxes);
      send_rbox_array = hypre_BoxArrayArrayBoxArray(send_rboxes, i);
      hypre_BoxArraySetSize(send_rbox_array, num_cboxes);

      for (m = 0; m < num_cboxes; m++)
      {
         loc = cboxes_neighbor_location[m];
         send_procs[i][m] = neighbor_procs[loc];
         send_rboxnums[i][m] = neighbor_ids[loc];
         hypre_CopyBox(cboxes[m], hypre_BoxArrayBox(send_box_array, m));

         /* if periodic, positive shift before copying to the rbox_array */
         if (neighbor_shifts[loc]) /* periodic if shift != 0 */
         {
            pshift = hypre_StructGridPShift(grid, neighbor_shifts[loc]);
            hypre_BoxShiftPos(cboxes[m], pshift);
         }
         hypre_CopyBox(cboxes[m], hypre_BoxArrayBox(send_rbox_array, m));

         cboxes[m] = NULL;
      }
   } /* end of loop through each local box */

Пример #19

Показать файл

Файл: par_amg_solveT.c Проект: Chang-Liu-0520/hypre

/*--------------------------------------------------------------------------
 * hypre_BoomerAMGCycleT
 *
 * Runs one multigrid cycle over the level hierarchy stored in the AMG data
 * object, using the transpose relaxation routine hypre_BoomerAMGRelaxT.
 *
 * On the way down, the residual is formed with hypre_ParCSRMatrixMatvecT on
 * A and moved to the coarser level with hypre_ParCSRMatrixMatvecT on P.  On
 * the way up, the coarse correction is added with hypre_ParCSRMatrixMatvec
 * on R.
 *
 * Parameters:
 *   amg_vdata - pointer to the hypre_ParAMGData object (passed as void *).
 *   F_array   - per-level right-hand sides; F_array[level+1] is overwritten
 *               when the cycle descends from `level`.
 *   U_array   - per-level solution vectors; U_array[level+1] is zeroed on
 *               descent and U_array[level-1] is corrected on ascent.
 *
 * Returns:
 *   0 when every relaxation call returned 0, otherwise the first non-zero
 *   value returned by hypre_BoomerAMGRelaxT (the cycle stops at that point).
 *
 * Side effects:
 *   Stores the updated operation count back into the AMG data object on
 *   normal completion.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_BoomerAMGCycleT( void              *amg_vdata, 
                   hypre_ParVector  **F_array,
                   hypre_ParVector  **U_array   )
{
   hypre_ParAMGData *amg_data = amg_vdata;

   /* Data Structure variables */

   hypre_ParCSRMatrix    **A_array;
   hypre_ParCSRMatrix    **P_array;
   hypre_ParCSRMatrix    **R_array;
   hypre_ParVector        *Vtemp;

   HYPRE_Int     **CF_marker_array;

   HYPRE_Real      cycle_op_count;
   HYPRE_Int       cycle_type;
   HYPRE_Int       num_levels;
   HYPRE_Int       max_levels;

   HYPRE_Real     *num_coeffs;
   HYPRE_Int      *num_grid_sweeps;
   HYPRE_Int      *grid_relax_type;
   HYPRE_Int     **grid_relax_points;

   /* Local variables  */

   HYPRE_Int      *lev_counter;
   HYPRE_Int       Solve_err_flag;
   HYPRE_Int       k;
   HYPRE_Int       j;
   HYPRE_Int       level;
   HYPRE_Int       cycle_param;
   HYPRE_Int       coarse_grid;
   HYPRE_Int       fine_grid;
   HYPRE_Int       Not_Finished;
   HYPRE_Int       num_sweep;
   HYPRE_Int       relax_type;
   /* Must start with a defined value: when grid_relax_points is NULL it is
      never assigned on the multi-level path, yet it is still passed by value
      to hypre_BoomerAMGRelaxT below. */
   HYPRE_Int       relax_points = 0;
   HYPRE_Real     *relax_weight;

   /* 1 when a grid_relax_points table is present */
   HYPRE_Int       old_version = 0;

   HYPRE_Real      alpha;
   HYPRE_Real      beta;

   /* Acquire data and allocate storage */

   A_array           = hypre_ParAMGDataAArray(amg_data);
   P_array           = hypre_ParAMGDataPArray(amg_data);
   R_array           = hypre_ParAMGDataRArray(amg_data);
   CF_marker_array   = hypre_ParAMGDataCFMarkerArray(amg_data);
   Vtemp             = hypre_ParAMGDataVtemp(amg_data);
   num_levels        = hypre_ParAMGDataNumLevels(amg_data);
   max_levels        = hypre_ParAMGDataMaxLevels(amg_data);
   cycle_type        = hypre_ParAMGDataCycleType(amg_data);

   num_grid_sweeps     = hypre_ParAMGDataNumGridSweeps(amg_data);
   grid_relax_type     = hypre_ParAMGDataGridRelaxType(amg_data);
   grid_relax_points   = hypre_ParAMGDataGridRelaxPoints(amg_data);
   relax_weight        = hypre_ParAMGDataRelaxWeight(amg_data);

   cycle_op_count = hypre_ParAMGDataCycleOpCount(amg_data);

   lev_counter = hypre_CTAlloc(HYPRE_Int, num_levels);

   /* Initialize */

   Solve_err_flag = 0;

   if (grid_relax_points) old_version = 1;

   /* per-level nonzero counts, used only for the operation-count estimate */
   num_coeffs = hypre_CTAlloc(HYPRE_Real, num_levels);
   for (j = 0; j < num_levels; j++)
   {
      num_coeffs[j] = hypre_ParCSRMatrixDNumNonzeros(A_array[j]);
   }

   /*---------------------------------------------------------------------
    *    Initialize cycling control counter
    *
    *     Cycling is controlled using a level counter: lev_counter[k]
    *     
    *     Each time relaxation is performed on level k, the
    *     counter is decremented by 1. If the counter is then
    *     negative, we go to the next finer level. If non-
    *     negative, we go to the next coarser level. The
    *     following actions control cycling:
    *     
    *     a. lev_counter[0] is initialized to 1.
    *     b. lev_counter[k] is initialized to cycle_type for k>0.
    *     
    *     c. During cycling, when going down to level k, lev_counter[k]
    *        is set to the max of (lev_counter[k],cycle_type)
    *---------------------------------------------------------------------*/

   Not_Finished = 1;

   lev_counter[0] = 1;
   for (k = 1; k < num_levels; ++k) 
   {
      lev_counter[k] = cycle_type;
   }

   level = 0;
   cycle_param = 0;

   /*---------------------------------------------------------------------
    * Main loop of cycling
    *--------------------------------------------------------------------*/
  
   while (Not_Finished)
   {
      num_sweep = num_grid_sweeps[cycle_param];
      relax_type = grid_relax_type[cycle_param];
      /* only relaxation types 7 and 9 are passed through; anything else is
         replaced by 7 */
      if (relax_type != 7 && relax_type != 9) relax_type = 7;

      /*------------------------------------------------------------------
       * Do the relaxation num_sweep times
       *-----------------------------------------------------------------*/

      for (j = 0; j < num_sweep; j++)
      {
         if (num_levels == 1 && max_levels > 1)
         {
            relax_points = 0;
         }
         else if (old_version)
         {
            relax_points = grid_relax_points[cycle_param][j];
         }

         /*-----------------------------------------------
          * VERY sloppy approximation to cycle complexity
          *-----------------------------------------------*/

         if (old_version && level < num_levels -1)
         {
            switch (relax_points)
            {
               case 1:
                  cycle_op_count += num_coeffs[level+1];
                  break;
  
               case -1: 
                  cycle_op_count += (num_coeffs[level]-num_coeffs[level+1]); 
                  break;

               default:
                  /* other values do not contribute to the estimate */
                  break;
            }
         }
         else
         {
            cycle_op_count += num_coeffs[level]; 
         }

         /* note: this does not use relax_points, so it doesn't matter if
            its the "old version" */
         
         Solve_err_flag = hypre_BoomerAMGRelaxT(A_array[level], 
                                                F_array[level],
                                                CF_marker_array[level],
                                                relax_type,
                                                relax_points,
                                                relax_weight[level],
                                                U_array[level],
                                                Vtemp);
         
         if (Solve_err_flag != 0)
         {
            /* abort the cycle; the op count is intentionally not stored */
            hypre_TFree(lev_counter);
            hypre_TFree(num_coeffs);
            return(Solve_err_flag);
         }
      }

      /*------------------------------------------------------------------
       * Decrement the control counter and determine which grid to visit next
       *-----------------------------------------------------------------*/

      --lev_counter[level];
       
      if (lev_counter[level] >= 0 && level != num_levels-1)
      {
         /*---------------------------------------------------------------
          * Visit coarser level next.  Compute the residual with
          * hypre_ParCSRMatrixMatvecT on A (Vtemp = F - A^T U), then move it
          * to the coarse level with hypre_ParCSRMatrixMatvecT on P
          * (since transpose i.e. P^TATR instead of RAP).
          * Reset counters and cycling parameters for coarse level
          *--------------------------------------------------------------*/

         fine_grid = level;
         coarse_grid = level + 1;

         hypre_ParVectorSetConstantValues(U_array[coarse_grid], 0.0);
          
         hypre_ParVectorCopy(F_array[fine_grid],Vtemp);
         alpha = -1.0;
         beta = 1.0;
         hypre_ParCSRMatrixMatvecT(alpha, A_array[fine_grid], U_array[fine_grid],
                         beta, Vtemp);

         alpha = 1.0;
         beta = 0.0;

         hypre_ParCSRMatrixMatvecT(alpha,P_array[fine_grid],Vtemp,
                          beta,F_array[coarse_grid]);

         ++level;
         lev_counter[level] = hypre_max(lev_counter[level],cycle_type);
         cycle_param = 1;
         if (level == num_levels-1) cycle_param = 3;
      }
      else if (level != 0)
      {
         /*---------------------------------------------------------------
          * Visit finer level next.
          * Use restriction (since transpose i.e. P^TA^TR instead of RAP)
          * and add correction using hypre_ParCSRMatrixMatvec.
          * Reset counters and cycling parameters for finer level.
          *--------------------------------------------------------------*/

         fine_grid = level - 1;
         coarse_grid = level;
         alpha = 1.0;
         beta = 1.0;

         hypre_ParCSRMatrixMatvec(alpha, R_array[fine_grid], U_array[coarse_grid],
                         beta, U_array[fine_grid]);            
 
         --level;
         cycle_param = 2;
         if (level == 0) cycle_param = 0;
      }
      else
      {
         Not_Finished = 0;
      }
   }

   hypre_ParAMGDataCycleOpCount(amg_data) = cycle_op_count;

   hypre_TFree(lev_counter);
   hypre_TFree(num_coeffs);

   return(Solve_err_flag);
}

Пример #20

Показать файл

Файл: par_jacobi_interp.c Проект: Chang-Liu-0520/hypre

/*--------------------------------------------------------------------------
 * hypre_BoomerAMGTruncateInterp
 *
 * Truncate the interpolation matrix P in place, but only in rows for which
 * the marker is <0.  Truncation means that an element P(i,j) is dropped if
 *   P(i,j)>0 and P(i,j)<eps*max( P(i,j) )  or if
 *   P(i,j)>0 and P(i,j)<dlt*max( -P(i,j) )  or if
 *   P(i,j)<0 and P(i,j)>dlt*min( -P(i,j) )  or if
 *   P(i,j)<0 and P(i,j)>eps*min( P(i,j) )
 *      ( 0<eps,dlt<1, typically 0.1=dlt<eps=0.2, )
 * The min and max are only computed locally (over the rows held by this
 * process); the global reduction is kept under "#if 0" below.
 *
 * The surviving entries of each row are rescaled so that the row sum is
 * unchanged (skipped when the surviving sum is zero).  Dropped entries are
 * squeezed out of the j/data arrays; the arrays are not re-allocated.
 *
 * The function hypre_BoomerAMGInterpTruncation in par_interp.c is
 * very similar.  It looks at fabs(value) rather than separately
 * dealing with value<0 and value>0 as recommended by Klaus Stuben,
 * thus as this function does.  In this function, only "marked" rows
 * are affected.  Lastly, in hypre_BoomerAMGInterpTruncation, if any
 * element gets discarded, it reallocates arrays to the new size.
 *
 * Parameters:
 *   P         - matrix whose diag and offd parts are compacted in place;
 *               its nonzero counts are updated.
 *   eps       - relative threshold against the same-sign extreme value.
 *   dlt       - relative threshold against the opposite-sign extreme value.
 *   CF_marker - per-row marker; rows with CF_marker[i] >= 0 keep every entry.
 *--------------------------------------------------------------------------*/
void hypre_BoomerAMGTruncateInterp( hypre_ParCSRMatrix *P,
                                    HYPRE_Real eps, HYPRE_Real dlt,
                                    HYPRE_Int * CF_marker )
{
   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
   HYPRE_Real      *P_diag_data = hypre_CSRMatrixData(P_diag);
   HYPRE_Int       *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int       *P_diag_j = hypre_CSRMatrixJ(P_diag);
   HYPRE_Real      *P_offd_data = hypre_CSRMatrixData(P_offd);
   HYPRE_Int       *P_offd_i = hypre_CSRMatrixI(P_offd);
   HYPRE_Int       *P_offd_j = hypre_CSRMatrixJ(P_offd);
   HYPRE_Int       *new_P_diag_i;
   HYPRE_Int       *new_P_offd_i;
   HYPRE_Int        num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag);
   HYPRE_Int        num_rows_offd_P = hypre_CSRMatrixNumRows(P_offd);
   HYPRE_Int        num_nonzeros_diag = hypre_CSRMatrixNumNonzeros(P_diag);
   HYPRE_Int        num_nonzeros_offd = hypre_CSRMatrixNumNonzeros(P_offd);
#if 0
   MPI_Comm comm = hypre_ParCSRMatrixComm( P );
   HYPRE_Real vmax1, vmin1;
#endif
   HYPRE_Real vmax = 0.0;
   HYPRE_Real vmin = 0.0;
   HYPRE_Real v, old_sum, new_sum, scale, wmax, wmin;
   HYPRE_Int i1, m, m1d, m1o;

   /* compute vmax = eps*max(P(i,j)), vmin = eps*min(P(i,j)) */
   /* NOTE(review): the offd row pointers are read here for every diag row
      without the i1<num_rows_offd_P guard used further down - confirm that
      P_offd_i always has num_rows_diag_P+1 entries. */
   for ( i1 = 0; i1 < num_rows_diag_P; i1++ )
   {
      for ( m=P_diag_i[i1]; m<P_diag_i[i1+1]; ++m )
      {
         v = P_diag_data[m];
         vmax = hypre_max( v, vmax );
         vmin = hypre_min( v, vmin );
      }
      for ( m=P_offd_i[i1]; m<P_offd_i[i1+1]; ++m )
      {
         v = P_offd_data[m];
         vmax = hypre_max( v, vmax );
         vmin = hypre_min( v, vmin );
      }
   }
#if 0
   /* This can make max,min global so results don't depend on no. processors
      We don't want this except for testing, or maybe this could be put
      someplace better.  I don't like adding communication here, for a minor reason.
   */
   vmax1 = vmax; vmin1 = vmin;
   hypre_MPI_Allreduce( &vmax1, &vmax, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, comm );
   hypre_MPI_Allreduce( &vmin1, &vmin, 1, HYPRE_MPI_REAL, hypre_MPI_MIN, comm );
#endif
   if ( vmax <= 0.0 ) vmax =  1.0;  /* make sure no v is v>vmax if no v is v>0 */
   if ( vmin >= 0.0 ) vmin = -1.0;  /* make sure no v is v<vmin if no v is v<0 */
   /* cross-sign thresholds must be taken before vmax/vmin are scaled by eps */
   wmax = - dlt * vmin;
   wmin = - dlt * vmax;
   vmax *= eps;
   vmin *= eps;

   /* Repack the i,j,and data arrays so as to discard the small elements of P.
      Elements of Coarse rows (CF_marker>=0) are always kept.
      The arrays are not re-allocated, so there will generally be unused space
      at the ends of the arrays. */
   new_P_diag_i = hypre_CTAlloc( HYPRE_Int, num_rows_diag_P+1 );
   new_P_offd_i = hypre_CTAlloc( HYPRE_Int, num_rows_offd_P+1 );
   /* m1d / m1o: next write position in the compacted diag / offd arrays */
   m1d = P_diag_i[0];
   m1o = P_offd_i[0];
   for ( i1 = 0; i1 < num_rows_diag_P; i1++ )
   {
      old_sum = 0;
      new_sum = 0;
      for ( m=P_diag_i[i1]; m<P_diag_i[i1+1]; ++m )
      {
         v = P_diag_data[m];
         old_sum += v;
         if ( CF_marker[i1]>=0 || ( v>=vmax && v>=wmax ) || ( v<=vmin && v<=wmin ) )
         {  /* keep v */
            new_sum += v;
            P_diag_j[m1d] = P_diag_j[m];
            P_diag_data[m1d] = P_diag_data[m];
            ++m1d;
         }
         else
         {  /* discard v */
            --num_nonzeros_diag;
         }
      }
      for ( m=P_offd_i[i1]; m<P_offd_i[i1+1]; ++m )
      {
         v = P_offd_data[m];
         old_sum += v;
         if ( CF_marker[i1]>=0 || ( v>=vmax && v>=wmax ) || ( v<=vmin && v<=wmin ) )
         {  /* keep v */
            new_sum += v;
            P_offd_j[m1o] = P_offd_j[m];
            P_offd_data[m1o] = P_offd_data[m];
            ++m1o;
         }
         else
         {  /* discard v */
            --num_nonzeros_offd;
         }
      }

      new_P_diag_i[i1+1] = m1d;
      if ( i1<num_rows_offd_P ) new_P_offd_i[i1+1] = m1o;

      /* rescale to keep row sum the same */
      if (new_sum!=0) scale = old_sum/new_sum; else scale = 1.0;
      for ( m=new_P_diag_i[i1]; m<new_P_diag_i[i1+1]; ++m )
         P_diag_data[m] *= scale;
      if ( i1<num_rows_offd_P ) /* this test fails when there is no offd block */
         for ( m=new_P_offd_i[i1]; m<new_P_offd_i[i1+1]; ++m )
            P_offd_data[m] *= scale;

   }

   /* install the compacted row pointers (entry 0 is left as it was) */
   /* NOTE(review): the offd pointers are only refreshed while
      num_nonzeros_offd>0, so if truncation removed every offd entry they
      keep their old values - confirm this is intended. */
   for ( i1 = 1; i1 <= num_rows_diag_P; i1++ )
   {
      P_diag_i[i1] = new_P_diag_i[i1];
      if ( i1<=num_rows_offd_P && num_nonzeros_offd>0 ) P_offd_i[i1] = new_P_offd_i[i1];
   }
   hypre_TFree( new_P_diag_i );
   /* new_P_offd_i always holds at least one entry (num_rows_offd_P+1), so it
      must be released even when the offd block has zero rows */
   hypre_TFree( new_P_offd_i );

   hypre_CSRMatrixNumNonzeros(P_diag) = num_nonzeros_diag;
   hypre_CSRMatrixNumNonzeros(P_offd) = num_nonzeros_offd;
   hypre_ParCSRMatrixSetDNumNonzeros( P );
   hypre_ParCSRMatrixSetNumNonzeros( P );

}

Пример #21

Показать файл

Файл: par_jacobi_interp.c Проект: Chang-Liu-0520/hypre

void hypre_BoomerAMGJacobiInterp_1( hypre_ParCSRMatrix * A,
                                    hypre_ParCSRMatrix ** P,
                                    hypre_ParCSRMatrix * S,
                                    HYPRE_Int * CF_marker, HYPRE_Int level,
                                    HYPRE_Real truncation_threshold,
                                    HYPRE_Real truncation_threshold_minus,
                                    HYPRE_Int * dof_func, HYPRE_Int * dof_func_offd,
                                    HYPRE_Real weight_AF)
/* One step of Jacobi interpolation:
   A is the linear system.
   P is an interpolation matrix, input and output
   CF_marker identifies coarse and fine points
   If we imagine P and A as split into coarse and fine submatrices,

       [ AFF  AFC ]   [ AF ]            [ IFC ]
   A = [          ] = [    ] ,      P = [     ]
       [ ACF  ACC ]   [ AC ]            [ ICC ]
   (note that ICC is an identity matrix, applied to coarse points only)
   then this function computes

   IFCnew = IFCold - DFF(-1) * ( AFF*IFCold + AFC )
          = IFCold - DFF(-1) * AF * Pold)
   where DFF is the diagonal of AFF, (-1) represents the inverse, and
   where "old" denotes a value on entry to this function, "new" a returned value.

*/
{
   hypre_ParCSRMatrix * Pnew;
   hypre_ParCSRMatrix * C;
   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(*P);
   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(*P);
   HYPRE_Real      *P_diag_data = hypre_CSRMatrixData(P_diag);
   HYPRE_Int             *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int             *P_diag_j = hypre_CSRMatrixJ(P_diag);
   HYPRE_Real      *P_offd_data = hypre_CSRMatrixData(P_offd);
   HYPRE_Int             *P_offd_i = hypre_CSRMatrixI(P_offd);
   hypre_CSRMatrix *C_diag;
   hypre_CSRMatrix *C_offd;
   hypre_CSRMatrix *Pnew_diag;
   hypre_CSRMatrix *Pnew_offd;
   HYPRE_Int	num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag);
   HYPRE_Int i;
   HYPRE_Int Jnochanges=0, Jchanges, Pnew_num_nonzeros;
   HYPRE_Int CF_coarse=0;
   HYPRE_Int * J_marker = hypre_CTAlloc( HYPRE_Int, num_rows_diag_P );
   HYPRE_Int nc, ncmax, ncmin, nc1;
   HYPRE_Int num_procs, my_id;
   MPI_Comm comm = hypre_ParCSRMatrixComm( A );
#ifdef HYPRE_JACINT_PRINT_ROW_SUMS
   HYPRE_Int m, nmav, npav;
   HYPRE_Real PIi, PIimax, PIimin, PIimav, PIipav, randthresh;
   HYPRE_Real eps = 1.0e-17;
#endif
#ifdef HYPRE_JACINT_PRINT_MATRICES
   char filename[80];
   HYPRE_Int i_dummy, j_dummy;
   HYPRE_Int *base_i_ptr = &i_dummy;
   HYPRE_Int *base_j_ptr = &j_dummy;
#endif
#ifdef HYPRE_JACINT_PRINT_SOME_ROWS
   HYPRE_Int sample_rows[50], n_sample_rows=0, isamp;
#endif

   hypre_MPI_Comm_size(comm, &num_procs);   
   hypre_MPI_Comm_rank(comm,&my_id);


   for ( i=0; i<num_rows_diag_P; ++i )
   {
      J_marker[i] = CF_marker[i];
      if (CF_marker[i]>=0) ++CF_coarse;
   }
#ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS
   hypre_printf("%i %i Jacobi_Interp_1, P has %i+%i=%i nonzeros, local sum %e\n", my_id, level,
          hypre_CSRMatrixNumNonzeros(P_diag), hypre_CSRMatrixNumNonzeros(P_offd),
          hypre_CSRMatrixNumNonzeros(P_diag)+hypre_CSRMatrixNumNonzeros(P_offd),
          hypre_ParCSRMatrixLocalSumElts(*P) );
#endif

   /* row sum computations, for output */
#ifdef HYPRE_JACINT_PRINT_ROW_SUMS
   PIimax=-1.0e12, PIimin=1.0e12, PIimav=0, PIipav=0;
   nmav=0, npav=0;
   for ( i=0; i<num_rows_diag_P; ++i )
   {
      PIi = 0;  /* i-th value of P*1, i.e. sum of row i of P */
      for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m )
         PIi += P_diag_data[m];
      for ( m=P_offd_i[i]; m<P_offd_i[i+1]; ++m )
         PIi += P_offd_data[m];
      if (CF_marker[i]<0)
      {
         PIimax = hypre_max( PIimax, PIi );
         PIimin = hypre_min( PIimin, PIi );
         if (PIi<=1-eps) { PIimav+=PIi; ++nmav; };
         if (PIi>=1+eps) { PIipav+=PIi; ++npav; };
      }
   }
   if ( nmav>0 ) PIimav = PIimav/nmav;
   if ( npav>0 ) PIipav = PIipav/npav;
   hypre_printf("%i %i P in max,min row sums %e %e\n", my_id, level, PIimax, PIimin );
#endif

   ncmax=0; ncmin=num_rows_diag_P; nc1=0;
   for ( i=0; i<num_rows_diag_P; ++i )
      if (CF_marker[i]<0)
      {
         nc = P_diag_i[i+1] - P_diag_i[i];
         if (nc<=1)
         {
            ++nc1;
         }
         ncmax = hypre_max( nc, ncmax );
         ncmin = hypre_min( nc, ncmin );
      }
#if 0
   /* a very agressive reduction in how much the Jacobi step does: */
   for ( i=0; i<num_rows_diag_P; ++i )
      if (CF_marker[i]<0)
      {
         nc = P_diag_i[i+1] - P_diag_i[i];
         if (nc>ncmin+1)
            /*if ( nc > ncmin + 0.5*(ncmax-ncmin) )*/
         {
            J_marker[i] = 1;
            ++Jnochanges;
         }
      }
#endif
   Jchanges = num_rows_diag_P - Jnochanges - CF_coarse;

#ifdef HYPRE_JACINT_PRINT_SOME_ROWS
   hypre_printf("some rows to be changed: ");
   randthresh = 15/(HYPRE_Real)Jchanges;
   for ( i=0; i<num_rows_diag_P; ++i )
   {
      if ( J_marker[i]<0 )
      {
         if ( ((HYPRE_Real)rand())/RAND_MAX < randthresh )
         {
            hypre_printf( "%i: ", i );
            for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m )
               hypre_printf( " %i %f, ", P_diag_j[m], P_diag_data[m] );
            hypre_printf(";  ");
            sample_rows[n_sample_rows] = i;
            ++n_sample_rows;
         }
      }
   }
   hypre_printf("\n");
#endif
#ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS
   hypre_printf("%i %i P has %i rows, %i changeable, %i don't change-good, %i coarse\n",
          my_id, level, num_rows_diag_P, Jchanges, Jnochanges, CF_coarse );
   hypre_printf("%i %i min,max diag cols per row: %i, %i;  no.rows w.<=1 col: %i\n", my_id, level, ncmin, ncmax, nc1 );
#endif
#ifdef HYPRE_JACINT_PRINT_MATRICES
   if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX )
   {
      hypre_sprintf( filename, "Ain%i", level );
      hypre_ParCSRMatrixPrintIJ( A,0,0,filename);
      hypre_sprintf( filename, "Sin%i", level );
      hypre_ParCSRMatrixPrintIJ( S,0,0,filename);
      hypre_sprintf( filename, "Pin%i", level );
      hypre_ParCSRMatrixPrintIJ( *P,0,0,filename);
   }
#endif

   C = hypre_ParMatmul_FC( A, *P, J_marker, dof_func, dof_func_offd );
   /* hypre_parMatmul_FC creates and returns C, a variation of the
      matrix product A*P in which only the "Fine"-designated rows have
      been computed.  (all columns are Coarse because all columns of P
      are).  "Fine" is defined solely by the marker array, and for
      example could be a proper subset of the fine points of a
      multigrid hierarchy.
      As a matrix, C is the size of A*P.  But only the marked rows have
      been computed.
   */
#ifdef HYPRE_JACINT_PRINT_MATRICES
   hypre_sprintf( filename, "C%i", level );
   if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX ) hypre_ParCSRMatrixPrintIJ( C,0,0,filename);
#endif
   C_diag = hypre_ParCSRMatrixDiag(C);
   C_offd = hypre_ParCSRMatrixOffd(C);
#ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS
   hypre_printf("%i %i Jacobi_Interp_1 after matmul, C has %i+%i=%i nonzeros, local sum %e\n",
          my_id, level, hypre_CSRMatrixNumNonzeros(C_diag),
          hypre_CSRMatrixNumNonzeros(C_offd),
          hypre_CSRMatrixNumNonzeros(C_diag)+hypre_CSRMatrixNumNonzeros(C_offd),
          hypre_ParCSRMatrixLocalSumElts(C) );
#endif

   hypre_ParMatScaleDiagInv_F( C, A, weight_AF, J_marker );
   /* hypre_ParMatScaleDiagInv scales of its first argument by premultiplying with
      a submatrix of the inverse of the diagonal of its second argument.
      The marker array determines which diagonal elements are used.  The marker
      array should select exactly the right number of diagonal elements (the number
      of rows of AP_FC).
   */
#ifdef HYPRE_JACINT_PRINT_MATRICES
   hypre_sprintf( filename, "Cout%i", level );
   if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX )  hypre_ParCSRMatrixPrintIJ( C,0,0,filename);
#endif

   Pnew = hypre_ParMatMinus_F( *P, C, J_marker );
   /* hypre_ParMatMinus_F subtracts rows of its second argument from selected rows
      of its first argument.  The marker array determines which rows of the first
      argument are affected, and they should exactly correspond to all the rows
      of the second argument.
   */
   Pnew_diag = hypre_ParCSRMatrixDiag(Pnew);
   Pnew_offd = hypre_ParCSRMatrixOffd(Pnew);
   Pnew_num_nonzeros = hypre_CSRMatrixNumNonzeros(Pnew_diag)+hypre_CSRMatrixNumNonzeros(Pnew_offd);
#ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS
   hypre_printf("%i %i Jacobi_Interp_1 after MatMinus, Pnew has %i+%i=%i nonzeros, local sum %e\n",
          my_id, level, hypre_CSRMatrixNumNonzeros(Pnew_diag),
          hypre_CSRMatrixNumNonzeros(Pnew_offd), Pnew_num_nonzeros,
          hypre_ParCSRMatrixLocalSumElts(Pnew) );
#endif

   /* Transfer ownership of col_starts from P to Pnew  ... */
   if ( hypre_ParCSRMatrixColStarts(*P) &&
        hypre_ParCSRMatrixColStarts(*P)==hypre_ParCSRMatrixColStarts(Pnew) )
   {
      if ( hypre_ParCSRMatrixOwnsColStarts(*P) && !hypre_ParCSRMatrixOwnsColStarts(Pnew) )
      {
         hypre_ParCSRMatrixSetColStartsOwner(*P,0);
         hypre_ParCSRMatrixSetColStartsOwner(Pnew,1);
      }
   }

   hypre_ParCSRMatrixDestroy( C );
   hypre_ParCSRMatrixDestroy( *P );

   /* Note that I'm truncating all the fine rows, not just the J-marked ones. */
#if 0
   if ( Pnew_num_nonzeros < 10000 )  /* a fixed number like this makes it no.procs.-depdendent */
   {  /* ad-hoc attempt to reduce zero-matrix problems seen in testing..*/
      truncation_threshold = 1.0e-6 * truncation_threshold; 
      truncation_threshold_minus = 1.0e-6 * truncation_threshold_minus;
  }
#endif
   hypre_BoomerAMGTruncateInterp( Pnew, truncation_threshold,
                                  truncation_threshold_minus, CF_marker );

   hypre_MatvecCommPkgCreate ( Pnew );


   *P = Pnew;

   P_diag = hypre_ParCSRMatrixDiag(*P);
   P_offd = hypre_ParCSRMatrixOffd(*P);
   P_diag_data = hypre_CSRMatrixData(P_diag);
   P_diag_i = hypre_CSRMatrixI(P_diag);
   P_diag_j = hypre_CSRMatrixJ(P_diag);
   P_offd_data = hypre_CSRMatrixData(P_offd);
   P_offd_i = hypre_CSRMatrixI(P_offd);

   /* row sum computations, for output */
#ifdef HYPRE_JACINT_PRINT_ROW_SUMS
   PIimax=-1.0e12, PIimin=1.0e12, PIimav=0, PIipav=0;
   nmav=0, npav=0;
   for ( i=0; i<num_rows_diag_P; ++i )
   {
      PIi = 0;  /* i-th value of P*1, i.e. sum of row i of P */
      for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m )
         PIi += P_diag_data[m];
      for ( m=P_offd_i[i]; m<P_offd_i[i+1]; ++m )
         PIi += P_offd_data[m];
      if (CF_marker[i]<0)
      {
         PIimax = hypre_max( PIimax, PIi );
         PIimin = hypre_min( PIimin, PIi );
         if (PIi<=1-eps) { PIimav+=PIi; ++nmav; };
         if (PIi>=1+eps) { PIipav+=PIi; ++npav; };
      }
   }
   if ( nmav>0 ) PIimav = PIimav/nmav;
   if ( npav>0 ) PIipav = PIipav/npav;
   hypre_printf("%i %i P out max,min row sums %e %e\n", my_id, level, PIimax, PIimin );
#endif

#ifdef HYPRE_JACINT_PRINT_SOME_ROWS
   hypre_printf("some changed rows: ");
   for ( isamp=0; isamp<n_sample_rows; ++isamp )
   {
      i = sample_rows[isamp];
      hypre_printf( "%i: ", i );
      for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m )
         hypre_printf( " %i %f, ", P_diag_j[m], P_diag_data[m] );
      hypre_printf(";  ");
   }
   hypre_printf("\n");
#endif
   ncmax=0; ncmin=num_rows_diag_P; nc1=0;
   for ( i=0; i<num_rows_diag_P; ++i )
      if (CF_marker[i]<0)
      {
         nc = P_diag_i[i+1] - P_diag_i[i];
         if (nc<=1) ++nc1;
         ncmax = hypre_max( nc, ncmax );
         ncmin = hypre_min( nc, ncmin );
      }
#ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS
   hypre_printf("%i %i P has %i rows, %i changeable, %i too good, %i coarse\n",
          my_id, level, num_rows_diag_P, num_rows_diag_P-Jnochanges-CF_coarse, Jnochanges, CF_coarse );
   hypre_printf("%i %i min,max diag cols per row: %i, %i;  no.rows w.<=1 col: %i\n", my_id, level, ncmin, ncmax, nc1 );

   hypre_printf("%i %i Jacobi_Interp_1 after truncation (%e), Pnew has %i+%i=%i nonzeros, local sum %e\n",
          my_id, level, truncation_threshold,
          hypre_CSRMatrixNumNonzeros(Pnew_diag), hypre_CSRMatrixNumNonzeros(Pnew_offd),
          hypre_CSRMatrixNumNonzeros(Pnew_diag)+hypre_CSRMatrixNumNonzeros(Pnew_offd),
          hypre_ParCSRMatrixLocalSumElts(Pnew) );
#endif

   /* Programming Notes:
      1. Judging by around line 299 of par_interp.c, they typical use of CF_marker
      is that CF_marker>=0 means Coarse, CF_marker<0 means Fine.
   */
#ifdef HYPRE_JACINT_PRINT_MATRICES
   hypre_sprintf( filename, "Pout%i", level );
   if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX )  hypre_ParCSRMatrixPrintIJ( *P,0,0,filename);
#endif

   hypre_TFree( J_marker );
      
}

Пример #22

Показать файл

Файл: bicgstab.c Проект: ducpdx/hypre

/*--------------------------------------------------------------------------
 * hypre_BiCGSTABSolve
 *
 * Preconditioned BiCGSTAB iteration for A*x = b.  Every vector and matrix
 * operation goes through the callbacks stored in bicgstab_data->functions,
 * so A, b and x are opaque to this routine.
 *
 * Parameters:
 *    bicgstab_vdata  solver object (cast to hypre_BiCGSTABData *) holding
 *                    the tolerances, work vectors, preconditioner and
 *                    logging settings
 *    A, b            operator and right-hand side; only handed to callbacks
 *    x               initial guess on entry, updated in place
 *
 * Results written back into the solver object: converged, num_iterations,
 * rel_residual_norm and, when logging or printing is enabled, norms[].
 * When |b| == 0 the rel_residual_norm field holds the plain residual norm.
 *
 * Returns hypre_error_flag, except on a numerical breakdown, where the raw
 * codes 1 (|(r0,q)| < epsmac), 2 (|res| < epsmac) or 3 (|gamma| < epsmac)
 * are returned directly.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_BiCGSTABSolve(void  *bicgstab_vdata,
                    void  *A,
                    void  *b,
                    void  *x)
{
   hypre_BiCGSTABData      *bicgstab_data      = (hypre_BiCGSTABData*)bicgstab_vdata;
   hypre_BiCGSTABFunctions *bicgstab_functions = bicgstab_data->functions;

   HYPRE_Int         min_iter     = (bicgstab_data -> min_iter);
   HYPRE_Int         max_iter     = (bicgstab_data -> max_iter);
   HYPRE_Int         stop_crit    = (bicgstab_data -> stop_crit);
   HYPRE_Real        r_tol        = (bicgstab_data -> tol);
   HYPRE_Real        cf_tol       = (bicgstab_data -> cf_tol);
   void             *matvec_data  = (bicgstab_data -> matvec_data);
   HYPRE_Real        a_tol        = (bicgstab_data -> a_tol);

   /* work vectors owned by the solver object */
   void             *r            = (bicgstab_data -> r);
   void             *r0           = (bicgstab_data -> r0);
   void             *s            = (bicgstab_data -> s);
   void             *v            = (bicgstab_data -> v);
   void             *p            = (bicgstab_data -> p);
   void             *q            = (bicgstab_data -> q);

   HYPRE_Int       (*precond)(void*,void*,void*,void*) = (bicgstab_functions -> precond);
   HYPRE_Int        *precond_data = (HYPRE_Int*)(bicgstab_data -> precond_data);

   /* logging variables */
   HYPRE_Int         logging      = (bicgstab_data -> logging);
   HYPRE_Int         print_level  = (bicgstab_data -> print_level);
   HYPRE_Real       *norms        = (bicgstab_data -> norms);

   HYPRE_Int        iter;
   HYPRE_Int        my_id, num_procs;
   HYPRE_Real alpha, beta, gamma, epsilon, temp, res, r_norm, b_norm;
   HYPRE_Real epsmac = 1.e-128;   /* breakdown threshold for the divisions below */
   HYPRE_Real ieee_check = 0.;
   HYPRE_Real cf_ave_0 = 0.0;
   HYPRE_Real cf_ave_1 = 0.0;
   HYPRE_Real weight;
   HYPRE_Real r_norm_0;
   HYPRE_Real den_norm;
   HYPRE_Real gamma_numer;
   HYPRE_Real gamma_denom;

   (bicgstab_data -> converged) = 0;

   (*(bicgstab_functions->CommInfo))(A,&my_id,&num_procs);

   /* initialize work arrays */
   (*(bicgstab_functions->CopyVector))(b,r0);

   /* compute initial residual (presumably r0 = b - A*x; depends on the
      Matvec callback's alpha/beta convention) and seed r and p with it */
   (*(bicgstab_functions->Matvec))(matvec_data,-1.0, A, x, 1.0, r0);
   (*(bicgstab_functions->CopyVector))(r0,r);
   (*(bicgstab_functions->CopyVector))(r0,p);

   b_norm = sqrt((*(bicgstab_functions->InnerProd))(b,b));

   /* Since it is does not diminish performance, attempt to return an error flag
      and notify users when they supply bad input. */
   if (b_norm != 0.) ieee_check = b_norm/b_norm; /* INF -> NaN conversion */
   if (ieee_check != ieee_check)
   {
      /* ...INFs or NaNs in input can make ieee_check a NaN.  This test
         for ieee_check self-equality works on all IEEE-compliant compilers/
         machines, c.f. page 8 of "Lecture Notes on the Status of IEEE 754"
         by W. Kahan, May 31, 1996.  Currently (July 2002) this paper may be
         found at http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */
      if (logging > 0 || print_level > 0)
      {
         hypre_printf("\n\nERROR detected by Hypre ...  BEGIN\n");
         hypre_printf("ERROR -- hypre_BiCGSTABSolve: INFs and/or NaNs detected in input.\n");
         hypre_printf("User probably placed non-numerics in supplied b.\n");
         hypre_printf("Returning error flag += 101.  Program not terminated.\n");
         hypre_printf("ERROR detected by Hypre ...  END\n\n\n");
      }
      hypre_error(HYPRE_ERROR_GENERIC);
      return hypre_error_flag;
   }

   /* res keeps (r0,r0); it is the numerator of alpha in the first pass */
   res = (*(bicgstab_functions->InnerProd))(r0,r0);
   r_norm = sqrt(res);
   r_norm_0 = r_norm;

   /* Same bad-input screening as above, now for the initial residual. */
   if (r_norm != 0.) ieee_check = r_norm/r_norm; /* INF -> NaN conversion */
   if (ieee_check != ieee_check)
   {
      if (logging > 0 || print_level > 0)
      {
         hypre_printf("\n\nERROR detected by Hypre ...  BEGIN\n");
         hypre_printf("ERROR -- hypre_BiCGSTABSolve: INFs and/or NaNs detected in input.\n");
         hypre_printf("User probably placed non-numerics in supplied A or x_0.\n");
         hypre_printf("Returning error flag += 101.  Program not terminated.\n");
         hypre_printf("ERROR detected by Hypre ...  END\n\n\n");
      }

      hypre_error(HYPRE_ERROR_GENERIC);
      return hypre_error_flag;
   }

   if (logging > 0 || print_level > 0)
   {
      norms[0] = r_norm;
      if (print_level > 0 && my_id == 0)
      {
         hypre_printf("L2 norm of b: %e\n", b_norm);
         if (b_norm == 0.0)
            hypre_printf("Rel_resid_norm actually contains the residual norm\n");
         hypre_printf("Initial L2 norm of residual: %e\n", r_norm);
      }
   }
   iter = 0;

   if (b_norm > 0.0)
   {
      /* convergence criterion |r_i| <= r_tol*|b| if |b| > 0 */
      den_norm = b_norm;
   }
   else
   {
      /* convergence criterion |r_i| <= r_tol*|r0| if |b| = 0 */
      den_norm = r_norm;
   }

   if (stop_crit)
   {
      /* absolute residual norm criterion: |r_i| <= a_tol, or r_tol when
         a_tol was left at 0.0.  The fallback is for backwards compatibility
         (accomodating setting stop_crit to 1, but not setting a_tol) -
         eventually we will get rid of the stop_crit flag as with GMRES */
      if (a_tol == 0.0)
         epsilon = r_tol;
      else
         epsilon = a_tol; /* this means new interface fcn called */
   }
   else /* default convergence test (stop_crit = 0)*/
   {
      /* convergence criteria: |r_i| <= max( a_tol, r_tol * den_norm)
         den_norm = |r_0| or |b|
         note: default for a_tol is 0.0, so relative residual criteria is used unless
               user also specifies a_tol or sets r_tol = 0.0, which means absolute
               tol only is checked  */
      epsilon = hypre_max(a_tol, r_tol*den_norm);
   }

   if (print_level > 0 && my_id == 0)
   {
      if (b_norm > 0.0)
      {
         hypre_printf("=============================================\n\n");
         hypre_printf("Iters     resid.norm     conv.rate  rel.res.norm\n");
         hypre_printf("-----    ------------    ---------- ------------\n");
      }
      else
      {
         hypre_printf("=============================================\n\n");
         hypre_printf("Iters     resid.norm     conv.rate\n");
         hypre_printf("-----    ------------    ----------\n");
      }
   }

   /* Record the state of the initial data before any early exit.  For
      |b| == 0 the field holds the plain residual norm, exactly as it does
      at the end of this function; previously it was left stale when the
      routine returned before iterating. */
   (bicgstab_data -> num_iterations) = iter;
   if (b_norm > 0.0)
      (bicgstab_data -> rel_residual_norm) = r_norm/b_norm;
   if (b_norm == 0.0)
      (bicgstab_data -> rel_residual_norm) = r_norm;

   /* check for convergence before starting */
   if (r_norm == 0.0)
   {
      return hypre_error_flag;
   }
   else if (r_norm <= epsilon && iter >= min_iter)
   {
      if (print_level > 0 && my_id == 0)
      {
         hypre_printf("\n\n");
         hypre_printf("Tolerance and min_iter requirements satisfied by initial data.\n");
         hypre_printf("Final L2 norm of residual: %e\n\n", r_norm);
      }
      (bicgstab_data -> converged) = 1;
      return hypre_error_flag;
   }

   /* Start BiCGStab iterations */
   while (iter < max_iter)
   {
      iter++;

      /* first half step: preconditioner callback on p with output in v,
         then q = Matvec of v, alpha = res / (r0,q) */
      (*(bicgstab_functions->ClearVector))(v);
      precond(precond_data, A, p, v);
      (*(bicgstab_functions->Matvec))(matvec_data,1.0,A,v,0.0,q);
      temp = (*(bicgstab_functions->InnerProd))(r0,q);
      if (fabs(temp) >= epsmac)
         alpha = res/temp;
      else
      {
         hypre_printf("BiCGSTAB broke down!! divide by near zero\n");
         return(1);
      }
      (*(bicgstab_functions->Axpy))(alpha,v,x);
      (*(bicgstab_functions->Axpy))(-alpha,q,r);

      /* second half step: preconditioner callback on the updated r,
         s = Matvec of v, gamma = (r,s)/(s,s) */
      (*(bicgstab_functions->ClearVector))(v);
      precond(precond_data, A, r, v);
      (*(bicgstab_functions->Matvec))(matvec_data,1.0,A,v,0.0,s);
      /* Handle case when gamma = 0.0/0.0 as 0.0 and not NAN */
      gamma_numer = (*(bicgstab_functions->InnerProd))(r,s);
      gamma_denom = (*(bicgstab_functions->InnerProd))(s,s);
      if ((gamma_numer == 0.0) && (gamma_denom == 0.0))
         gamma = 0.0;
      else
         gamma = gamma_numer/gamma_denom;
      (*(bicgstab_functions->Axpy))(gamma,v,x);
      (*(bicgstab_functions->Axpy))(-gamma,s,r);

      /* residual is now updated, must immediately check for convergence */
      r_norm = sqrt((*(bicgstab_functions->InnerProd))(r,r));
      if (logging > 0 || print_level > 0)
      {
         norms[iter] = r_norm;
      }
      if (print_level > 0 && my_id == 0)
      {
         if (b_norm > 0.0)
            hypre_printf("% 5d    %e    %f   %e\n", iter, norms[iter],
                         norms[iter]/norms[iter-1], norms[iter]/b_norm);
         else
            hypre_printf("% 5d    %e    %f\n", iter, norms[iter],
                         norms[iter]/norms[iter-1]);
      }

      /* check for convergence, evaluate actual residual: the recurrence
         residual is only trusted as a trigger, the decision is made on a
         freshly computed residual (b copied into r, then Matvec with x) */
      if (r_norm <= epsilon && iter >= min_iter)
      {
         (*(bicgstab_functions->CopyVector))(b,r);
         (*(bicgstab_functions->Matvec))(matvec_data,-1.0,A,x,1.0,r);
         r_norm = sqrt((*(bicgstab_functions->InnerProd))(r,r));
         if (r_norm <= epsilon)
         {
            if (print_level > 0 && my_id == 0)
            {
               hypre_printf("\n\n");
               hypre_printf("Final L2 norm of residual: %e\n\n", r_norm);
            }
            (bicgstab_data -> converged) = 1;
            break;
         }
      }

      /*--------------------------------------------------------------------
       * Optional test to see if adequate progress is being made.
       * The average convergence factor is recorded and compared
       * against the tolerance 'cf_tol'. The weighting factor is
       * intended to pay more attention to the test when an accurate
       * estimate for average convergence factor is available.
       *--------------------------------------------------------------------*/
      if (cf_tol > 0.0)
      {
         cf_ave_0 = cf_ave_1;
         cf_ave_1 = pow( r_norm / r_norm_0, 1.0/(2.0*iter));

         weight   = fabs(cf_ave_1 - cf_ave_0);
         weight   = weight / hypre_max(cf_ave_1, cf_ave_0);
         weight   = 1.0 - weight;
         if (weight * cf_ave_1 > cf_tol) break;
      }

      /* direction update: beta = (new res / old res), where res = (r0,r) */
      if (fabs(res) >= epsmac)
         beta = 1.0/res;
      else
      {
         hypre_printf("BiCGSTAB broke down!! res=0 \n");
         return(2);
      }
      res = (*(bicgstab_functions->InnerProd))(r0,r);
      beta *= res;
      (*(bicgstab_functions->Axpy))(-gamma,q,p);
      if (fabs(gamma) >= epsmac)
         (*(bicgstab_functions->ScaleVector))((beta*alpha/gamma),p);
      else
      {
         hypre_printf("BiCGSTAB broke down!! gamma=0 \n");
         return(3);
      }
      (*(bicgstab_functions->Axpy))(1.0,r,p);
   } /* end while loop */

   (bicgstab_data -> num_iterations) = iter;
   if (b_norm > 0.0)
      (bicgstab_data -> rel_residual_norm) = r_norm/b_norm;
   if (b_norm == 0.0)
      (bicgstab_data -> rel_residual_norm) = r_norm;

   /* running out of iterations without meeting the tolerance is reported
      through the hypre error flag */
   if (iter >= max_iter && r_norm > epsilon) hypre_error(HYPRE_ERROR_CONV);

   return hypre_error_flag;
}

Пример #23

Показать файл

Файл: struct_matrix.c Проект: kailiao/test-suite

/*--------------------------------------------------------------------------
 * hypre_StructMatrixInitializeShell
 *
 * Prepares the bookkeeping of a struct matrix: the stored stencil and
 * number of values per grid point, the ghost-layer widths, and the
 * data space (grid boxes grown by the ghost layer).
 *
 * NOTE(review): this listing is cut off inside the data_space setup; the
 * remainder of the function, including its return statement, is not
 * visible here.
 *--------------------------------------------------------------------------*/
int
hypre_StructMatrixInitializeShell( hypre_StructMatrix *matrix )
{
    int    ierr = 0;

    hypre_StructGrid     *grid;

    hypre_StructStencil  *user_stencil;
    hypre_StructStencil  *stencil;
    hypre_Index          *stencil_shape;
    int                   stencil_size;
    int                   num_values;
    int                  *symm_elements;

    int                  *num_ghost;
    /* per-direction {lower, upper} ghost widths required by the stencil */
    int                   extra_ghost[] = {0, 0, 0, 0, 0, 0};

    hypre_BoxArray       *data_space;
    hypre_BoxArray       *boxes;
    hypre_Box            *box;
    hypre_Box            *data_box;

    int                 **data_indices;
    int                   data_size;
    int                   data_box_volume;

    int                   i, j, d;

    grid = hypre_StructMatrixGrid(matrix);

    /*-----------------------------------------------------------------------
     * Set up stencil and num_values:
     *    The stencil is a "symmetrized" version of the user's stencil
     *    as computed by hypre_StructStencilSymmetrize.
     *
     *    The `symm_elements' array is used to determine what data is
     *    explicitly stored (symm_elements[i] < 0) and what data is
     *    not explicitly stored (symm_elements[i] >= 0), but is instead
     *    stored as the transpose coefficient at a neighboring grid point.
     *
     *    This is only done when the matrix does not have a stencil yet.
     *-----------------------------------------------------------------------*/

    if (hypre_StructMatrixStencil(matrix) == NULL)
    {
        user_stencil = hypre_StructMatrixUserStencil(matrix);

        hypre_StructStencilSymmetrize(user_stencil, &stencil, &symm_elements);

        stencil_shape = hypre_StructStencilShape(stencil);
        stencil_size  = hypre_StructStencilSize(stencil);

        if (!hypre_StructMatrixSymmetric(matrix))
        {
            /* store all element data */
            for (i = 0; i < stencil_size; i++)
                symm_elements[i] = -1;
            num_values = stencil_size;
        }
        else
        {
            /* symmetric storage: half of the stencil entries, rounded up */
            num_values = (stencil_size + 1) / 2;
        }

        hypre_StructMatrixStencil(matrix)   = stencil;
        hypre_StructMatrixSymmElements(matrix) = symm_elements;
        hypre_StructMatrixNumValues(matrix) = num_values;
    }

    /*-----------------------------------------------------------------------
     * Set ghost-layer size for symmetric storage
     *   - All stencil coeffs are to be available at each point in the
     *     grid, as well as in the user-specified ghost layer.
     *-----------------------------------------------------------------------*/

    /* re-read everything from the matrix so this part also works when the
       stencil had already been set before this call */
    num_ghost     = hypre_StructMatrixNumGhost(matrix);
    stencil       = hypre_StructMatrixStencil(matrix);
    stencil_shape = hypre_StructStencilShape(stencil);
    stencil_size  = hypre_StructStencilSize(stencil);
    symm_elements = hypre_StructMatrixSymmElements(matrix);

    /* only entries that are not stored explicitly contribute: take the
       largest negative and positive offset in each of the 3 directions */
    for (i = 0; i < stencil_size; i++)
    {
        if (symm_elements[i] >= 0)
        {
            for (d = 0; d < 3; d++)
            {
                extra_ghost[2*d] =
                    hypre_max(extra_ghost[2*d], -hypre_IndexD(stencil_shape[i], d));
                extra_ghost[2*d + 1] =
                    hypre_max(extra_ghost[2*d + 1],  hypre_IndexD(stencil_shape[i], d));
            }
        }
    }

    /* the extra widths are added on top of the current num_ghost values,
       writing through the pointer obtained from the matrix */
    for (d = 0; d < 3; d++)
    {
        num_ghost[2*d]     += extra_ghost[2*d];
        num_ghost[2*d + 1] += extra_ghost[2*d + 1];
    }

    /*-----------------------------------------------------------------------
     * Set up data_space
     *-----------------------------------------------------------------------*/

    if (hypre_StructMatrixDataSpace(matrix) == NULL)
    {
        boxes = hypre_StructGridBoxes(grid);
        data_space = hypre_BoxArrayCreate(hypre_BoxArraySize(boxes));

        hypre_ForBoxI(i, boxes)
        {
            box = hypre_BoxArrayBox(boxes, i);
            data_box = hypre_BoxArrayBox(data_space, i);

            /* data box = grid box grown by the ghost layer on each side */
            hypre_CopyBox(box, data_box);
            for (d = 0; d < 3; d++)
            {
                hypre_BoxIMinD(data_box, d) -= num_ghost[2*d];
                hypre_BoxIMaxD(data_box, d) += num_ghost[2*d + 1];
            }
        }

Пример #24

Показать файл

Файл: sstruct_matrix.c Проект: 8l/insieme

/*--------------------------------------------------------------------------
 * hypre_SStructPMatrixCreate
 *
 * Allocates a part matrix for `pgrid' and fills in its bookkeeping:
 *   - for every variable vi, the semi-structured stencil stencils[vi] is
 *     split by target variable into struct stencils sstencils[vi][vj];
 *     smaps[vi][i] records the position of entry i inside its split stencil
 *   - a struct matrix is created for every non-empty (vi, vj) stencil
 *   - all symmetric[vi][vj] flags start at 0
 *   - the sentries scratch array is sized to the largest split stencil
 *
 * The new object, with a reference count of 1, is handed back through
 * pmatrix_ptr.  Returns hypre_error_flag.
 *--------------------------------------------------------------------------*/
int
hypre_SStructPMatrixCreate( MPI_Comm               comm,
                            hypre_SStructPGrid    *pgrid,
                            hypre_SStructStencil **stencils,
                            hypre_SStructPMatrix **pmatrix_ptr )
{
    hypre_SStructPMatrix  *result;
    int                    num_vars;
    int                  **entry_maps;
    hypre_StructStencil ***split_stencils;
    hypre_StructMatrix  ***struct_mats;
    int                  **symm_flags;
    int                   *counts;
    hypre_Index          **shapes;
    int                    max_entries = 0;
    int                    row, col;

    result = hypre_TAlloc(hypre_SStructPMatrix, 1);

    hypre_SStructPMatrixComm(result)     = comm;
    hypre_SStructPMatrixPGrid(result)    = pgrid;
    hypre_SStructPMatrixStencils(result) = stencils;
    num_vars = hypre_SStructPGridNVars(pgrid);
    hypre_SStructPMatrixNVars(result) = num_vars;

    /* split each variable's stencil into one struct stencil per target var */
    entry_maps     = hypre_TAlloc(int *, num_vars);
    split_stencils = hypre_TAlloc(hypre_StructStencil **, num_vars);
    counts         = hypre_TAlloc(int, num_vars);
    shapes         = hypre_TAlloc(hypre_Index *, num_vars);

    for (row = 0; row < num_vars; row++)
    {
        hypre_StructStencil  **stencil_row;
        hypre_StructStencil   *full_stencil;
        hypre_Index           *full_shape;
        int                   *target_vars;
        int                   *map_row;
        int                    full_size;
        int                    dim;
        int                    entry;

        stencil_row = hypre_TAlloc(hypre_StructStencil *, num_vars);
        split_stencils[row] = stencil_row;
        for (col = 0; col < num_vars; col++)
        {
            stencil_row[col] = NULL;
            counts[col] = 0;
        }

        full_stencil = hypre_SStructStencilSStencil(stencils[row]);
        target_vars  = hypre_SStructStencilVars(stencils[row]);
        full_shape   = hypre_StructStencilShape(full_stencil);
        full_size    = hypre_StructStencilSize(full_stencil);

        map_row = hypre_TAlloc(int, full_size);
        entry_maps[row] = map_row;

        /* pass 1: count the entries that couple to each variable */
        for (entry = 0; entry < full_size; entry++)
        {
            counts[target_vars[entry]]++;
        }

        /* allocate the per-variable shape arrays, then reuse the counters
           as fill positions for pass 2 */
        for (col = 0; col < num_vars; col++)
        {
            if (counts[col] != 0)
            {
                shapes[col] = hypre_TAlloc(hypre_Index, counts[col]);
                counts[col] = 0;
            }
        }

        /* pass 2: distribute the offsets and remember where each one went */
        for (entry = 0; entry < full_size; entry++)
        {
            int var  = target_vars[entry];
            int slot = counts[var];

            hypre_CopyIndex(full_shape[entry], shapes[var][slot]);
            map_row[entry] = slot;
            counts[var] = slot + 1;
        }

        dim = hypre_StructStencilDim(full_stencil);
        for (col = 0; col < num_vars; col++)
        {
            if (counts[col] != 0)
            {
                stencil_row[col] =
                    hypre_StructStencilCreate(dim, counts[col], shapes[col]);
            }
            max_entries = hypre_max(max_entries, counts[col]);
        }
    }
    hypre_SStructPMatrixSMaps(result)     = entry_maps;
    hypre_SStructPMatrixSStencils(result) = split_stencils;
    hypre_TFree(counts);
    hypre_TFree(shapes);

    /* one struct matrix per non-empty (row, col) stencil */
    struct_mats = hypre_TAlloc(hypre_StructMatrix **, num_vars);
    for (row = 0; row < num_vars; row++)
    {
        hypre_StructMatrix **mat_row = hypre_TAlloc(hypre_StructMatrix *, num_vars);

        struct_mats[row] = mat_row;
        for (col = 0; col < num_vars; col++)
        {
            mat_row[col] = NULL;
            if (split_stencils[row][col] != NULL)
            {
                hypre_StructGrid *row_grid = hypre_SStructPGridSGrid(pgrid, row);

                mat_row[col] =
                    hypre_StructMatrixCreate(comm, row_grid, split_stencils[row][col]);
            }
        }
    }
    hypre_SStructPMatrixSMatrices(result) = struct_mats;

    /* symmetry flags, all cleared */
    symm_flags = hypre_TAlloc(int *, num_vars);
    for (row = 0; row < num_vars; row++)
    {
        int *flag_row = hypre_TAlloc(int, num_vars);

        symm_flags[row] = flag_row;
        for (col = 0; col < num_vars; col++)
        {
            flag_row[col] = 0;
        }
    }
    hypre_SStructPMatrixSymmetric(result) = symm_flags;

    /* scratch array large enough for the biggest split stencil */
    hypre_SStructPMatrixSEntriesSize(result) = max_entries;
    hypre_SStructPMatrixSEntries(result) = hypre_TAlloc(int, max_entries);

    hypre_SStructPMatrixRefCount(result)   = 1;

    *pmatrix_ptr = result;

    return hypre_error_flag;
}

Пример #25

Показать файл

Файл: computation.c Проект: ngholka/patki-power

/*--------------------------------------------------------------------------
 * hypre_CreateComputeInfo
 *
 * Starts building the compute info for a struct grid / stencil pair:
 *   - fetches the grid's boxes,
 *   - builds the communication info from the stencil,
 *   - and, when HYPRE_OVERLAP_COMM_COMP is defined, derives per-dimension
 *     border widths from the stencil offsets and splits every grid box into
 *     "dependent" border slabs.
 *
 * NOTE(review): this excerpt stops inside the HYPRE_OVERLAP_COMM_COMP branch;
 * the rest of the function (independent boxes, filling *compute_info_ptr,
 * the return statement) is not visible here.
 *--------------------------------------------------------------------------*/
int
hypre_CreateComputeInfo( hypre_StructGrid      *grid,
                         hypre_StructStencil   *stencil,
                         hypre_ComputeInfo    **compute_info_ptr )
{
   int                      ierr = 0;

   hypre_CommInfo          *comm_info;
   hypre_BoxArrayArray     *indt_boxes;
   hypre_BoxArrayArray     *dept_boxes;

   hypre_BoxArray          *boxes;

   hypre_BoxArray          *cbox_array;
   hypre_Box               *cbox;

   int                      i;

#ifdef HYPRE_OVERLAP_COMM_COMP
   hypre_Box               *rembox;
   hypre_Index             *stencil_shape;
   /* border[d][0] / border[d][1]: low-side / high-side border width in
    * dimension d; both start at zero and only grow below */
   int                      border[3][2] = {{0, 0}, {0, 0}, {0, 0}};
   int                      cbox_array_size;
   int                      s, d;
#endif

   /*------------------------------------------------------
    * Extract needed grid info
    *------------------------------------------------------*/

   boxes = hypre_StructGridBoxes(grid);

   /*------------------------------------------------------
    * Get communication info
    *------------------------------------------------------*/

   hypre_CreateCommInfoFromStencil(grid, stencil, &comm_info);

#ifdef HYPRE_OVERLAP_COMM_COMP

   /*------------------------------------------------------
    * Compute border info
    *------------------------------------------------------*/

   /* For every stencil entry and dimension, keep the largest magnitude of a
    * negative offset in border[d][0] and the largest positive offset in
    * border[d][1]. */
   stencil_shape = hypre_StructStencilShape(stencil);
   for (s = 0; s < hypre_StructStencilSize(stencil); s++)
   {
      for (d = 0; d < 3; d++)
      {
         i = hypre_IndexD(stencil_shape[s], d);
         if (i < 0)
         {
            border[d][0] = hypre_max(border[d][0], -i);
         }
         else if (i > 0)
         {
            border[d][1] = hypre_max(border[d][1], i);
         }
      }
   }

   /*------------------------------------------------------
    * Set up the dependent boxes
    *------------------------------------------------------*/

   dept_boxes = hypre_BoxArrayArrayCreate(hypre_BoxArraySize(boxes));

   /* rembox holds what is left of the current grid box after border slabs
    * have been peeled off */
   rembox = hypre_BoxCreate();
   hypre_ForBoxI(i, boxes)
      {
         cbox_array = hypre_BoxArrayArrayBoxArray(dept_boxes, i);
         /* reserve the maximum: one low and one high slab per dimension */
         hypre_BoxArraySetSize(cbox_array, 6);

         hypre_CopyBox(hypre_BoxArrayBox(boxes, i), rembox);
         cbox_array_size = 0;
         for (d = 0; d < 3; d++)
         {
            /* low side: cut a slab border[d][0] cells thick off rembox */
            if ( (hypre_BoxVolume(rembox)) && (border[d][0]) )
            {
               cbox = hypre_BoxArrayBox(cbox_array, cbox_array_size);
               hypre_CopyBox(rembox, cbox);
               hypre_BoxIMaxD(cbox, d) =
                  hypre_BoxIMinD(cbox, d) + border[d][0] - 1;
               hypre_BoxIMinD(rembox, d) =
                  hypre_BoxIMinD(cbox, d) + border[d][0];
               cbox_array_size++;
            }
            /* high side: cut a slab border[d][1] cells thick off rembox */
            if ( (hypre_BoxVolume(rembox)) && (border[d][1]) )
            {
               cbox = hypre_BoxArrayBox(cbox_array, cbox_array_size);
               hypre_CopyBox(rembox, cbox);
               hypre_BoxIMinD(cbox, d) =
                  hypre_BoxIMaxD(cbox, d) - border[d][1] + 1;
               hypre_BoxIMaxD(rembox, d) =
                  hypre_BoxIMaxD(cbox, d) - border[d][1];
               cbox_array_size++;
            }
         }
         /* shrink the array to the number of slabs actually produced */
         hypre_BoxArraySetSize(cbox_array, cbox_array_size);
      }

Пример #26

Показать файл

Файл: sstruct_matrix.c Проект: 8l/insieme

/*--------------------------------------------------------------------------
 * hypre_SStructUMatrixInitialize
 *
 * Prepares the IJ matrix that backs the unstructured part of an SStruct
 * matrix:
 *   1. sets the IJ object type to HYPRE_PARCSR,
 *   2. computes a per-row size estimate (stencil entries whose split value
 *      is -1, plus the extra entries of each UV entry),
 *   3. hands the row sizes to the IJ matrix,
 *   4. allocates the temporary column/coefficient buffers sized by the
 *      largest row, and
 *   5. initializes the IJ matrix.
 *
 * Returns hypre_error_flag.
 *--------------------------------------------------------------------------*/
int
hypre_SStructUMatrixInitialize( hypre_SStructMatrix *matrix )
{
    HYPRE_IJMatrix          ijmatrix    = hypre_SStructMatrixIJMatrix(matrix);
    hypre_SStructGraph     *graph       = hypre_SStructMatrixGraph(matrix);
    hypre_SStructGrid      *grid        = hypre_SStructGraphGrid(graph);
    int                     nparts      = hypre_SStructGraphNParts(graph);
    hypre_SStructPGrid    **pgrids      = hypre_SStructGraphPGrids(graph);
    hypre_SStructStencil ***stencils    = hypre_SStructGraphStencils(graph);
    int                     nUventries  = hypre_SStructGraphNUVEntries(graph);
    int                    *iUventries  = hypre_SStructGraphIUVEntries(graph);
    hypre_SStructUVEntry  **Uventries   = hypre_SStructGraphUVEntries(graph);
    int                   **nvneighbors = hypre_SStructGridNVNeighbors(grid);
    int                     matrix_type = hypre_SStructMatrixObjectType(matrix);

    /* which flavour of row numbering is in use */
    int                     ghost_rows  = (matrix_type == HYPRE_SSTRUCT ||
                                           matrix_type == HYPRE_STRUCT);
    int                     parcsr_rows = (matrix_type == HYPRE_PARCSR);

    hypre_StructGrid       *sgrid;
    hypre_SStructStencil   *stencil;
    hypre_BoxArray         *boxes;
    hypre_Box              *gridbox;
    hypre_Box              *ghostbox;
    hypre_Box              *loopbox;
    int                    *num_ghost;
    int                    *split;
    int                    *row_sizes;
    int                     nrows;
    int                     nvars;
    int                     nnzs;
    int                     max_row_size = 0;
    int                     row          = 0;   /* running row counter */
    int                     part, var, entry, box_id;
    int                     ix, iy, iz;
    int                     uv_row;
    int                     in_grid;

    HYPRE_IJMatrixSetObjectType(ijmatrix, HYPRE_PARCSR);

    /* GEC1002 the ghlocalsize is used to set the number of rows */
    if (parcsr_rows)
    {
        nrows = hypre_SStructGridLocalSize(grid);
    }
    if (ghost_rows)
    {
        nrows = hypre_SStructGridGhlocalSize(grid);
    }

    /* ---- row sizes coming from the stencils ---- */
    row_sizes = hypre_CTAlloc(int, nrows);

    for (part = 0; part < nparts; part++)
    {
        nvars = hypre_SStructPGridNVars(pgrids[part]);

        for (var = 0; var < nvars; var++)
        {
            sgrid   = hypre_SStructPGridSGrid(pgrids[part], var);
            stencil = stencils[part][var];
            split   = hypre_SStructMatrixSplit(matrix, part, var);

            /* count the stencil entries whose split value is -1 */
            nnzs = 0;
            for (entry = 0; entry < hypre_SStructStencilSize(stencil); entry++)
            {
                nnzs += (split[entry] == -1) ? 1 : 0;
            }
            /* TODO (disabled upstream): for a symmetric matrix with a
             * full/complete stencil the count would become 2*nnzs - 1. */

            boxes     = hypre_StructGridBoxes(sgrid);
            num_ghost = hypre_StructGridNumGhost(sgrid);

            for (box_id = 0; box_id < hypre_BoxArraySize(boxes); box_id++)
            {
                gridbox  = hypre_BoxArrayBox(boxes, box_id);
                ghostbox = hypre_BoxCreate();
                loopbox  = hypre_BoxCreate();

                /* ghostbox = gridbox grown by the ghost layer */
                hypre_CopyBox(gridbox, ghostbox);
                hypre_BoxExpand(ghostbox, num_ghost);

                /* iterate over ghost-inclusive box or plain grid box,
                 * depending on the object type */
                if (ghost_rows)
                {
                    hypre_CopyBox(ghostbox, loopbox);
                }
                if (parcsr_rows)
                {
                    hypre_CopyBox(gridbox, loopbox);
                }

                for (iz = hypre_BoxIMinZ(loopbox); iz <= hypre_BoxIMaxZ(loopbox); iz++)
                {
                    for (iy = hypre_BoxIMinY(loopbox); iy <= hypre_BoxIMaxY(loopbox); iy++)
                    {
                        for (ix = hypre_BoxIMinX(loopbox); ix <= hypre_BoxIMaxX(loopbox); ix++)
                        {
                            /* only points inside the real grid box get a
                             * size; the remaining rows keep their zero */
                            in_grid = (ix >= hypre_BoxIMinX(gridbox)) &&
                                      (iy >= hypre_BoxIMinY(gridbox)) &&
                                      (iz >= hypre_BoxIMinZ(gridbox)) &&
                                      (ix <= hypre_BoxIMaxX(gridbox)) &&
                                      (iy <= hypre_BoxIMaxY(gridbox)) &&
                                      (iz <= hypre_BoxIMaxZ(gridbox));
                            if (in_grid)
                            {
                                row_sizes[row] = nnzs;
                                max_row_size   = hypre_max(max_row_size, nnzs);
                            }
                            row++;
                        }
                    }
                }

                hypre_BoxDestroy(ghostbox);
                hypre_BoxDestroy(loopbox);
            }

            /* a variable with neighbors may need room for a full stencil */
            if (nvneighbors[part][var])
            {
                max_row_size = hypre_max(max_row_size,
                                         hypre_SStructStencilSize(stencil));
            }
        }
    }

    /* GEC0902 for each UV entry, add the extra columns it contributes to
     * the corresponding row size */
    for (entry = 0; entry < nUventries; entry++)
    {
        uv_row = iUventries[entry];
        row_sizes[uv_row] += hypre_SStructUVEntryNUEntries(Uventries[uv_row]);
        max_row_size = hypre_max(max_row_size, row_sizes[uv_row]);
    }

    /* ZTODO: Update row_sizes based on neighbor off-part couplings */
    HYPRE_IJMatrixSetRowSizes (ijmatrix, (const int *) row_sizes);
    hypre_TFree(row_sizes);

    /* scratch buffers large enough for the widest row */
    hypre_SStructMatrixTmpColCoords(matrix) =
        hypre_CTAlloc(HYPRE_BigInt, max_row_size);
    hypre_SStructMatrixTmpCoeffs(matrix) =
        hypre_CTAlloc(double, max_row_size);

    /* GEC1002 at this point the processor has the partitioning (creation of ij) */
    HYPRE_IJMatrixInitialize(ijmatrix);

    return hypre_error_flag;
}

Пример #27

Показать файл

Файл: amgstats.c Проект: LLNL/COGENT

/*--------------------------------------------------------------------------
 * hypre_AMGSetupStats
 *
 * Prints a report about an AMG hierarchy after setup:
 *   - a few setup parameters,
 *   - for every level's operator: rows, nonzeros, sparsity, min/max/avg
 *     entries per row and min/max row sum,
 *   - for every interpolation operator: dimensions, min/max entries per
 *     row, min/max weight (entries equal to 1.0 are skipped) and min/max
 *     row sum,
 *   - operator and grid complexity,
 * then calls hypre_WriteSolverParams.
 *
 * amg_vdata is the AMG data object passed as void *.  Always returns 0.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_AMGSetupStats( void *amg_vdata )
{
   hypre_AMGData    *amg_data   = amg_vdata;
   hypre_CSRMatrix **A_array    = hypre_AMGDataAArray(amg_data);
   hypre_CSRMatrix **P_array    = hypre_AMGDataPArray(amg_data);
   HYPRE_Int         num_levels = hypre_AMGDataNumLevels(amg_data);

   /* per-matrix views */
   HYPRE_Int  *row_ptr;
   double     *values;
   HYPRE_Int   nrows;
   HYPRE_Int   ncols;
   HYPRE_Int   nnz;

   /* per-matrix statistics */
   HYPRE_Int   row_len;
   HYPRE_Int   len_total;
   HYPRE_Int   len_min;
   HYPRE_Int   len_max;
   double      len_avg;
   double      row_total;
   double      sum_min;
   double      sum_max;
   double      density;
   double      wt_min;
   double      wt_max;

   /* hierarchy-wide accumulators */
   double      op_complxty   = 0;
   double      grid_complxty = 0;
   double      finest_nnz;
   double      finest_rows;

   HYPRE_Int   lev;
   HYPRE_Int   row;
   HYPRE_Int   k;

   hypre_printf("\n  AMG SETUP PARAMETERS:\n\n");
   hypre_printf(" Strength threshold = %f\n",hypre_AMGDataStrongThreshold(amg_data));
   hypre_printf(" Max levels = %d\n",hypre_AMGDataMaxLevels(amg_data));
   hypre_printf(" Num levels = %d\n\n",num_levels);

   hypre_printf( "\nOperator Matrix Information:\n\n");

   hypre_printf("         nonzero         entries p");
   hypre_printf("er row        row sums\n");
   hypre_printf("lev rows entries  sparse  min max  ");
   hypre_printf("avg       min         max\n");
   hypre_printf("=======================================");
   hypre_printf("==========================\n");

   /*-----------------------------------------------------
    *  Operator statistics, one line per level
    *-----------------------------------------------------*/

   /* the finest level is the reference for both complexities */
   finest_rows = (double) hypre_CSRMatrixNumRows(A_array[0]);
   finest_nnz  = (double) hypre_CSRMatrixNumNonzeros(A_array[0]);

   for (lev = 0; lev < num_levels; lev++)
   {
      row_ptr = hypre_CSRMatrixI(A_array[lev]);
      values  = hypre_CSRMatrixData(A_array[lev]);
      nrows   = hypre_CSRMatrixNumRows(A_array[lev]);
      nnz     = hypre_CSRMatrixNumNonzeros(A_array[lev]);

      density        = nnz /((double) nrows * (double) nrows);
      op_complxty   += ((double)nnz/finest_nnz);
      grid_complxty += ((double)nrows/finest_rows);

      /* seed the extrema with the first row */
      len_min   = row_ptr[1]-row_ptr[0];
      len_max   = 0;
      len_total = 0;

      sum_min = 0.0;
      for (k = row_ptr[0]; k < row_ptr[1]; k++)
      {
         sum_min += values[k];
      }
      sum_max = sum_min;

      for (row = 0; row < nrows; row++)
      {
         row_len    = row_ptr[row+1] - row_ptr[row];
         len_min    = hypre_min(row_len, len_min);
         len_max    = hypre_max(row_len, len_max);
         len_total += row_len;

         row_total = 0.0;
         for (k = row_ptr[row]; k < row_ptr[row+1]; k++)
         {
            row_total += values[k];
         }

         sum_min = hypre_min(row_total, sum_min);
         sum_max = hypre_max(row_total, sum_max);
      }

      len_avg = ((double) len_total) / ((double) nrows);

      hypre_printf( "%2d %5d %7d  %0.3f  %3d %3d",
                    lev, nrows, nnz, density, len_min,
                    len_max);
      hypre_printf("  %4.1f  %10.3e  %10.3e\n", len_avg,
                   sum_min, sum_max);
   }

   hypre_printf( "\n\nInterpolation Matrix Information:\n\n");

   hypre_printf("                 entries/row    min     max");
   hypre_printf("         row sums\n");
   hypre_printf("lev  rows cols    min max  ");
   hypre_printf("   weight   weight     min       max \n");
   hypre_printf("=======================================");
   hypre_printf("==========================\n");

   /*-----------------------------------------------------
    *  Interpolation statistics, one line per level pair
    *-----------------------------------------------------*/

   for (lev = 0; lev < num_levels-1; lev++)
   {
      row_ptr = hypre_CSRMatrixI(P_array[lev]);
      values  = hypre_CSRMatrixData(P_array[lev]);
      nrows   = hypre_CSRMatrixNumRows(P_array[lev]);
      ncols   = hypre_CSRMatrixNumCols(P_array[lev]);
      nnz     = hypre_CSRMatrixNumNonzeros(P_array[lev]);

      /* seed the extrema with the first row / first stored value */
      len_min   = row_ptr[1]-row_ptr[0];
      len_max   = 0;
      len_total = 0;
      wt_min    = values[0];
      wt_max    = 0.0;

      sum_min = 0.0;
      for (k = row_ptr[0]; k < row_ptr[1]; k++)
      {
         sum_min += values[k];
      }
      sum_max = sum_min;

      /* weight range over all stored values other than exact 1.0 */
      for (k = 0; k < nnz; k++)
      {
         if (values[k] != 1.0)
         {
            wt_min = hypre_min(wt_min,values[k]);
            wt_max = hypre_max(wt_max,values[k]);
         }
      }

      for (row = 0; row < nrows; row++)
      {
         row_len    = row_ptr[row+1] - row_ptr[row];
         len_min    = hypre_min(row_len, len_min);
         len_max    = hypre_max(row_len, len_max);
         len_total += row_len;

         row_total = 0.0;
         for (k = row_ptr[row]; k < row_ptr[row+1]; k++)
         {
            row_total += values[k];
         }

         sum_min = hypre_min(row_total, sum_min);
         sum_max = hypre_max(row_total, sum_max);
      }

      hypre_printf( "%2d %5d x %-5d %3d %3d",
                    lev, nrows, ncols,  len_min, len_max);
      hypre_printf("  %5.3e  %5.3e %5.3e  %5.3e\n",
                   wt_min, wt_max, sum_min, sum_max);
   }

   hypre_printf("\n Operator Complexity: %8.3f\n", op_complxty);
   hypre_printf(" Grid Complexity:     %8.3f\n", grid_complxty);
   hypre_WriteSolverParams(amg_data);

   return 0;
}

Пример #28

Показать файл

Файл: sparse_msg_solve.c Проект: Chang-Liu-0520/hypre

/*--------------------------------------------------------------------------
 * hypre_SparseMSGSolve
 *
 * Runs up to max_iter SparseMSG V-cycles on A x = b.
 *
 *   smsg_vdata  solver data object (hypre_SparseMSGData passed as void *)
 *   A, b, x     fine-grid matrix, right-hand side and solution; references
 *               to them replace slot 0 of the solver's A/b/x arrays
 *
 * Each iteration does: fine-grid pre-relaxation, residual computation and
 * (if tol > 0) a convergence check, a down cycle that restricts residuals
 * to the coarser grids, a bottom relaxation, an up cycle that interpolates
 * and filters corrections, and fine-grid post-relaxation.
 *
 * Early exits: max_iter == 0, or tol > 0 with a zero right-hand side (x is
 * then set to zero).
 *
 * The work array restrict_count is allocated here and released before
 * returning.  Returns ierr, which this routine never changes from 0.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_SparseMSGSolve( void               *smsg_vdata,
                      hypre_StructMatrix *A,
                      hypre_StructVector *b,
                      hypre_StructVector *x          )
{
   hypre_SparseMSGData  *smsg_data = smsg_vdata;

   HYPRE_Real            tol                 = (smsg_data -> tol);
   HYPRE_Int             max_iter            = (smsg_data -> max_iter);
   HYPRE_Int             rel_change          = (smsg_data -> rel_change);
   HYPRE_Int             zero_guess          = (smsg_data -> zero_guess);
   HYPRE_Int             jump                = (smsg_data -> jump);
   HYPRE_Int             num_pre_relax       = (smsg_data -> num_pre_relax);
   HYPRE_Int             num_post_relax      = (smsg_data -> num_post_relax);
   HYPRE_Int             num_fine_relax      = (smsg_data -> num_fine_relax);
   HYPRE_Int            *num_grids           = (smsg_data -> num_grids);
   HYPRE_Int             num_all_grids       = (smsg_data -> num_all_grids);
   HYPRE_Int             num_levels          = (smsg_data -> num_levels);
   hypre_StructMatrix  **A_array             = (smsg_data -> A_array);
   hypre_StructMatrix  **Px_array            = (smsg_data -> Px_array);
   hypre_StructMatrix  **Py_array            = (smsg_data -> Py_array);
   hypre_StructMatrix  **Pz_array            = (smsg_data -> Pz_array);
   hypre_StructMatrix  **RTx_array           = (smsg_data -> RTx_array);
   hypre_StructMatrix  **RTy_array           = (smsg_data -> RTy_array);
   hypre_StructMatrix  **RTz_array           = (smsg_data -> RTz_array);
   hypre_StructVector  **b_array             = (smsg_data -> b_array);
   hypre_StructVector  **x_array             = (smsg_data -> x_array);
   hypre_StructVector  **t_array             = (smsg_data -> t_array);
   hypre_StructVector  **r_array             = (smsg_data -> r_array);
   hypre_StructVector  **e_array             = (smsg_data -> e_array);
   hypre_StructVector  **visitx_array        = (smsg_data -> visitx_array);
   hypre_StructVector  **visity_array        = (smsg_data -> visity_array);
   hypre_StructVector  **visitz_array        = (smsg_data -> visitz_array);
   HYPRE_Int            *grid_on             = (smsg_data -> grid_on);
   void                **relax_array         = (smsg_data -> relax_array);
   void                **matvec_array        = (smsg_data -> matvec_array);
   void                **restrictx_array     = (smsg_data -> restrictx_array);
   void                **restricty_array     = (smsg_data -> restricty_array);
   void                **restrictz_array     = (smsg_data -> restrictz_array);
   void                **interpx_array       = (smsg_data -> interpx_array);
   void                **interpy_array       = (smsg_data -> interpy_array);
   void                **interpz_array       = (smsg_data -> interpz_array);
   HYPRE_Int             logging             = (smsg_data -> logging);
   HYPRE_Real           *norms               = (smsg_data -> norms);
   HYPRE_Real           *rel_norms           = (smsg_data -> rel_norms);

   /* restrict_count[g]: how many residuals have been restricted into grid g
    * during the current down cycle */
   HYPRE_Int            *restrict_count;

   HYPRE_Real            b_dot_b, r_dot_r, eps;
   HYPRE_Real            e_dot_e, x_dot_x;

   HYPRE_Int             i, l, lx, ly, lz;
   HYPRE_Int             lymin, lymax, lzmin, lzmax;
   HYPRE_Int             fi, ci;
   HYPRE_Int             ierr = 0;

#if DEBUG
   char                  filename[255];
#endif

   /*-----------------------------------------------------
    * Initialize some things and deal with special cases
    *-----------------------------------------------------*/

   hypre_BeginTiming(smsg_data -> time_index);

   /* swap the caller's A, b, x into slot 0 of the level arrays */
   hypre_StructMatrixDestroy(A_array[0]);
   hypre_StructVectorDestroy(b_array[0]);
   hypre_StructVectorDestroy(x_array[0]);
   A_array[0] = hypre_StructMatrixRef(A);
   b_array[0] = hypre_StructVectorRef(b);
   x_array[0] = hypre_StructVectorRef(x);

   (smsg_data -> num_iterations) = 0;

   /* if max_iter is zero, return */
   if (max_iter == 0)
   {
      /* if using a zero initial guess, return zero */
      if (zero_guess)
      {
         hypre_StructVectorSetConstantValues(x, 0.0);
      }

      hypre_EndTiming(smsg_data -> time_index);
      return ierr;
   }

   /* part of convergence check */
   if (tol > 0.0)
   {
      /* eps = (tol^2) */
      b_dot_b = hypre_StructInnerProd(b_array[0], b_array[0]);
      eps = tol*tol;

      /* if rhs is zero, return a zero solution */
      if (b_dot_b == 0.0)
      {
         hypre_StructVectorSetConstantValues(x, 0.0);
         if (logging > 0)
         {
            norms[0]     = 0.0;
            rel_norms[0] = 0.0;
         }

         hypre_EndTiming(smsg_data -> time_index);
         return ierr;
      }
   }

   /* allocated only after the early returns above; released at the end */
   restrict_count = hypre_TAlloc(HYPRE_Int, num_all_grids);

   /*-----------------------------------------------------
    * Do V-cycles:
    *   For each index l, "fine" = l, "coarse" = (l+1)
    *-----------------------------------------------------*/

   for (i = 0; i < max_iter; i++)
   {
      /*--------------------------------------------------
       * Down cycle:
       *   Note that r = b = x through the jump region
       *--------------------------------------------------*/

      /* fine grid pre-relaxation */
      hypre_PFMGRelaxSetPreRelax(relax_array[0]);
      hypre_PFMGRelaxSetMaxIter(relax_array[0], num_fine_relax);
      hypre_PFMGRelaxSetZeroGuess(relax_array[0], zero_guess);
      hypre_PFMGRelax(relax_array[0], A_array[0], b_array[0], x_array[0]);
      zero_guess = 0;

      /* compute fine grid residual (b - Ax) */
      hypre_StructCopy(b_array[0], r_array[0]);
      hypre_StructMatvecCompute(matvec_array[0],
                                -1.0, A_array[0], x_array[0], 1.0, r_array[0]);

      /* convergence check */
      if (tol > 0.0)
      {
         r_dot_r = hypre_StructInnerProd(r_array[0], r_array[0]);

         if (logging > 0)
         {
            norms[i] = sqrt(r_dot_r);
            if (b_dot_b > 0)
               rel_norms[i] = sqrt(r_dot_r/b_dot_b);
            else
               rel_norms[i] = 0.0;
         }
/* RDF */
#if 0

hypre_printf("iter = %d, rel_norm = %e\n", i, rel_norms[i]);

#endif

         /* always do at least 1 V-cycle; e_dot_e and x_dot_x are only read
          * for i > 0, i.e. after the previous iteration has set them */
         if ((r_dot_r/b_dot_b < eps) && (i > 0))
         {
            if (rel_change)
            {
               if ((e_dot_e/x_dot_x) < eps)
                  break;
            }
            else
            {
               break;
            }
         }
      }

      if (num_levels > 1)
      {
         /* initialize restrict_count */
         for (fi = 0; fi < num_all_grids; fi++)
         {
            restrict_count[fi] = 0;
         }

         for (l = 0; l <= (num_levels - 2); l++)
         {
            lzmin = hypre_max((l - num_grids[1] - num_grids[0] + 2), 0);
            lzmax = hypre_min((l), (num_grids[2] - 1));
            for (lz = lzmin; lz <= lzmax; lz++)
            {
               lymin = hypre_max((l - lz - num_grids[0] + 1), 0);
               lymax = hypre_min((l - lz), (num_grids[1] - 1));
               for (ly = lymin; ly <= lymax; ly++)
               {
                  lx = l - lz - ly;

                  hypre_SparseMSGMapIndex(lx, ly, lz, num_grids, fi);

                  if (!grid_on[fi])
                  {
                     break;
                  }

                  /* average the rhs if several grids restricted into it */
                  if (restrict_count[fi] > 1)
                  {
                     hypre_StructScale((1.0/restrict_count[fi]), b_array[fi]);
                  }

                  if (l > jump)
                  {
                     /* pre-relaxation */
                     hypre_PFMGRelaxSetPreRelax(relax_array[fi]);
                     hypre_PFMGRelaxSetMaxIter(relax_array[fi], num_pre_relax);
                     hypre_PFMGRelaxSetZeroGuess(relax_array[fi], 1);
                     hypre_PFMGRelax(relax_array[fi], A_array[fi], b_array[fi],
                                     x_array[fi]);

                     /* compute residual (b - Ax) */
                     hypre_StructCopy(b_array[fi], r_array[fi]);
                     hypre_StructMatvecCompute(matvec_array[fi],
                                               -1.0, A_array[fi], x_array[fi],
                                               1.0, r_array[fi]);
                  }

                  if ((lx+1) < num_grids[0])
                  {
                     /* restrict to ((lx+1), ly, lz) */
                     hypre_SparseMSGMapIndex((lx+1), ly, lz, num_grids, ci);
                     if (grid_on[ci])
                     {
                        if (restrict_count[ci])
                        {
                           /* not the first contribution: accumulate via t */
                           hypre_SparseMSGRestrict(restrictx_array[fi],
                                                   RTx_array[lx], r_array[fi],
                                                   t_array[ci]);
                           hypre_StructAxpy(1.0, t_array[ci], b_array[ci]);
                        }
                        else
                        {
                           hypre_SparseMSGRestrict(restrictx_array[fi],
                                                   RTx_array[lx], r_array[fi],
                                                   b_array[ci]);
                        }
                        restrict_count[ci]++;
                     }
                  }
                  if ((ly+1) < num_grids[1])
                  {
                     /* restrict to (lx, (ly+1), lz) */
                     hypre_SparseMSGMapIndex(lx, (ly+1), lz, num_grids, ci);
                     if (grid_on[ci])
                     {
                        if (restrict_count[ci])
                        {
                           hypre_SparseMSGRestrict(restricty_array[fi],
                                                   RTy_array[ly], r_array[fi],
                                                   t_array[ci]);
                           hypre_StructAxpy(1.0, t_array[ci], b_array[ci]);
                        }
                        else
                        {
                           hypre_SparseMSGRestrict(restricty_array[fi],
                                                   RTy_array[ly], r_array[fi],
                                                   b_array[ci]);
                        }
                        restrict_count[ci]++;
                     }
                  }
                  if ((lz+1) < num_grids[2])
                  {
                     /* restrict to (lx, ly, (lz+1)) */
                     hypre_SparseMSGMapIndex(lx, ly, (lz+1), num_grids, ci);
                     if (grid_on[ci])
                     {
                        if (restrict_count[ci])
                        {
                           hypre_SparseMSGRestrict(restrictz_array[fi],
                                                   RTz_array[lz], r_array[fi],
                                                   t_array[ci]);
                           hypre_StructAxpy(1.0, t_array[ci], b_array[ci]);
                        }
                        else
                        {
                           hypre_SparseMSGRestrict(restrictz_array[fi],
                                                   RTz_array[lz], r_array[fi],
                                                   b_array[ci]);
                        }
                        restrict_count[ci]++;
                     }
                  }
#if DEBUG
                  hypre_sprintf(filename, "zoutSMSG_bdown.%d.%d.%d", lx, ly, lz);
                  hypre_StructVectorPrint(filename, b_array[fi], 0);
                  hypre_sprintf(filename, "zoutSMSG_xdown.%d.%d.%d", lx, ly, lz);
                  hypre_StructVectorPrint(filename, x_array[fi], 0);
                  hypre_sprintf(filename, "zoutSMSG_rdown.%d.%d.%d", lx, ly, lz);
                  hypre_StructVectorPrint(filename, r_array[fi], 0);
#endif
               }
            }
         }

         /*--------------------------------------------------
          * Bottom
          *--------------------------------------------------*/

         fi = num_all_grids - 1;

         if (restrict_count[fi] > 1)
         {
            hypre_StructScale((1.0/restrict_count[fi]), b_array[fi]);
         }

         hypre_PFMGRelaxSetZeroGuess(relax_array[fi], 1);
         hypre_PFMGRelax(relax_array[fi], A_array[fi], b_array[fi],
                         x_array[fi]);

#if DEBUG
         hypre_sprintf(filename, "zoutSMSG_bbottom.%d.%d.%d", lx, ly, lz);
         hypre_StructVectorPrint(filename, b_array[fi], 0);
         hypre_sprintf(filename, "zoutSMSG_xbottom.%d.%d.%d", lx, ly, lz);
         hypre_StructVectorPrint(filename, x_array[fi], 0);
#endif

         /*--------------------------------------------------
          * Up cycle
          *   Note that r = b = x through the jump region
          *--------------------------------------------------*/

         for (l = (num_levels - 2); l >= 0; l--)
         {
            lzmin = hypre_max((l - num_grids[1] - num_grids[0] + 2), 0);
            lzmax = hypre_min((l), (num_grids[2] - 1));
            for (lz = lzmax; lz >= lzmin; lz--)
            {
               lymin = hypre_max((l - lz - num_grids[0] + 1), 0);
               lymax = hypre_min((l - lz), (num_grids[1] - 1));
               for (ly = lymax; ly >= lymin; ly--)
               {
                  lx = l - lz - ly;

                  hypre_SparseMSGMapIndex(lx, ly, lz, num_grids, fi);

                  if (!grid_on[fi])
                  {
                     break;
                  }

                  if ((l >= 1) && (l <= jump))
                  {
                     hypre_StructVectorSetConstantValues(x_array[fi], 0.0);
                  }
                  if ((lx+1) < num_grids[0])
                  {
                     /* interpolate from ((lx+1), ly, lz) */
                     hypre_SparseMSGMapIndex((lx+1), ly, lz, num_grids, ci);
                     if (grid_on[ci])
                     {
                        hypre_SparseMSGInterp(interpx_array[fi],
                                              Px_array[lx], x_array[ci],
                                              e_array[fi]);
                        hypre_SparseMSGFilter(visitx_array[fi], e_array[fi],
                                              lx, ly, lz, jump);
                        hypre_StructAxpy(1.0, e_array[fi], x_array[fi]);
                     }
                  }
                  if ((ly+1) < num_grids[1])
                  {
                     /* interpolate from (lx, (ly+1), lz) */
                     hypre_SparseMSGMapIndex(lx, (ly+1), lz, num_grids, ci);
                     if (grid_on[ci])
                     {
                        hypre_SparseMSGInterp(interpy_array[fi],
                                              Py_array[ly], x_array[ci],
                                              e_array[fi]);
                        hypre_SparseMSGFilter(visity_array[fi], e_array[fi],
                                              lx, ly, lz, jump);
                        hypre_StructAxpy(1.0, e_array[fi], x_array[fi]);
                     }
                  }
                  if ((lz+1) < num_grids[2])
                  {
                     /* interpolate from (lx, ly, (lz+1)) */
                     hypre_SparseMSGMapIndex(lx, ly, (lz+1), num_grids, ci);
                     if (grid_on[ci])
                     {
                        hypre_SparseMSGInterp(interpz_array[fi],
                                              Pz_array[lz], x_array[ci],
                                              e_array[fi]);
                        hypre_SparseMSGFilter(visitz_array[fi], e_array[fi],
                                              lx, ly, lz, jump);
                        hypre_StructAxpy(1.0, e_array[fi], x_array[fi]);
                     }
                  }
#if DEBUG
                  hypre_sprintf(filename, "zoutSMSG_xup.%d.%d.%d", lx, ly, lz);
                  hypre_StructVectorPrint(filename, x_array[fi], 0);
#endif
                  if (l > jump)
                  {
                     /* post-relaxation */
                     hypre_PFMGRelaxSetPostRelax(relax_array[fi]);
                     hypre_PFMGRelaxSetMaxIter(relax_array[fi],
                                               num_post_relax);
                     hypre_PFMGRelaxSetZeroGuess(relax_array[fi], 0);
                     hypre_PFMGRelax(relax_array[fi], A_array[fi], b_array[fi],
                                     x_array[fi]);
                  }
               }
            }
         }
      }

      /* part of convergence check */
      if ((tol > 0.0) && (rel_change))
      {
         if (num_levels > 1)
         {
            e_dot_e = hypre_StructInnerProd(e_array[0], e_array[0]);
            x_dot_x = hypre_StructInnerProd(x_array[0], x_array[0]);
         }
         else
         {
            /* single level: no coarse-grid correction exists, so use values
             * that make the relative-change test pass */
            e_dot_e = 0.0;
            x_dot_x = 1.0;
         }
      }

      /* fine grid post-relaxation */
      hypre_PFMGRelaxSetPostRelax(relax_array[0]);
      hypre_PFMGRelaxSetMaxIter(relax_array[0], num_fine_relax);
      hypre_PFMGRelaxSetZeroGuess(relax_array[0], 0);
      hypre_PFMGRelax(relax_array[0], A_array[0], b_array[0], x_array[0]);

      (smsg_data -> num_iterations) = (i + 1);
   }

   /* release the per-solve work array (previously leaked on every call) */
   hypre_TFree(restrict_count);

   hypre_EndTiming(smsg_data -> time_index);

   return ierr;
}