Beispiel #1
0
int
hypre_GenerateLocalPartitioning(HYPRE_BigInt length, int num_procs, int myid, HYPRE_BigInt **part_ptr)
{


   int ierr = 0;
   HYPRE_BigInt *part;
   HYPRE_BigInt size;
   int rest;

   part = hypre_CTAlloc(HYPRE_BigInt, 2);
   size = length /(HYPRE_BigInt)num_procs;
   rest = (int)(length - size*(HYPRE_BigInt)num_procs);

   /* first row I own */
   part[0] = size*(HYPRE_BigInt)myid;
   part[0] += (HYPRE_BigInt)hypre_min(myid, rest);
   
   /* last row I own */
   part[1] =  size*(HYPRE_BigInt)(myid+1);
   part[1] += (HYPRE_BigInt)hypre_min(myid+1, rest);
   part[1] = part[1] - 1;

   /* add 1 to last row since this is for "starts" vector */
   part[1] = part[1] + 1;
   

   *part_ptr = part;
   return ierr;
}
Beispiel #2
0
/**
 * @param num_threads number of threads that participate in this merge
 * @param my_thread_num thread id (zeor-based) among the threads that participate in this merge
 */
static void hypre_parallel_merge(
   HYPRE_Int *first1, HYPRE_Int *last1, HYPRE_Int *first2, HYPRE_Int *last2,
   HYPRE_Int *out,
   HYPRE_Int num_threads, HYPRE_Int my_thread_num)
{
   HYPRE_Int n1 = last1 - first1;
   HYPRE_Int n2 = last2 - first2;
   HYPRE_Int n = n1 + n2;
   HYPRE_Int n_per_thread = (n + num_threads - 1)/num_threads;
   HYPRE_Int begin_rank = hypre_min(n_per_thread*my_thread_num, n);
   HYPRE_Int end_rank = hypre_min(begin_rank + n_per_thread, n);

#ifdef DBG_MERGE_SORT
   assert(std::is_sorted(first1, last1));
   assert(std::is_sorted(first2, last2));
#endif

   HYPRE_Int begin1, begin2, end1, end2;
   kth_element(&begin1, &begin2, first1, first2, n1, n2, begin_rank);
   kth_element(&end1, &end2, first1, first2, n1, n2, end_rank);

   while (begin1 > end1 && begin1 > 0 && begin2 < n2 && first1[begin1 - 1] == first2[begin2])
   {
#ifdef DBG_MERGE_SORT
      printf("%s:%d\n", __FILE__, __LINE__);
#endif
      begin1--; begin2++; 
   }
   while (begin2 > end2 && end1 > 0 && end2 < n2 && first1[end1 - 1] == first2[end2])
   {
#ifdef DBG_MERGE_SORT
      printf("%s:%d\n", __FILE__, __LINE__);
#endif
      end1--; end2++;
   }

#ifdef DBG_MERGE_SORT
   assert(begin1 <= end1);
   assert(begin2 <= end2);
#endif

   hypre_merge(
      first1 + begin1, first1 + end1,
      first2 + begin2, first2 + end2,
      out + begin1 + begin2);

#ifdef DBG_MERGE_SORT
   assert(std::is_sorted(out + begin1 + begin2, out + end1 + end2));
#endif
}
Beispiel #3
0
HYPRE_Int
hypre_IntersectBoxes( hypre_Box *box1,
                      hypre_Box *box2,
                      hypre_Box *ibox )
{
   HYPRE_Int d;

   /* find x, y, and z bounds */
   for (d = 0; d < 3; d++)
   {
      hypre_BoxIMinD(ibox, d) =
         hypre_max(hypre_BoxIMinD(box1, d), hypre_BoxIMinD(box2, d));
      hypre_BoxIMaxD(ibox, d) =
         hypre_min(hypre_BoxIMaxD(box1, d), hypre_BoxIMaxD(box2, d));
   }

   return hypre_error_flag;
}
Beispiel #4
0
int
hypre_IntersectBoxes( hypre_Box *box1,
                      hypre_Box *box2,
                      hypre_Box *ibox )
{
   int ierr = 0;
   int d;

   /* find x, y, and z bounds */
   for (d = 0; d < 3; d++)
   {
      hypre_BoxIMinD(ibox, d) =
         hypre_max(hypre_BoxIMinD(box1, d), hypre_BoxIMinD(box2, d));
      hypre_BoxIMaxD(ibox, d) =
         hypre_min(hypre_BoxIMaxD(box1, d), hypre_BoxIMaxD(box2, d));
   }

   return ierr;
}
Beispiel #5
0
void hypre_merge_sort(HYPRE_Int *in, HYPRE_Int *temp, HYPRE_Int len, HYPRE_Int **out)
{
   if (0 == len) return;

#ifdef HYPRE_PROFILE
   hypre_profile_times[HYPRE_TIMER_ID_MERGE] -= hypre_MPI_Wtime();
#endif

#ifdef DBG_MERGE_SORT
   HYPRE_Int *dbg_buf = new HYPRE_Int[len];
   std::copy(in, in + len, dbg_buf);
   std::sort(dbg_buf, dbg_buf + len);
#endif

   // HYPRE_Int thread_private_len[hypre_NumThreads()];
   // HYPRE_Int out_len = 0;

#ifdef HYPRE_USING_OPENMP
#pragma omp parallel
#endif
   {
      HYPRE_Int num_threads = hypre_NumActiveThreads();
      HYPRE_Int my_thread_num = hypre_GetThreadNum();

      // thread-private sort
      HYPRE_Int i_per_thread = (len + num_threads - 1)/num_threads;
      HYPRE_Int i_begin = hypre_min(i_per_thread*my_thread_num, len);
      HYPRE_Int i_end = hypre_min(i_begin + i_per_thread, len);

      hypre_qsort0(in, i_begin, i_end - 1);

      // merge sorted sequences
      HYPRE_Int in_group_size;
      HYPRE_Int *in_buf = in;
      HYPRE_Int *out_buf = temp;
      for (in_group_size = 1; in_group_size < num_threads; in_group_size *= 2)
      {
#ifdef HYPRE_USING_OPENMP
#pragma omp barrier
#endif

         // merge 2 in-groups into 1 out-group
         HYPRE_Int out_group_size = in_group_size*2;
         HYPRE_Int group_leader = my_thread_num/out_group_size*out_group_size;
         // HYPRE_Int group_sub_leader = hypre_min(group_leader + in_group_size, num_threads - 1);
         HYPRE_Int id_in_group = my_thread_num%out_group_size;
         HYPRE_Int num_threads_in_group =
            hypre_min(group_leader + out_group_size, num_threads) - group_leader;

         HYPRE_Int in_group1_begin = hypre_min(i_per_thread*group_leader, len);
         HYPRE_Int in_group1_end = hypre_min(in_group1_begin + i_per_thread*in_group_size, len);

         HYPRE_Int in_group2_begin = hypre_min(in_group1_begin + i_per_thread*in_group_size, len);
         HYPRE_Int in_group2_end = hypre_min(in_group2_begin + i_per_thread*in_group_size, len);

         hypre_parallel_merge(
            in_buf + in_group1_begin, in_buf + in_group1_end,
            in_buf + in_group2_begin, in_buf + in_group2_end,
            out_buf + in_group1_begin,
            num_threads_in_group,
            id_in_group);

         HYPRE_Int *temp = in_buf;
         in_buf = out_buf;
         out_buf = temp;
      }

      *out = in_buf;
   } /* omp parallel */

#ifdef DBG_MERGE_SORT
   assert(std::equal(*out, *out + len, dbg_buf));

   delete[] dbg_buf;
#endif

#ifdef HYPRE_PROFILE
   hypre_profile_times[HYPRE_TIMER_ID_MERGE] += hypre_MPI_Wtime();
#endif
}
Beispiel #6
0
HYPRE_ParCSRMatrix 
GenerateLaplacian9pt( MPI_Comm comm,
                      HYPRE_Int      nx,
                      HYPRE_Int      ny,
                      HYPRE_Int      P,
                      HYPRE_Int      Q,
                      HYPRE_Int      p,
                      HYPRE_Int      q,
                      double  *value )
{
   hypre_ParCSRMatrix *A;
   hypre_CSRMatrix *diag;
   hypre_CSRMatrix *offd;

   HYPRE_Int    *diag_i;
   HYPRE_Int    *diag_j;
   double *diag_data;

   HYPRE_Int    *offd_i;
   HYPRE_Int    *offd_j;
   double *offd_data;

   HYPRE_Int *global_part;
   HYPRE_Int ix, iy;
   HYPRE_Int cnt, o_cnt;
   HYPRE_Int local_num_rows; 
   HYPRE_Int *col_map_offd;
   HYPRE_Int *work;
   HYPRE_Int row_index;
   HYPRE_Int i,j;

   HYPRE_Int nx_local, ny_local;
   HYPRE_Int nx_size, ny_size;
   HYPRE_Int num_cols_offd;
   HYPRE_Int grid_size;

   HYPRE_Int *nx_part;
   HYPRE_Int *ny_part;

   HYPRE_Int num_procs, my_id;
   HYPRE_Int P_busy, Q_busy;

   hypre_MPI_Comm_size(comm,&num_procs);
   hypre_MPI_Comm_rank(comm,&my_id);

   grid_size = nx*ny;

   hypre_GeneratePartitioning(nx,P,&nx_part);
   hypre_GeneratePartitioning(ny,Q,&ny_part);

   global_part = hypre_CTAlloc(HYPRE_Int,P*Q+1);

   global_part[0] = 0;
   cnt = 1;
   for (iy = 0; iy < Q; iy++)
   {
      ny_size = ny_part[iy+1]-ny_part[iy];
      for (ix = 0; ix < P; ix++)
      {
         nx_size = nx_part[ix+1] - nx_part[ix];
         global_part[cnt] = global_part[cnt-1];
         global_part[cnt++] += nx_size*ny_size;
      }
   }

   nx_local = nx_part[p+1] - nx_part[p];
   ny_local = ny_part[q+1] - ny_part[q];

   my_id = q*P + p;
   num_procs = P*Q;

   local_num_rows = nx_local*ny_local;
   diag_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1);
   offd_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1);

   P_busy = hypre_min(nx,P);
   Q_busy = hypre_min(ny,Q);

   num_cols_offd = 0;
   if (p) num_cols_offd += ny_local;
   if (p < P_busy-1) num_cols_offd += ny_local;
   if (q) num_cols_offd += nx_local;
   if (q < Q_busy-1) num_cols_offd += nx_local;
   if (p && q) num_cols_offd++;
   if (p && q < Q_busy-1 ) num_cols_offd++;
   if (p < P_busy-1 && q ) num_cols_offd++;
   if (p < P_busy-1 && q < Q_busy-1 ) num_cols_offd++;

   if (!local_num_rows) num_cols_offd = 0;

   col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);

   cnt = 0;
   o_cnt = 0;
   diag_i[0] = 0;
   offd_i[0] = 0;
   for (iy = ny_part[q];  iy < ny_part[q+1]; iy++)
   {
      for (ix = nx_part[p]; ix < nx_part[p+1]; ix++)
      {
         cnt++;
         o_cnt++;
         diag_i[cnt] = diag_i[cnt-1];
         offd_i[o_cnt] = offd_i[o_cnt-1];
         diag_i[cnt]++;
         if (iy > ny_part[q]) 
         {
            diag_i[cnt]++;
	    if (ix > nx_part[p])
	    {
	       diag_i[cnt]++;
	    }
	    else
	    {
	       if (ix) 
		  offd_i[o_cnt]++;
	    }
	    if (ix < nx_part[p+1]-1)
	    {
	       diag_i[cnt]++;
	    }
	    else
	    {
	       if (ix+1 < nx) 
		  offd_i[o_cnt]++;
	    }
         }
         else
         {
            if (iy) 
            {
               offd_i[o_cnt]++;
	       if (ix > nx_part[p])
	       {
	          offd_i[o_cnt]++;
	       }
	       else if (ix)
	       {
	          offd_i[o_cnt]++;
	       }
	       if (ix < nx_part[p+1]-1)
	       {
	          offd_i[o_cnt]++;
	       }
	       else if (ix < nx-1)
	       {
	          offd_i[o_cnt]++;
	       }
            }
         }
         if (ix > nx_part[p]) 
            diag_i[cnt]++;
         else
         {
            if (ix) 
            {
               offd_i[o_cnt]++; 
            }
         }
         if (ix+1 < nx_part[p+1]) 
            diag_i[cnt]++;
         else
         {
            if (ix+1 < nx) 
            {
               offd_i[o_cnt]++; 
            }
         }
         if (iy+1 < ny_part[q+1]) 
         {
            diag_i[cnt]++;
	    if (ix > nx_part[p])
	    {
	       diag_i[cnt]++;
	    }
	    else
	    {
	       if (ix) 
		  offd_i[o_cnt]++;
	    }
	    if (ix < nx_part[p+1]-1)
	    {
	       diag_i[cnt]++;
	    }
	    else
	    {
	       if (ix+1 < nx) 
		  offd_i[o_cnt]++;
	    }
         }
         else
         {
            if (iy+1 < ny) 
            {
               offd_i[o_cnt]++;
	       if (ix > nx_part[p])
	       {
	          offd_i[o_cnt]++;
	       }
	       else if (ix)
	       {
	          offd_i[o_cnt]++;
	       }
	       if (ix < nx_part[p+1]-1)
	       {
	          offd_i[o_cnt]++;
	       }
	       else if (ix < nx-1)
	       {
	          offd_i[o_cnt]++;
	       }
            }
         }
      }
   }

   diag_j = hypre_CTAlloc(HYPRE_Int, diag_i[local_num_rows]);
   diag_data = hypre_CTAlloc(double, diag_i[local_num_rows]);

   if (num_procs > 1)
   {
      offd_j = hypre_CTAlloc(HYPRE_Int, offd_i[local_num_rows]);
      offd_data = hypre_CTAlloc(double, offd_i[local_num_rows]);
   }
HYPRE_Int
hypre_SparseMSGSolve( void               *smsg_vdata,
                      hypre_StructMatrix *A,
                      hypre_StructVector *b,
                      hypre_StructVector *x          )
{
   hypre_SparseMSGData  *smsg_data = smsg_vdata;

   HYPRE_Real            tol                 = (smsg_data -> tol);
   HYPRE_Int             max_iter            = (smsg_data -> max_iter);
   HYPRE_Int             rel_change          = (smsg_data -> rel_change);
   HYPRE_Int             zero_guess          = (smsg_data -> zero_guess);
   HYPRE_Int             jump                = (smsg_data -> jump);
   HYPRE_Int             num_pre_relax       = (smsg_data -> num_pre_relax);
   HYPRE_Int             num_post_relax      = (smsg_data -> num_post_relax);
   HYPRE_Int             num_fine_relax      = (smsg_data -> num_fine_relax);
   HYPRE_Int            *num_grids           = (smsg_data -> num_grids);
   HYPRE_Int             num_all_grids       = (smsg_data -> num_all_grids);
   HYPRE_Int             num_levels          = (smsg_data -> num_levels);
   hypre_StructMatrix  **A_array             = (smsg_data -> A_array);
   hypre_StructMatrix  **Px_array            = (smsg_data -> Px_array);
   hypre_StructMatrix  **Py_array            = (smsg_data -> Py_array);
   hypre_StructMatrix  **Pz_array            = (smsg_data -> Pz_array);
   hypre_StructMatrix  **RTx_array           = (smsg_data -> RTx_array);
   hypre_StructMatrix  **RTy_array           = (smsg_data -> RTy_array);
   hypre_StructMatrix  **RTz_array           = (smsg_data -> RTz_array);
   hypre_StructVector  **b_array             = (smsg_data -> b_array);
   hypre_StructVector  **x_array             = (smsg_data -> x_array);
   hypre_StructVector  **t_array             = (smsg_data -> t_array);
   hypre_StructVector  **r_array             = (smsg_data -> r_array);
   hypre_StructVector  **e_array             = (smsg_data -> e_array);
   hypre_StructVector  **visitx_array        = (smsg_data -> visitx_array);
   hypre_StructVector  **visity_array        = (smsg_data -> visity_array);
   hypre_StructVector  **visitz_array        = (smsg_data -> visitz_array);
   HYPRE_Int            *grid_on             = (smsg_data -> grid_on);
   void                **relax_array         = (smsg_data -> relax_array);
   void                **matvec_array        = (smsg_data -> matvec_array);
   void                **restrictx_array     = (smsg_data -> restrictx_array);
   void                **restricty_array     = (smsg_data -> restricty_array);
   void                **restrictz_array     = (smsg_data -> restrictz_array);
   void                **interpx_array       = (smsg_data -> interpx_array);
   void                **interpy_array       = (smsg_data -> interpy_array);
   void                **interpz_array       = (smsg_data -> interpz_array);
   HYPRE_Int             logging             = (smsg_data -> logging);
   HYPRE_Real           *norms               = (smsg_data -> norms);
   HYPRE_Real           *rel_norms           = (smsg_data -> rel_norms);

   HYPRE_Int            *restrict_count;

   HYPRE_Real            b_dot_b, r_dot_r, eps;
   HYPRE_Real            e_dot_e, x_dot_x;
                    
   HYPRE_Int             i, l, lx, ly, lz;
   HYPRE_Int             lymin, lymax, lzmin, lzmax;
   HYPRE_Int             fi, ci;                              
   HYPRE_Int             ierr = 0;

#if DEBUG
   char                  filename[255];
#endif

   /*-----------------------------------------------------
    * Initialize some things and deal with special cases
    *-----------------------------------------------------*/

   hypre_BeginTiming(smsg_data -> time_index);

   hypre_StructMatrixDestroy(A_array[0]);
   hypre_StructVectorDestroy(b_array[0]);
   hypre_StructVectorDestroy(x_array[0]);
   A_array[0] = hypre_StructMatrixRef(A);
   b_array[0] = hypre_StructVectorRef(b);
   x_array[0] = hypre_StructVectorRef(x);

   (smsg_data -> num_iterations) = 0;

   /* if max_iter is zero, return */
   if (max_iter == 0)
   {
      /* if using a zero initial guess, return zero */
      if (zero_guess)
      {
         hypre_StructVectorSetConstantValues(x, 0.0);
      }

      hypre_EndTiming(smsg_data -> time_index);
      return ierr;
   }

   /* part of convergence check */
   if (tol > 0.0)
   {
      /* eps = (tol^2) */
      b_dot_b = hypre_StructInnerProd(b_array[0], b_array[0]);
      eps = tol*tol;

      /* if rhs is zero, return a zero solution */
      if (b_dot_b == 0.0)
      {
         hypre_StructVectorSetConstantValues(x, 0.0);
         if (logging > 0)
         {
            norms[0]     = 0.0;
            rel_norms[0] = 0.0;
         }

         hypre_EndTiming(smsg_data -> time_index);
         return ierr;
      }
   }

   restrict_count = hypre_TAlloc(HYPRE_Int, num_all_grids);

   /*-----------------------------------------------------
    * Do V-cycles:
    *   For each index l, "fine" = l, "coarse" = (l+1)
    *-----------------------------------------------------*/

   for (i = 0; i < max_iter; i++)
   {
      /*--------------------------------------------------
       * Down cycle:
       *   Note that r = b = x through the jump region
       *--------------------------------------------------*/

      /* fine grid pre-relaxation */
      hypre_PFMGRelaxSetPreRelax(relax_array[0]);
      hypre_PFMGRelaxSetMaxIter(relax_array[0], num_fine_relax);
      hypre_PFMGRelaxSetZeroGuess(relax_array[0], zero_guess);
      hypre_PFMGRelax(relax_array[0], A_array[0], b_array[0], x_array[0]);
      zero_guess = 0;

      /* compute fine grid residual (b - Ax) */
      hypre_StructCopy(b_array[0], r_array[0]);
      hypre_StructMatvecCompute(matvec_array[0],
                                -1.0, A_array[0], x_array[0], 1.0, r_array[0]);

      /* convergence check */
      if (tol > 0.0)
      {
         r_dot_r = hypre_StructInnerProd(r_array[0], r_array[0]);

         if (logging > 0)
         {
            norms[i] = sqrt(r_dot_r);
            if (b_dot_b > 0)
               rel_norms[i] = sqrt(r_dot_r/b_dot_b);
            else
               rel_norms[i] = 0.0;
         }
/* RDF */
#if 0

hypre_printf("iter = %d, rel_norm = %e\n", i, rel_norms[i]);

#endif

         /* always do at least 1 V-cycle */
         if ((r_dot_r/b_dot_b < eps) && (i > 0))
         {
            if (rel_change)
            {
               if ((e_dot_e/x_dot_x) < eps)
                  break;
            }
            else
            {
               break;
            }
         }
      }

      if (num_levels > 1)
      {
         /* initialize restrict_count */
         for (fi = 0; fi < num_all_grids; fi++)
         {
            restrict_count[fi] = 0;
         }

         for (l = 0; l <= (num_levels - 2); l++)
         {
            lzmin = hypre_max((l - num_grids[1] - num_grids[0] + 2), 0);
            lzmax = hypre_min((l), (num_grids[2] - 1));
            for (lz = lzmin; lz <= lzmax; lz++)
            {
               lymin = hypre_max((l - lz - num_grids[0] + 1), 0);
               lymax = hypre_min((l - lz), (num_grids[1] - 1));
               for (ly = lymin; ly <= lymax; ly++)
               {
                  lx = l - lz - ly;

                  hypre_SparseMSGMapIndex(lx, ly, lz, num_grids, fi);

                  if (!grid_on[fi])
                  {
                     break;
                  }

                  if (restrict_count[fi] > 1)
                  {
                     hypre_StructScale((1.0/restrict_count[fi]), b_array[fi]);
                  }

                  if (l > jump)
                  {
                     /* pre-relaxation */
                     hypre_PFMGRelaxSetPreRelax(relax_array[fi]);
                     hypre_PFMGRelaxSetMaxIter(relax_array[fi], num_pre_relax);
                     hypre_PFMGRelaxSetZeroGuess(relax_array[fi], 1);
                     hypre_PFMGRelax(relax_array[fi], A_array[fi], b_array[fi],
                                     x_array[fi]);

                     /* compute residual (b - Ax) */
                     hypre_StructCopy(b_array[fi], r_array[fi]);
                     hypre_StructMatvecCompute(matvec_array[fi],
                                               -1.0, A_array[fi], x_array[fi],
                                               1.0, r_array[fi]);
                  }
                        
                  if ((lx+1) < num_grids[0])
                  {
                     /* restrict to ((lx+1), ly, lz) */
                     hypre_SparseMSGMapIndex((lx+1), ly, lz, num_grids, ci);
                     if (grid_on[ci])
                     {
                        if (restrict_count[ci])
                        {
                           hypre_SparseMSGRestrict(restrictx_array[fi],
                                                   RTx_array[lx], r_array[fi],
                                                   t_array[ci]);
                           hypre_StructAxpy(1.0, t_array[ci], b_array[ci]);
                        }
                        else
                        {
                           hypre_SparseMSGRestrict(restrictx_array[fi],
                                                   RTx_array[lx], r_array[fi],
                                                   b_array[ci]);
                        }
                        restrict_count[ci]++;
                     }
                  }
                  if ((ly+1) < num_grids[1])
                  {
                     /* restrict to (lx, (ly+1), lz) */
                     hypre_SparseMSGMapIndex(lx, (ly+1), lz, num_grids, ci);
                     if (grid_on[ci])
                     {
                        if (restrict_count[ci])
                        {
                           hypre_SparseMSGRestrict(restricty_array[fi],
                                                   RTy_array[ly], r_array[fi],
                                                   t_array[ci]);
                           hypre_StructAxpy(1.0, t_array[ci], b_array[ci]);
                        }
                        else
                        {
                           hypre_SparseMSGRestrict(restricty_array[fi],
                                                   RTy_array[ly], r_array[fi],
                                                   b_array[ci]);
                        }
                        restrict_count[ci]++;
                     }
                  }
                  if ((lz+1) < num_grids[2])
                  {
                     /* restrict to (lx, ly, (lz+1)) */
                     hypre_SparseMSGMapIndex(lx, ly, (lz+1), num_grids, ci);
                     if (grid_on[ci])
                     {
                        if (restrict_count[ci])
                        {
                           hypre_SparseMSGRestrict(restrictz_array[fi],
                                                   RTz_array[lz], r_array[fi],
                                                   t_array[ci]);
                           hypre_StructAxpy(1.0, t_array[ci], b_array[ci]);
                        }
                        else
                        {
                           hypre_SparseMSGRestrict(restrictz_array[fi],
                                                   RTz_array[lz], r_array[fi],
                                                   b_array[ci]);
                        }
                        restrict_count[ci]++;
                     }
                  }
#if DEBUG
                  hypre_sprintf(filename, "zoutSMSG_bdown.%d.%d.%d", lx, ly, lz);
                  hypre_StructVectorPrint(filename, b_array[fi], 0);
                  hypre_sprintf(filename, "zoutSMSG_xdown.%d.%d.%d", lx, ly, lz);
                  hypre_StructVectorPrint(filename, x_array[fi], 0);
                  hypre_sprintf(filename, "zoutSMSG_rdown.%d.%d.%d", lx, ly, lz);
                  hypre_StructVectorPrint(filename, r_array[fi], 0);
#endif
               }
            }
         }

         /*--------------------------------------------------
          * Bottom
          *--------------------------------------------------*/
      
         fi = num_all_grids - 1;

         if (restrict_count[fi] > 1)
         {
            hypre_StructScale((1.0/restrict_count[fi]), b_array[fi]);
         }

         hypre_PFMGRelaxSetZeroGuess(relax_array[fi], 1);
         hypre_PFMGRelax(relax_array[fi], A_array[fi], b_array[fi],
                         x_array[fi]);

#if DEBUG
         hypre_sprintf(filename, "zoutSMSG_bbottom.%d.%d.%d", lx, ly, lz);
         hypre_StructVectorPrint(filename, b_array[fi], 0);
         hypre_sprintf(filename, "zoutSMSG_xbottom.%d.%d.%d", lx, ly, lz);
         hypre_StructVectorPrint(filename, x_array[fi], 0);
#endif

         /*--------------------------------------------------
          * Up cycle
          *   Note that r = b = x through the jump region
          *--------------------------------------------------*/

         for (l = (num_levels - 2); l >= 0; l--)
         {
            lzmin = hypre_max((l - num_grids[1] - num_grids[0] + 2), 0);
            lzmax = hypre_min((l), (num_grids[2] - 1));
            for (lz = lzmax; lz >= lzmin; lz--)
            {
               lymin = hypre_max((l - lz - num_grids[0] + 1), 0);
               lymax = hypre_min((l - lz), (num_grids[1] - 1));
               for (ly = lymax; ly >= lymin; ly--)
               {
                  lx = l - lz - ly;

                  hypre_SparseMSGMapIndex(lx, ly, lz, num_grids, fi);
                     
                  if (!grid_on[fi])
                  {
                     break;
                  }

                  if ((l >= 1) && (l <= jump))
                  {
                     hypre_StructVectorSetConstantValues(x_array[fi], 0.0);
                  }
                  if ((lx+1) < num_grids[0])
                  {
                     /* interpolate from ((lx+1), ly, lz) */
                     hypre_SparseMSGMapIndex((lx+1), ly, lz, num_grids, ci);
                     if (grid_on[ci])
                     {
                        hypre_SparseMSGInterp(interpx_array[fi],
                                              Px_array[lx], x_array[ci],
                                              e_array[fi]);
                        hypre_SparseMSGFilter(visitx_array[fi], e_array[fi],
                                              lx, ly, lz, jump);
                        hypre_StructAxpy(1.0, e_array[fi], x_array[fi]);
                     }
                  }
                  if ((ly+1) < num_grids[1])
                  {
                     /* interpolate from (lx, (ly+1), lz) */
                     hypre_SparseMSGMapIndex(lx, (ly+1), lz, num_grids, ci);
                     if (grid_on[ci])
                     {
                        hypre_SparseMSGInterp(interpy_array[fi],
                                              Py_array[ly], x_array[ci],
                                              e_array[fi]);
                        hypre_SparseMSGFilter(visity_array[fi], e_array[fi],
                                              lx, ly, lz, jump);
                        hypre_StructAxpy(1.0, e_array[fi], x_array[fi]);
                     }
                  }
                  if ((lz+1) < num_grids[2])
                  {
                     /* interpolate from (lx, ly, (lz+1)) */
                     hypre_SparseMSGMapIndex(lx, ly, (lz+1), num_grids, ci);
                     if (grid_on[ci])
                     {
                        hypre_SparseMSGInterp(interpz_array[fi],
                                              Pz_array[lz], x_array[ci],
                                              e_array[fi]);
                        hypre_SparseMSGFilter(visitz_array[fi], e_array[fi],
                                              lx, ly, lz, jump);
                        hypre_StructAxpy(1.0, e_array[fi], x_array[fi]);
                     }
                  }               
#if DEBUG
                  hypre_sprintf(filename, "zoutSMSG_xup.%d.%d.%d", lx, ly, lz);
                  hypre_StructVectorPrint(filename, x_array[fi], 0);
#endif
                  if (l > jump)
                  {
                     /* post-relaxation */
                     hypre_PFMGRelaxSetPostRelax(relax_array[fi]);
                     hypre_PFMGRelaxSetMaxIter(relax_array[fi],
                                               num_post_relax);
                     hypre_PFMGRelaxSetZeroGuess(relax_array[fi], 0);
                     hypre_PFMGRelax(relax_array[fi], A_array[fi], b_array[fi],
                                     x_array[fi]);
                  }
               }
            }
         }
      }

      /* part of convergence check */
      if ((tol > 0.0) && (rel_change))
      {
         if (num_levels > 1)
         {
            e_dot_e = hypre_StructInnerProd(e_array[0], e_array[0]);
            x_dot_x = hypre_StructInnerProd(x_array[0], x_array[0]);
         }
         else
         {
            e_dot_e = 0.0;
            x_dot_x = 1.0;
         }
      }

      /* fine grid post-relaxation */
      hypre_PFMGRelaxSetPostRelax(relax_array[0]);
      hypre_PFMGRelaxSetMaxIter(relax_array[0], num_fine_relax);
      hypre_PFMGRelaxSetZeroGuess(relax_array[0], 0);
      hypre_PFMGRelax(relax_array[0], A_array[0], b_array[0], x_array[0]);

      (smsg_data -> num_iterations) = (i + 1);
   }

   hypre_EndTiming(smsg_data -> time_index);

   return ierr;
}
Beispiel #8
0
hypre_ParCSRMatrix * hypre_ParMatMinus_F(
   hypre_ParCSRMatrix * P, hypre_ParCSRMatrix * C, HYPRE_Int * CF_marker )
/* hypre_ParMatMinus_F subtracts selected rows of its second argument
   from selected rows of its first argument.  The marker array
   determines which rows are affected - those for which CF_marker<0.
   The result is returned as a new matrix.
*/
{
   /*
     If P=(Pik),C=(Cik), i in Fine+Coarse, k in Coarse, we want
        new Pik = Pik - Cik, for Fine i only, all k.
     This computation is purely local.
   */
   /* This is _not_ a general-purpose matrix subtraction function.
      This is written for an interpolation problem where it is known that C(i,k)
      exists whenever P(i,k) does (because C=A*P where A has nonzero diagonal elements).
   */

   hypre_ParCSRMatrix *Pnew;
   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
   hypre_CSRMatrix *C_diag = hypre_ParCSRMatrixDiag(C);
   hypre_CSRMatrix *C_offd = hypre_ParCSRMatrixOffd(C);
   hypre_CSRMatrix *Pnew_diag;
   hypre_CSRMatrix *Pnew_offd;

   double          *P_diag_data = hypre_CSRMatrixData(P_diag);
   HYPRE_Int             *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int             *P_diag_j = hypre_CSRMatrixJ(P_diag);
   double          *P_offd_data = hypre_CSRMatrixData(P_offd);
   HYPRE_Int             *P_offd_i = hypre_CSRMatrixI(P_offd);
   HYPRE_Int             *P_offd_j = hypre_CSRMatrixJ(P_offd);
   HYPRE_Int             *P_col_map_offd = hypre_ParCSRMatrixColMapOffd( P );
   double          *C_diag_data = hypre_CSRMatrixData(C_diag);
   HYPRE_Int             *C_diag_i = hypre_CSRMatrixI(C_diag);
   HYPRE_Int             *C_diag_j = hypre_CSRMatrixJ(C_diag);
   double          *C_offd_data = hypre_CSRMatrixData(C_offd);
   HYPRE_Int             *C_offd_i = hypre_CSRMatrixI(C_offd);
   HYPRE_Int             *C_offd_j = hypre_CSRMatrixJ(C_offd);
   HYPRE_Int             *C_col_map_offd = hypre_ParCSRMatrixColMapOffd( C );
   HYPRE_Int             *Pnew_diag_i;
   HYPRE_Int             *Pnew_diag_j;
   double          *Pnew_diag_data;
   HYPRE_Int             *Pnew_offd_i;
   HYPRE_Int             *Pnew_offd_j;
   double          *Pnew_offd_data;
   HYPRE_Int             *Pnew_j2m;
   HYPRE_Int             *Pnew_col_map_offd;

   HYPRE_Int	num_rows_diag_C = hypre_CSRMatrixNumRows(C_diag);
   /* HYPRE_Int	num_rows_offd_C = hypre_CSRMatrixNumRows(C_offd); */
   HYPRE_Int	num_cols_offd_C = hypre_CSRMatrixNumCols(C_offd);
   HYPRE_Int	num_cols_offd_P = hypre_CSRMatrixNumCols(P_offd);
   HYPRE_Int  num_cols_offd_Pnew, num_rows_offd_Pnew;
   
   HYPRE_Int              i1, jmin, jmax, jrange, jrangem1;
   HYPRE_Int              j, m, mc, mp, jc, jp, jP, jC, jg, jCg, jPg;
   double           dc, dp;

/*   Pnew = hypre_ParCSRMatrixCompleteClone( C );*/

   Pnew = hypre_ParCSRMatrixUnion( C, P );
;
   hypre_ParCSRMatrixZero_F( Pnew, CF_marker );  /* fine rows of Pnew set to 0 */
   hypre_ParCSRMatrixCopy_C( Pnew, C, CF_marker ); /* coarse rows of Pnew copied from C (or P) */
   /* ...Zero_F may not be needed depending on how Pnew is made */
   Pnew_diag = hypre_ParCSRMatrixDiag(Pnew);
   Pnew_offd = hypre_ParCSRMatrixOffd(Pnew);
   Pnew_diag_i = hypre_CSRMatrixI(Pnew_diag);
   Pnew_diag_j = hypre_CSRMatrixJ(Pnew_diag);
   Pnew_offd_i = hypre_CSRMatrixI(Pnew_offd);
   Pnew_offd_j = hypre_CSRMatrixJ(Pnew_offd);
   Pnew_diag_data = hypre_CSRMatrixData(Pnew_diag);
   Pnew_offd_data = hypre_CSRMatrixData(Pnew_offd);
   Pnew_col_map_offd = hypre_ParCSRMatrixColMapOffd( Pnew );
   num_rows_offd_Pnew = hypre_CSRMatrixNumRows(Pnew_offd);
   num_cols_offd_Pnew = hypre_CSRMatrixNumCols(Pnew_offd);


   /* Find the j-ranges, needed to allocate a "reverse lookup" array. */
   /* This is the max j - min j over P and Pnew (which here is a copy of C).
      Only the diag block is considered. */
   /* For scalability reasons (jrange can get big) this won't work for the offd block.
      Also, indexing is more complicated in the offd block (c.f. col_map_offd).
      It's not clear, though whether the "quadratic" algorithm I'm using for the offd
      block is really any slower than the more complicated "linear" algorithm here. */
   jrange = 0;
   jrangem1=-1;
   for ( i1 = 0; i1 < num_rows_diag_C; i1++ )
   {
      if ( CF_marker[i1]<0 && hypre_CSRMatrixNumNonzeros(Pnew_diag)>0 )  /* only Fine rows matter */
      {
         jmin = Pnew_diag_j[ Pnew_diag_i[i1] ];
         jmax = Pnew_diag_j[ Pnew_diag_i[i1+1]-1 ];
         jrangem1 = jmax-jmin;
         jrange = hypre_max(jrange,jrangem1+1);
         /* If columns (of a given row) were in increasing order, the above would be sufficient.
            If not, the following would be necessary (and sufficient) */
         jmin = Pnew_diag_j[ Pnew_diag_i[i1] ];
         jmax = Pnew_diag_j[ Pnew_diag_i[i1] ];
         for ( m=Pnew_diag_i[i1]+1; m<Pnew_diag_i[i1+1]; ++m )
         {
            j = Pnew_diag_j[m];
            jmin = hypre_min( jmin, j );
            jmax = hypre_max( jmax, j );
         }
         for ( m=P_diag_i[i1]; m<P_diag_i[i1+1]; ++m )
         {
            j = P_diag_j[m];
            jmin = hypre_min( jmin, j );
            jmax = hypre_max( jmax, j );
         }
         jrangem1 = jmax-jmin;
         jrange = hypre_max(jrange,jrangem1+1);
      }
   }


   /*-----------------------------------------------------------------------
    *  Loop over Pnew_diag rows.  Construct a temporary reverse array:
    *  If j is a column number, Pnew_j2m[j] is the array index for j, i.e.
    *  Pnew_diag_j[ Pnew_j2m[j] ] = j
    *-----------------------------------------------------------------------*/

   Pnew_j2m = hypre_CTAlloc( HYPRE_Int, jrange );

   for ( i1 = 0; i1 < num_rows_diag_C; i1++ )
   {
      if ( CF_marker[i1]<0 && hypre_CSRMatrixNumNonzeros(Pnew_diag)>0 )  /* Fine data only */
      {
         /* just needed for an assertion below... */
         for ( j=0; j<jrange; ++j ) Pnew_j2m[j] = -1;
         jmin = Pnew_diag_j[ Pnew_diag_i[i1] ];
            /* If columns (of a given row) were in increasing order, the above line would be sufficient.
               If not, the following loop would have to be added (or store the jmin computed above )*/
         for ( m=Pnew_diag_i[i1]+1; m<Pnew_diag_i[i1+1]; ++m )
         {
            j = Pnew_diag_j[m];
            jmin = hypre_min( jmin, j );
         }
         for ( m=P_diag_i[i1]; m<P_diag_i[i1+1]; ++m )
         {
            j = P_diag_j[m];
            jmin = hypre_min( jmin, j );
         }
         for ( m = Pnew_diag_i[i1]; m<Pnew_diag_i[i1+1]; ++m )
         {
            j = Pnew_diag_j[m];
            hypre_assert( j-jmin>=0 );
            hypre_assert( j-jmin<jrange );
            Pnew_j2m[ j-jmin ] = m;
         }

         /*-----------------------------------------------------------------------
          *  Loop over C_diag data for the current row.
          *  Subtract each C data entry from the corresponding Pnew entry.
          *-----------------------------------------------------------------------*/

         for ( mc=C_diag_i[i1]; mc<C_diag_i[i1+1]; ++mc )
         {
            jc = C_diag_j[mc];
            dc = C_diag_data[mc];
            m = Pnew_j2m[jc-jmin];
            hypre_assert( m>=0 );
            Pnew_diag_data[m] -= dc;
         }

         /*-----------------------------------------------------------------------
          *  Loop over P_diag data for the current row.
          *  Add each P data entry from the corresponding Pnew entry.
          *-----------------------------------------------------------------------*/

         for ( mp=P_diag_i[i1]; mp<P_diag_i[i1+1]; ++mp )
         {
            jp = P_diag_j[mp];
            dp = P_diag_data[mp];
            m = Pnew_j2m[jp-jmin];
            hypre_assert( m>=0 );
            Pnew_diag_data[m] += dp;
         }
      }
   }

         /*-----------------------------------------------------------------------
          * Repeat for the offd block.
          *-----------------------------------------------------------------------*/

   for ( i1 = 0; i1 < num_rows_offd_Pnew; i1++ )
   {
      if ( CF_marker[i1]<0 && hypre_CSRMatrixNumNonzeros(Pnew_offd)>0 )  /* Fine data only */
      {
         if ( num_cols_offd_Pnew )
         {
            /*  This is a simple quadratic algorithm.  If necessary I may try
               to implement the ideas used on the diag block later. */
            for ( m = Pnew_offd_i[i1]; m<Pnew_offd_i[i1+1]; ++m )
            {
               j = Pnew_offd_j[m];
               jg = Pnew_col_map_offd[j];
               Pnew_offd_data[m] = 0;
               if ( num_cols_offd_C )
                  for ( mc=C_offd_i[i1]; mc<C_offd_i[i1+1]; ++mc )
                  {
                     jC = C_offd_j[mc];
                     jCg = C_col_map_offd[jC];
                     if ( jCg==jg ) Pnew_offd_data[m] -= C_offd_data[mc];
                  }
               if ( num_cols_offd_P )
                  for ( mp=P_offd_i[i1]; mp<P_offd_i[i1+1]; ++mp )
                  {
                     jP = P_offd_j[mp];
                     jPg = P_col_map_offd[jP];
                     if ( jPg==jg ) Pnew_offd_data[m] += P_offd_data[mp];
                  }
            }
         }
      }
   }


   hypre_TFree(Pnew_j2m);

   return Pnew;
}
void hypre_BoomerAMGTruncateInterp( hypre_ParCSRMatrix *P,
                                    HYPRE_Real eps, HYPRE_Real dlt,
                                    HYPRE_Int * CF_marker )
/* Truncate the interpolation matrix P, but only in rows for which the
   marker is <0.  Truncation means that an element P(i,j) is set to 0 if
   P(i,j)>0 and P(i,j)<eps*max( P(i,j) )  or if
   P(i,j)>0 and P(i,j)<dlt*max( -P(i,j) )  or if
   P(i,j)<0 and P(i,j)>dlt*min( -P(i,j) )  or if
   P(i,j)<0 and P(i,j)>eps*min( P(i,j) )
      ( 0<eps,dlt<1, typically 0.1=dlt<eps=0.2, )
   The min and max are only computed locally, as I'm guessing that there isn't
   usually much to be gained (in the way of improved performance) by getting
   them perfectly right.
*/

/* The function hypre_BoomerAMGInterpTruncation in par_interp.c is
   very similar.  It looks at fabs(value) rather than separately
   dealing with value<0 and value>0 as recommended by Klaus Stuben,
   thus as this function does.  In this function, only "marked" rows
   are affected.  Lastly, in hypre_BoomerAMGInterpTruncation, if any
   element gets discarded, it reallocates arrays to the new size.
*/
{
   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
   HYPRE_Real      *P_diag_data = hypre_CSRMatrixData(P_diag);
   HYPRE_Int             *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int             *P_diag_j = hypre_CSRMatrixJ(P_diag);
   HYPRE_Real      *P_offd_data = hypre_CSRMatrixData(P_offd);
   HYPRE_Int             *P_offd_i = hypre_CSRMatrixI(P_offd);
   HYPRE_Int             *P_offd_j = hypre_CSRMatrixJ(P_offd);
   HYPRE_Int             *new_P_diag_i;
   HYPRE_Int             *new_P_offd_i;
   HYPRE_Int	num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag);
   HYPRE_Int	num_rows_offd_P = hypre_CSRMatrixNumRows(P_offd);
   HYPRE_Int num_nonzeros_diag = hypre_CSRMatrixNumNonzeros(P_diag);
   HYPRE_Int num_nonzeros_offd = hypre_CSRMatrixNumNonzeros(P_offd);
#if 0
   MPI_Comm comm = hypre_ParCSRMatrixComm( P );
   HYPRE_Real vmax1, vmin1;
#endif
   HYPRE_Real vmax = 0.0;
   HYPRE_Real vmin = 0.0;
   HYPRE_Real v, old_sum, new_sum, scale, wmax, wmin;
   HYPRE_Int i1, m, m1d, m1o;

   /* compute vmax = eps*max(P(i,j)), vmin = eps*min(P(i,j)) */
   for ( i1 = 0; i1 < num_rows_diag_P; i1++ )
   {
      for ( m=P_diag_i[i1]; m<P_diag_i[i1+1]; ++m )
         {
            v = P_diag_data[m];
            vmax = hypre_max( v, vmax );
            vmin = hypre_min( v, vmin );
         }
      for ( m=P_offd_i[i1]; m<P_offd_i[i1+1]; ++m )
         {
            v = P_offd_data[m];
            vmax = hypre_max( v, vmax );
            vmin = hypre_min( v, vmin );
         }
   }
#if 0
   /* This can make max,min global so results don't depend on no. processors
      We don't want this except for testing, or maybe this could be put
      someplace better.  I don't like adding communication here, for a minor reason.
   */
   vmax1 = vmax; vmin1 = vmin;
   hypre_MPI_Allreduce( &vmax1, &vmax, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, comm );
   hypre_MPI_Allreduce( &vmin1, &vmin, 1, HYPRE_MPI_REAL, hypre_MPI_MIN, comm );
#endif
   if ( vmax <= 0.0 ) vmax =  1.0;  /* make sure no v is v>vmax if no v is v>0 */
   if ( vmin >= 0.0 ) vmin = -1.0;  /* make sure no v is v<vmin if no v is v<0 */
   wmax = - dlt * vmin;
   wmin = - dlt * vmax;
   vmax *= eps;
   vmin *= eps;

   /* Repack the i,j,and data arrays so as to discard the small elements of P.
      Elements of Coarse rows (CF_marker>=0) are always kept.
      The arrays are not re-allocated, so there will generally be unused space
      at the ends of the arrays. */
   new_P_diag_i = hypre_CTAlloc( HYPRE_Int, num_rows_diag_P+1 );
   new_P_offd_i = hypre_CTAlloc( HYPRE_Int, num_rows_offd_P+1 );
   m1d = P_diag_i[0];
   m1o = P_offd_i[0];
   for ( i1 = 0; i1 < num_rows_diag_P; i1++ )
   {
      old_sum = 0;
      new_sum = 0;
      for ( m=P_diag_i[i1]; m<P_diag_i[i1+1]; ++m )
      {
         v = P_diag_data[m];
         old_sum += v;
         if ( CF_marker[i1]>=0 || ( v>=vmax && v>=wmax ) || ( v<=vmin && v<=wmin ) )
         {  /* keep v */
            new_sum += v;
            P_diag_j[m1d] = P_diag_j[m];
            P_diag_data[m1d] = P_diag_data[m];
            ++m1d;
         }
         else
         {  /* discard v */
            --num_nonzeros_diag;
         }
      }
      for ( m=P_offd_i[i1]; m<P_offd_i[i1+1]; ++m )
      {
         v = P_offd_data[m];
         old_sum += v;
         if ( CF_marker[i1]>=0 || ( v>=vmax && v>=wmax ) || ( v<=vmin && v<=wmin ) )
         {  /* keep v */
            new_sum += v;
            P_offd_j[m1o] = P_offd_j[m];
            P_offd_data[m1o] = P_offd_data[m];
            ++m1o;
         }
         else
         {  /* discard v */
            --num_nonzeros_offd;
         }
      }

      new_P_diag_i[i1+1] = m1d;
      if ( i1<num_rows_offd_P ) new_P_offd_i[i1+1] = m1o;

      /* rescale to keep row sum the same */
      if (new_sum!=0) scale = old_sum/new_sum; else scale = 1.0;
      for ( m=new_P_diag_i[i1]; m<new_P_diag_i[i1+1]; ++m )
         P_diag_data[m] *= scale;
      if ( i1<num_rows_offd_P ) /* this test fails when there is no offd block */
         for ( m=new_P_offd_i[i1]; m<new_P_offd_i[i1+1]; ++m )
            P_offd_data[m] *= scale;

   }

   for ( i1 = 1; i1 <= num_rows_diag_P; i1++ )
   {
      P_diag_i[i1] = new_P_diag_i[i1];
      if ( i1<=num_rows_offd_P && num_nonzeros_offd>0 ) P_offd_i[i1] = new_P_offd_i[i1];
   }
   hypre_TFree( new_P_diag_i );
   if ( num_rows_offd_P>0 ) hypre_TFree( new_P_offd_i );

   hypre_CSRMatrixNumNonzeros(P_diag) = num_nonzeros_diag;
   hypre_CSRMatrixNumNonzeros(P_offd) = num_nonzeros_offd;
   hypre_ParCSRMatrixSetDNumNonzeros( P );
   hypre_ParCSRMatrixSetNumNonzeros( P );

}
Beispiel #10
0
HYPRE_ParCSRMatrix 
GenerateDifConv( MPI_Comm comm,
                 HYPRE_Int      nx,
                 HYPRE_Int      ny,
                 HYPRE_Int      nz, 
                 HYPRE_Int      P,
                 HYPRE_Int      Q,
                 HYPRE_Int      R,
                 HYPRE_Int      p,
                 HYPRE_Int      q,
                 HYPRE_Int      r,
                 double  *value )
{
   hypre_ParCSRMatrix *A;
   hypre_CSRMatrix *diag;
   hypre_CSRMatrix *offd;

   HYPRE_Int    *diag_i;
   HYPRE_Int    *diag_j;
   double *diag_data;

   HYPRE_Int    *offd_i;
   HYPRE_Int    *offd_j;
   double *offd_data;

   HYPRE_Int *global_part;
   HYPRE_Int ix, iy, iz;
   HYPRE_Int cnt, o_cnt;
   HYPRE_Int local_num_rows; 
   HYPRE_Int *col_map_offd;
   HYPRE_Int row_index;
   HYPRE_Int i,j;

   HYPRE_Int nx_local, ny_local, nz_local;
   HYPRE_Int nx_size, ny_size, nz_size;
   HYPRE_Int num_cols_offd;
   HYPRE_Int grid_size;

   HYPRE_Int *nx_part;
   HYPRE_Int *ny_part;
   HYPRE_Int *nz_part;

   HYPRE_Int num_procs, my_id;
   HYPRE_Int P_busy, Q_busy, R_busy;

   hypre_MPI_Comm_size(comm,&num_procs);
   hypre_MPI_Comm_rank(comm,&my_id);

   grid_size = nx*ny*nz;

   hypre_GeneratePartitioning(nx,P,&nx_part);
   hypre_GeneratePartitioning(ny,Q,&ny_part);
   hypre_GeneratePartitioning(nz,R,&nz_part);

   global_part = hypre_CTAlloc(HYPRE_Int,P*Q*R+1);

   global_part[0] = 0;
   cnt = 1;
   for (iz = 0; iz < R; iz++)
   {
      nz_size = nz_part[iz+1]-nz_part[iz];
      for (iy = 0; iy < Q; iy++)
      {
         ny_size = ny_part[iy+1]-ny_part[iy];
         for (ix = 0; ix < P; ix++)
         {
            nx_size = nx_part[ix+1] - nx_part[ix];
            global_part[cnt] = global_part[cnt-1];
            global_part[cnt++] += nx_size*ny_size*nz_size;
         }
      }
   }

   nx_local = nx_part[p+1] - nx_part[p];
   ny_local = ny_part[q+1] - ny_part[q];
   nz_local = nz_part[r+1] - nz_part[r];

   my_id = r*(P*Q) + q*P + p;
   num_procs = P*Q*R;

   local_num_rows = nx_local*ny_local*nz_local;
   diag_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1);
   offd_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1);

   P_busy = hypre_min(nx,P);
   Q_busy = hypre_min(ny,Q);
   R_busy = hypre_min(nz,R);

   num_cols_offd = 0;
   if (p) num_cols_offd += ny_local*nz_local;
   if (p < P_busy-1) num_cols_offd += ny_local*nz_local;
   if (q) num_cols_offd += nx_local*nz_local;
   if (q < Q_busy-1) num_cols_offd += nx_local*nz_local;
   if (r) num_cols_offd += nx_local*ny_local;
   if (r < R_busy-1) num_cols_offd += nx_local*ny_local;

   if (!local_num_rows) num_cols_offd = 0;

   col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);

   cnt = 1;
   o_cnt = 1;
   diag_i[0] = 0;
   offd_i[0] = 0;
   for (iz = nz_part[r]; iz < nz_part[r+1]; iz++)
   {
      for (iy = ny_part[q];  iy < ny_part[q+1]; iy++)
      {
         for (ix = nx_part[p]; ix < nx_part[p+1]; ix++)
         {
            diag_i[cnt] = diag_i[cnt-1];
            offd_i[o_cnt] = offd_i[o_cnt-1];
            diag_i[cnt]++;
            if (iz > nz_part[r]) 
               diag_i[cnt]++;
            else
            {
               if (iz) 
               {
                  offd_i[o_cnt]++;
               }
            }
            if (iy > ny_part[q]) 
               diag_i[cnt]++;
            else
            {
               if (iy) 
               {
                  offd_i[o_cnt]++;
               }
            }
            if (ix > nx_part[p]) 
               diag_i[cnt]++;
            else
            {
               if (ix) 
               {
                  offd_i[o_cnt]++; 
               }
            }
            if (ix+1 < nx_part[p+1]) 
               diag_i[cnt]++;
            else
            {
               if (ix+1 < nx) 
               {
                  offd_i[o_cnt]++; 
               }
            }
            if (iy+1 < ny_part[q+1]) 
               diag_i[cnt]++;
            else
            {
               if (iy+1 < ny) 
               {
                  offd_i[o_cnt]++;
               }
            }
            if (iz+1 < nz_part[r+1]) 
               diag_i[cnt]++;
            else
            {
               if (iz+1 < nz) 
               {
                  offd_i[o_cnt]++;
               }
            }
            cnt++;
            o_cnt++;
         }
      }
   }

   diag_j = hypre_CTAlloc(HYPRE_Int, diag_i[local_num_rows]);
   diag_data = hypre_CTAlloc(double, diag_i[local_num_rows]);

   if (num_procs > 1)
   {
      offd_j = hypre_CTAlloc(HYPRE_Int, offd_i[local_num_rows]);
      offd_data = hypre_CTAlloc(double, offd_i[local_num_rows]);
   }
Beispiel #11
0
HYPRE_Int main (HYPRE_Int argc, char *argv[])
{
   HYPRE_Int i;
   HYPRE_Int myid, num_procs;
   HYPRE_Int N, n;

   HYPRE_Int ilower, iupper;
   HYPRE_Int local_size, extra;

   HYPRE_Int solver_id;
   HYPRE_Int print_solution;

   double h, h2;

#ifdef HYPRE_FORTRAN
   hypre_F90_Obj A;
   hypre_F90_Obj parcsr_A;
   hypre_F90_Obj b;
   hypre_F90_Obj par_b;
   hypre_F90_Obj x;
   hypre_F90_Obj par_x;

   hypre_F90_Obj solver, precond;

   hypre_F90_Obj long_temp_COMM;
        HYPRE_Int temp_COMM;
        HYPRE_Int precond_id;

        HYPRE_Int one = 1;
        HYPRE_Int two = 2;
        HYPRE_Int three = 3;
        HYPRE_Int six = 6;
        HYPRE_Int twenty = 20;
        HYPRE_Int thousand = 1000;
        HYPRE_Int hypre_type = HYPRE_PARCSR;

     double oo1 = 1.e-3;
     double tol = 1.e-7;
#else
   HYPRE_IJMatrix A;
   HYPRE_ParCSRMatrix parcsr_A;
   HYPRE_IJVector b;
   HYPRE_ParVector par_b;
   HYPRE_IJVector x;
   HYPRE_ParVector par_x;

   HYPRE_Solver solver, precond;
#endif

   /* Initialize MPI */
   hypre_MPI_Init(&argc, &argv);
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid);
   hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs);

   /* Default problem parameters */
   n = 33;
   solver_id = 0;
   print_solution  = 0;

   /* Parse command line */
   {
      HYPRE_Int arg_index = 0;
      HYPRE_Int print_usage = 0;

      while (arg_index < argc)
      {
         if ( strcmp(argv[arg_index], "-n") == 0 )
         {
            arg_index++;
            n = atoi(argv[arg_index++]);
         }
         else if ( strcmp(argv[arg_index], "-solver") == 0 )
         {
            arg_index++;
            solver_id = atoi(argv[arg_index++]);
         }
         else if ( strcmp(argv[arg_index], "-print_solution") == 0 )
         {
            arg_index++;
            print_solution = 1;
         }
         else if ( strcmp(argv[arg_index], "-help") == 0 )
         {
            print_usage = 1;
            break;
         }
         else
         {
            arg_index++;
         }
      }

      if ((print_usage) && (myid == 0))
      {
         hypre_printf("\n");
         hypre_printf("Usage: %s [<options>]\n", argv[0]);
         hypre_printf("\n");
         hypre_printf("  -n <n>              : problem size in each direction (default: 33)\n");
         hypre_printf("  -solver <ID>        : solver ID\n");
         hypre_printf("                        0  - AMG (default) \n");
         hypre_printf("                        1  - AMG-PCG\n");
         hypre_printf("                        8  - ParaSails-PCG\n");
         hypre_printf("                        50 - PCG\n");
         hypre_printf("  -print_solution     : print the solution vector\n");
         hypre_printf("\n");
      }

      if (print_usage)
      {
         hypre_MPI_Finalize();
         return (0);
      }
   }

   /* Preliminaries: want at least one processor per row */
   if (n*n < num_procs) n = sqrt(num_procs) + 1;
   N = n*n; /* global number of rows */
   h = 1.0/(n+1); /* mesh size*/
   h2 = h*h;

   /* Each processor knows only of its own rows - the range is denoted by ilower
      and upper.  Here we partition the rows. We account for the fact that
      N may not divide evenly by the number of processors. */
   local_size = N/num_procs;
   extra = N - local_size*num_procs;

   ilower = local_size*myid;
   ilower += hypre_min(myid, extra);

   iupper = local_size*(myid+1);
   iupper += hypre_min(myid+1, extra);
   iupper = iupper - 1;

   /* How many rows do I have? */
   local_size = iupper - ilower + 1;

   /* Create the matrix.
      Note that this is a square matrix, so we indicate the row partition
      size twice (since number of rows = number of cols) */
#ifdef HYPRE_FORTRAN
   long_temp_COMM = (hypre_F90_Obj) hypre_MPI_COMM_WORLD;
   temp_COMM = (HYPRE_Int) hypre_MPI_COMM_WORLD;
   HYPRE_IJMatrixCreate(&long_temp_COMM, &ilower, &iupper, &ilower, &iupper, &A);
#else
   HYPRE_IJMatrixCreate(hypre_MPI_COMM_WORLD, ilower, iupper, ilower, iupper, &A);
#endif

   /* Choose a parallel csr format storage (see the User's Manual) */
#ifdef HYPRE_FORTRAN
   HYPRE_IJMatrixSetObjectType(&A, &hypre_type);
#else
   HYPRE_IJMatrixSetObjectType(A, HYPRE_PARCSR);
#endif

   /* Initialize before setting coefficients */
#ifdef HYPRE_FORTRAN
   HYPRE_IJMatrixInitialize(&A);
#else
   HYPRE_IJMatrixInitialize(A);
#endif

   /* Now go through my local rows and set the matrix entries.
      Each row has at most 5 entries. For example, if n=3:

      A = [M -I 0; -I M -I; 0 -I M]
      M = [4 -1 0; -1 4 -1; 0 -1 4]

      Note that here we are setting one row at a time, though
      one could set all the rows together (see the User's Manual).
   */
   {
      HYPRE_Int nnz;
      double values[5];
      HYPRE_Int cols[5];

      for (i = ilower; i <= iupper; i++)
      {
         nnz = 0;

         /* The left identity block:position i-n */
         if ((i-n)>=0)
         {
	    cols[nnz] = i-n;
	    values[nnz] = -1.0;
	    nnz++;
         }

         /* The left -1: position i-1 */
         if (i%n)
         {
            cols[nnz] = i-1;
            values[nnz] = -1.0;
            nnz++;
         }

         /* Set the diagonal: position i */
         cols[nnz] = i;
         values[nnz] = 4.0;
         nnz++;

         /* The right -1: position i+1 */
         if ((i+1)%n)
         {
            cols[nnz] = i+1;
            values[nnz] = -1.0;
            nnz++;
         }

         /* The right identity block:position i+n */
         if ((i+n)< N)
         {
            cols[nnz] = i+n;
            values[nnz] = -1.0;
            nnz++;
         }

         /* Set the values for row i */
#ifdef HYPRE_FORTRAN
         HYPRE_IJMatrixSetValues(&A, &one, &nnz, &i, &cols[0], &values[0]);
#else
         HYPRE_IJMatrixSetValues(A, 1, &nnz, &i, cols, values);
#endif
      }
   }

   /* Assemble after setting the coefficients */
#ifdef HYPRE_FORTRAN
   HYPRE_IJMatrixAssemble(&A);
#else
   HYPRE_IJMatrixAssemble(A);
#endif
   /* Get the parcsr matrix object to use */
#ifdef HYPRE_FORTRAN
   HYPRE_IJMatrixGetObject(&A, &parcsr_A);
   HYPRE_IJMatrixGetObject(&A, &parcsr_A);
#else
   HYPRE_IJMatrixGetObject(A, (void**) &parcsr_A);
   HYPRE_IJMatrixGetObject(A, (void**) &parcsr_A);
#endif

   /* Create the rhs and solution */
#ifdef HYPRE_FORTRAN
   HYPRE_IJVectorCreate(&temp_COMM, &ilower, &iupper, &b);
   HYPRE_IJVectorSetObjectType(&b, &hypre_type);
   HYPRE_IJVectorInitialize(&b);
#else
   HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, ilower, iupper,&b);
   HYPRE_IJVectorSetObjectType(b, HYPRE_PARCSR);
   HYPRE_IJVectorInitialize(b);
#endif

#ifdef HYPRE_FORTRAN
   HYPRE_IJVectorCreate(&temp_COMM, &ilower, &iupper, &x);
   HYPRE_IJVectorSetObjectType(&x, &hypre_type);
   HYPRE_IJVectorInitialize(&x);
#else
   HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, ilower, iupper,&x);
   HYPRE_IJVectorSetObjectType(x, HYPRE_PARCSR);
   HYPRE_IJVectorInitialize(x);
#endif

   /* Set the rhs values to h^2 and the solution to zero */
   {
      double *rhs_values, *x_values;
      HYPRE_Int    *rows;

      rhs_values = calloc(local_size, sizeof(double));
      x_values = calloc(local_size, sizeof(double));
      rows = calloc(local_size, sizeof(HYPRE_Int));

      for (i=0; i<local_size; i++)
      {
         rhs_values[i] = h2;
         x_values[i] = 0.0;
         rows[i] = ilower + i;
      }
#ifdef HYPRE_FORTRAN
      HYPRE_IJVectorSetValues(&b, &local_size, &rows[0], &rhs_values[0]);
      HYPRE_IJVectorSetValues(&x, &local_size, &rows[0], &x_values[0]);
#else
      HYPRE_IJVectorSetValues(b, local_size, rows, rhs_values);
      HYPRE_IJVectorSetValues(x, local_size, rows, x_values);
#endif

      free(x_values);
      free(rhs_values);
      free(rows);
   }

#ifdef HYPRE_FORTRAN
   HYPRE_IJVectorAssemble(&b);
   HYPRE_IJVectorGetObject(&b, &par_b);
#else
   HYPRE_IJVectorAssemble(b);
   HYPRE_IJVectorGetObject(b, (void **) &par_b);
#endif

#ifdef HYPRE_FORTRAN
   HYPRE_IJVectorAssemble(&x);
   HYPRE_IJVectorGetObject(&x, &par_x);
#else
   HYPRE_IJVectorAssemble(x);
   HYPRE_IJVectorGetObject(x, (void **) &par_x);
#endif

   /* Choose a solver and solve the system */

   /* AMG */
   if (solver_id == 0)
   {
      HYPRE_Int num_iterations;
      double final_res_norm;

      /* Create solver */
#ifdef HYPRE_FORTRAN
      HYPRE_BoomerAMGCreate(&solver);
#else
      HYPRE_BoomerAMGCreate(&solver);
#endif

      /* Set some parameters (See Reference Manual for more parameters) */
#ifdef HYPRE_FORTRAN
      HYPRE_BoomerAMGSetPrintLevel(&solver, &three);  /* print solve info + parameters */
      HYPRE_BoomerAMGSetCoarsenType(&solver, &six); /* Falgout coarsening */
      HYPRE_BoomerAMGSetRelaxType(&solver, &three);   /* G-S/Jacobi hybrid relaxation */
      HYPRE_BoomerAMGSetNumSweeps(&solver, &one);   /* Sweeeps on each level */
      HYPRE_BoomerAMGSetMaxLevels(&solver, &twenty);  /* maximum number of levels */
      HYPRE_BoomerAMGSetTol(&solver, &tol);      /* conv. tolerance */
#else
      HYPRE_BoomerAMGSetPrintLevel(solver, 3);  /* print solve info + parameters */
      HYPRE_BoomerAMGSetCoarsenType(solver, 6); /* Falgout coarsening */
      HYPRE_BoomerAMGSetRelaxType(solver, 3);   /* G-S/Jacobi hybrid relaxation */
      HYPRE_BoomerAMGSetNumSweeps(solver, 1);   /* Sweeeps on each level */
      HYPRE_BoomerAMGSetMaxLevels(solver, 20);  /* maximum number of levels */
      HYPRE_BoomerAMGSetTol(solver, 1e-7);      /* conv. tolerance */
#endif

      /* Now setup and solve! */
#ifdef HYPRE_FORTRAN
      HYPRE_BoomerAMGSetup(&solver, &parcsr_A, &par_b, &par_x);
      HYPRE_BoomerAMGSolve(&solver, &parcsr_A, &par_b, &par_x);
#else
      HYPRE_BoomerAMGSetup(solver, parcsr_A, par_b, par_x);
      HYPRE_BoomerAMGSolve(solver, parcsr_A, par_b, par_x);
#endif

      /* Run info - needed logging turned on */
#ifdef HYPRE_FORTRAN
      HYPRE_BoomerAMGGetNumIterations(&solver, &num_iterations);
      HYPRE_BoomerAMGGetFinalRelativeResidualNorm(&solver, &final_res_norm);
#else
      HYPRE_BoomerAMGGetNumIterations(solver, &num_iterations);
      HYPRE_BoomerAMGGetFinalRelativeResidualNorm(solver, &final_res_norm);
#endif
      if (myid == 0)
      {
         hypre_printf("\n");
         hypre_printf("Iterations = %d\n", num_iterations);
         hypre_printf("Final Relative Residual Norm = %e\n", final_res_norm);
         hypre_printf("\n");
      }

      /* Destroy solver */
#ifdef HYPRE_FORTRAN
      HYPRE_BoomerAMGDestroy(&solver);
#else
      HYPRE_BoomerAMGDestroy(solver);
#endif
   }
   /* PCG */
   else if (solver_id == 50)
   {
      HYPRE_Int num_iterations;
      double final_res_norm;

      /* Create solver */
#ifdef HYPRE_FORTRAN
      HYPRE_ParCSRPCGCreate(&temp_COMM, &solver);
#else
      HYPRE_ParCSRPCGCreate(hypre_MPI_COMM_WORLD, &solver);
#endif

      /* Set some parameters (See Reference Manual for more parameters) */
#ifdef HYPRE_FORTRAN
      HYPRE_ParCSRPCGSetMaxIter(&solver, &thousand); /* max iterations */
      HYPRE_ParCSRPCGSetTol(&solver, &tol); /* conv. tolerance */
      HYPRE_ParCSRPCGSetTwoNorm(&solver, &one); /* use the two norm as the stopping criteria */
      HYPRE_ParCSRPCGSetPrintLevel(&solver, &two); /* prints out the iteration info */
#else
      HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */
      HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */
      HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */
      HYPRE_PCGSetPrintLevel(solver, 2); /* prints out the iteration info */
      HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */
#endif

      /* Now setup and solve! */
#ifdef HYPRE_FORTRAN
      HYPRE_ParCSRPCGSetup(&solver, &parcsr_A, &par_b, &par_x);
      HYPRE_ParCSRPCGSolve(&solver, &parcsr_A, &par_b, &par_x);
#else
      HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x);
      HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x);
#endif

      /* Run info - needed logging turned on */
#ifdef HYPRE_FORTRAN
      HYPRE_ParCSRPCGGetNumIterations(&solver, &num_iterations);
      HYPRE_ParCSRPCGGetFinalRelativeResidualNorm(&solver, &final_res_norm);
#else
      HYPRE_PCGGetNumIterations(solver, &num_iterations);
      HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm);
#endif
      if (myid == 0)
      {
         hypre_printf("\n");
         hypre_printf("Iterations = %d\n", num_iterations);
         hypre_printf("Final Relative Residual Norm = %e\n", final_res_norm);
         hypre_printf("\n");
      }

      /* Destroy solver */
#ifdef HYPRE_FORTRAN
      HYPRE_ParCSRPCGDestroy(&solver);
#else
      HYPRE_ParCSRPCGDestroy(solver);
#endif
   }
   /* PCG with AMG preconditioner */
   else if (solver_id == 1)
   {
      HYPRE_Int num_iterations;
      double final_res_norm;

      /* Create solver */
#ifdef HYPRE_FORTRAN
      HYPRE_ParCSRPCGCreate(&temp_COMM, &solver);
#else
      HYPRE_ParCSRPCGCreate(hypre_MPI_COMM_WORLD, &solver);
#endif

      /* Set some parameters (See Reference Manual for more parameters) */
#ifdef HYPRE_FORTRAN
      HYPRE_ParCSRPCGSetMaxIter(&solver, &thousand); /* max iterations */
      HYPRE_ParCSRPCGSetTol(&solver, &tol); /* conv. tolerance */
      HYPRE_ParCSRPCGSetTwoNorm(&solver, &one); /* use the two norm as the stopping criteria */
      HYPRE_ParCSRPCGSetPrintLevel(&solver, &two); /* print solve info */
#else
      HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */
      HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */
      HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */
      HYPRE_PCGSetPrintLevel(solver, 2); /* print solve info */
      HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */
#endif

      /* Now set up the AMG preconditioner and specify any parameters */
#ifdef HYPRE_FORTRAN
      HYPRE_BoomerAMGCreate(&precond);
      HYPRE_BoomerAMGSetPrintLevel(&precond, &one); /* print amg solution info*/
      HYPRE_BoomerAMGSetCoarsenType(&precond, &six);
      HYPRE_BoomerAMGSetRelaxType(&precond, &three);
      HYPRE_BoomerAMGSetNumSweeps(&precond, &one);
      HYPRE_BoomerAMGSetTol(&precond, &oo1);
#else
      HYPRE_BoomerAMGCreate(&precond);
      HYPRE_BoomerAMGSetPrintLevel(precond, 1); /* print amg solution info*/
      HYPRE_BoomerAMGSetCoarsenType(precond, 6);
      HYPRE_BoomerAMGSetRelaxType(precond, 3);
      HYPRE_BoomerAMGSetNumSweeps(precond, 1);
      HYPRE_BoomerAMGSetTol(precond, 1e-3);
#endif

      /* Set the PCG preconditioner */
#ifdef HYPRE_FORTRAN
      precond_id = 2;
      HYPRE_ParCSRPCGSetPrecond(&solver, &precond_id, &precond);
#else
      HYPRE_PCGSetPrecond(solver, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve,
                          (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup, precond);
#endif

      /* Now setup and solve! */
#ifdef HYPRE_FORTRAN
      HYPRE_ParCSRPCGSetup(&solver, &parcsr_A, &par_b, &par_x);
      HYPRE_ParCSRPCGSolve(&solver, &parcsr_A, &par_b, &par_x);
#else
      HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x);
      HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x);
#endif

      /* Run info - needed logging turned on */
#ifdef HYPRE_FORTRAN
      HYPRE_ParCSRPCGGetNumIterations(&solver, &num_iterations);
      HYPRE_ParCSRPCGGetFinalRelativeResidualNorm(&solver, &final_res_norm);
#else
      HYPRE_PCGGetNumIterations(solver, &num_iterations);
      HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm);
#endif
      if (myid == 0)
      {
         hypre_printf("\n");
         hypre_printf("Iterations = %d\n", num_iterations);
         hypre_printf("Final Relative Residual Norm = %e\n", final_res_norm);
         hypre_printf("\n");
      }

      /* Destroy solver and preconditioner */
#ifdef HYPRE_FORTRAN
      HYPRE_ParCSRPCGDestroy(&solver);
      HYPRE_BoomerAMGDestroy(&precond);
#else
      HYPRE_ParCSRPCGDestroy(solver);
      HYPRE_BoomerAMGDestroy(precond);
#endif
   }
   /* PCG with Parasails Preconditioner */
   else if (solver_id == 8)
   {
      HYPRE_Int    num_iterations;
      double final_res_norm;

      HYPRE_Int      sai_max_levels = 1;
      double   sai_threshold = 0.1;
      double   sai_filter = 0.05;
      HYPRE_Int      sai_sym = 1;

      /* Create solver */
#ifdef HYPRE_FORTRAN
      HYPRE_ParCSRPCGCreate(&temp_COMM, &solver);
#else
      HYPRE_ParCSRPCGCreate(hypre_MPI_COMM_WORLD, &solver);
#endif

      /* Set some parameters (See Reference Manual for more parameters) */
#ifdef HYPRE_FORTRAN
      HYPRE_ParCSRPCGSetMaxIter(&solver, &thousand); /* max iterations */
      HYPRE_ParCSRPCGSetTol(&solver, &tol); /* conv. tolerance */
      HYPRE_ParCSRPCGSetTwoNorm(&solver, &one); /* use the two norm as the stopping criteria */
      HYPRE_ParCSRPCGSetPrintLevel(&solver, &two); /* print solve info */
#else
      HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */
      HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */
      HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */
      HYPRE_PCGSetPrintLevel(solver, 2); /* print solve info */
      HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */
#endif

      /* Now set up the ParaSails preconditioner and specify any parameters */
#ifdef HYPRE_FORTRAN
      HYPRE_ParaSailsCreate(&temp_COMM, &precond);
#else
      HYPRE_ParaSailsCreate(hypre_MPI_COMM_WORLD, &precond);
#endif

      /* Set some parameters (See Reference Manual for more parameters) */
#ifdef HYPRE_FORTRAN
      HYPRE_ParaSailsSetParams(&precond, &sai_threshold, &sai_max_levels);
      HYPRE_ParaSailsSetFilter(&precond, &sai_filter);
      HYPRE_ParaSailsSetSym(&precond, &sai_sym);
      HYPRE_ParaSailsSetLogging(&precond, &three);
#else
      HYPRE_ParaSailsSetParams(precond, sai_threshold, sai_max_levels);
      HYPRE_ParaSailsSetFilter(precond, sai_filter);
      HYPRE_ParaSailsSetSym(precond, sai_sym);
      HYPRE_ParaSailsSetLogging(precond, 3);
#endif

      /* Set the PCG preconditioner */
#ifdef HYPRE_FORTRAN
      precond_id = 4;
      HYPRE_ParCSRPCGSetPrecond(&solver, &precond_id, &precond);
#else
      HYPRE_PCGSetPrecond(solver, (HYPRE_PtrToSolverFcn) HYPRE_ParaSailsSolve,
                          (HYPRE_PtrToSolverFcn) HYPRE_ParaSailsSetup, precond);
#endif

      /* Now setup and solve! */
#ifdef HYPRE_FORTRAN
      HYPRE_ParCSRPCGSetup(&solver, &parcsr_A, &par_b, &par_x);
      HYPRE_ParCSRPCGSolve(&solver, &parcsr_A, &par_b, &par_x);
#else
      HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x);
      HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x);
#endif


      /* Run info - needed logging turned on */
#ifdef HYPRE_FORTRAN
      HYPRE_ParCSRPCGGetNumIterations(&solver, &num_iterations);
      HYPRE_ParCSRPCGGetFinalRelativeResidualNorm(&solver, &final_res_norm);
#else
      HYPRE_PCGGetNumIterations(solver, &num_iterations);
      HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm);
#endif
      if (myid == 0)
      {
         hypre_printf("\n");
         hypre_printf("Iterations = %d\n", num_iterations);
         hypre_printf("Final Relative Residual Norm = %e\n", final_res_norm);
         hypre_printf("\n");
      }

      /* Destory solver and preconditioner */
#ifdef HYPRE_FORTRAN
      HYPRE_ParCSRPCGDestroy(&solver);
      HYPRE_ParaSailsDestroy(&precond);
#else
      HYPRE_ParCSRPCGDestroy(solver);
      HYPRE_ParaSailsDestroy(precond);
#endif
   }
   else
   {
      if (myid ==0) hypre_printf("Invalid solver id specified.\n");
   }

   /* Print the solution */
#ifdef HYPRE_FORTRAN
   if (print_solution)
      HYPRE_IJVectorPrint(&x, "ij.out.x");
#else
   if (print_solution)
      HYPRE_IJVectorPrint(x, "ij.out.x");
#endif

   /* Clean up */
#ifdef HYPRE_FORTRAN
   HYPRE_IJMatrixDestroy(&A);
   HYPRE_IJVectorDestroy(&b);
   HYPRE_IJVectorDestroy(&x);
#else
   HYPRE_IJMatrixDestroy(A);
   HYPRE_IJVectorDestroy(b);
   HYPRE_IJVectorDestroy(x);
#endif

   /* Finalize MPI*/
   hypre_MPI_Finalize();

   return(0);
}
Beispiel #12
0
int hypre_StructGridAssembleWithAP( hypre_StructGrid *grid )
{
 


   int                  ierr = 0;
   int                  tmp_i;
   
   int                  size, global_num_boxes, num_local_boxes;
   int                  i, j, d, k, index;
   int                  num_procs, myid;
   int                  *sendbuf8, *recvbuf8, *sendbuf2, *recvbuf2;
   int                  min_box_size, max_box_size;
   int                  global_min_box_size, global_max_box_size;
   int                 *ids;
   int                  max_regions, max_refinements, ologp;
   double               gamma;
   hypre_Index          min_index, max_index;
   
  
   int                  prune;
       
   hypre_Box           *box;
   

   MPI_Comm             comm         = hypre_StructGridComm(grid);
   hypre_Box           *bounding_box = hypre_StructGridBoundingBox(grid);
   hypre_BoxArray      *local_boxes  = hypre_StructGridBoxes(grid);
   int                  dim          = hypre_StructGridDim(grid);
   hypre_BoxNeighbors  *neighbors    = hypre_StructGridNeighbors(grid);
   int                  max_distance = hypre_StructGridMaxDistance(grid);
   hypre_IndexRef       periodic     = hypre_StructGridPeriodic(grid);

   int                 *local_boxnums;

   double               dbl_global_size, tmp_dbl;
   
   hypre_BoxArray       *my_partition;
   int                  *part_ids, *part_boxnums;
     
   int                  *proc_array, proc_count, proc_alloc, count;
   int                  *tmp_proc_ids = NULL;
   
   int                  max_response_size;
   int                  *ap_proc_ids, *send_buf, *send_buf_starts;
   int                  *response_buf, *response_buf_starts;

   hypre_BoxArray      *neighbor_boxes, *n_boxes_copy;
   int                 *neighbor_proc_ids, *neighbor_boxnums;

   int                 *order_index, *delete_array;
   int                 tmp_id, start, first_local;
   
   int                 grow, grow_array[6];
   hypre_Box           *grow_box;
   
   
   int                  *numghost;
   int                   ghostsize;
   hypre_Box            *ghostbox;

   hypre_StructAssumedPart     *assumed_part;
   hypre_DataExchangeResponse  response_obj;
 
   int                  px = hypre_IndexX(periodic);
   int                  py = hypre_IndexY(periodic);
   int                  pz = hypre_IndexZ(periodic);

   int                  i_periodic = px ? 1 : 0;
   int                  j_periodic = py ? 1 : 0;
   int                  k_periodic = pz ? 1 : 0;

   int                  num_periods, multiple_ap, p;
   hypre_Box           *result_box, *period_box;
   hypre_Index         *pshifts;

   hypre_IndexRef       pshift;

#if NEIGH_PRINT
   double               start_time, end_time;
   
#endif



/*---------------------------------------------
  Step 1:  Initializations
  -----------------------------------------------*/

   prune = 1; /* default is to prune */ 
   
   num_local_boxes = hypre_BoxArraySize(local_boxes);
  
   num_periods = (1+2*i_periodic) * (1+2*j_periodic) * (1+2*k_periodic);


   MPI_Comm_size(comm, &num_procs);
   MPI_Comm_rank(comm, &myid);


 
/*---------------------------------------------
  Step 2:  Determine the global size, total number of boxes,
           and global bounding box.
           Also get the min and max box sizes
           since it is convenient to do so.
  -----------------------------------------------*/

   if (neighbors == NULL) 
   {
    
      /*these may not be needed - check later */
      ids =   hypre_TAlloc(int, num_local_boxes);
    
      /* for the vol and number of boxes */
      sendbuf2 = hypre_CTAlloc(int, 2);
      recvbuf2 = hypre_CTAlloc(int, 2);
      size = 0;
     
      bounding_box = hypre_BoxCreate();
      grow_box = hypre_BoxCreate();
      

      if (num_local_boxes) 
      {
         
         min_box_size = hypre_BoxVolume( hypre_BoxArrayBox(local_boxes, 0));
         max_box_size = hypre_BoxVolume( hypre_BoxArrayBox(local_boxes, 0));


         /* initialize min and max */
         for (d=0; d<3; d++)
         {
            hypre_IndexD(min_index, d) = pow(2,30); 
            hypre_IndexD(max_index, d) = -pow(2,30);
         }
         

         hypre_ForBoxI(i, local_boxes)
         {
            box = hypre_BoxArrayBox(local_boxes, i);
            /* get global size and number of boxes */ 
            tmp_i = hypre_BoxVolume(box);
            size += tmp_i;
            min_box_size = hypre_min(min_box_size, tmp_i);
            max_box_size = hypre_max(max_box_size, tmp_i);


            /* set id */  
            ids[i] = i;


            /* 1/3/05 we need this for the case of holes in the domain. (I had
               commented
               it out on 12/04 - as I thought this was not necessary. */
            
            
            /* zero volume boxes - still look at for getting the bounding box */
            if (hypre_BoxVolume(box) == 0) /* zero volume boxes - still count */
            {
               hypre_CopyBox(box, grow_box);
               for (d = 0; d < 3; d++)
               {
                  if(!hypre_BoxSizeD(box, d))
                  {
                     grow = (hypre_BoxIMinD(box, d) - hypre_BoxIMaxD(box, d) + 1)/2;
                     grow_array[2*d] = grow;
                     grow_array[2*d+1] = grow;
                  }
                  else
                  {
                     grow_array[2*d] = 0;
                     grow_array[2*d+1] = 0;
                  }
               }   
               /* expand the box */
               hypre_BoxExpand(grow_box, grow_array);
               box = grow_box; /*pointer copy*/
            }     
            /*now we have a vol > 0 box */
    
         
            for (d = 0; d < dim; d++) /* for each dimension */
            {
               hypre_IndexD(min_index, d) = hypre_min( hypre_IndexD(min_index, d), 
                                                       hypre_BoxIMinD(box, d));
               hypre_IndexD(max_index, d) = hypre_max( hypre_IndexD(max_index, d), 
                                                       hypre_BoxIMaxD(box, d));
            }
                        
         }/*end for each box loop */

         /* bounding box extents */ 
         hypre_BoxSetExtents(bounding_box, min_index, max_index);
   
      }
Beispiel #13
0
HYPRE_ParCSRMatrix 
GenerateSysLaplacianVCoef( MPI_Comm comm,
                      HYPRE_Int      nx,
                      HYPRE_Int      ny,
                      HYPRE_Int      nz, 
                      HYPRE_Int      P,
                      HYPRE_Int      Q,
                      HYPRE_Int      R,
                      HYPRE_Int      p,
                      HYPRE_Int      q,
                      HYPRE_Int      r,
                      HYPRE_Int      num_fun,
                      HYPRE_Real  *mtrx,
                      HYPRE_Real  *value )
{
   hypre_ParCSRMatrix *A;
   hypre_CSRMatrix *diag;
   hypre_CSRMatrix *offd;

   HYPRE_Int    *diag_i;
   HYPRE_Int    *diag_j;
   HYPRE_Real *diag_data;

   HYPRE_Int    *offd_i;
   HYPRE_Int    *offd_j;
   HYPRE_Real *offd_data;

   HYPRE_Int *global_part;
   HYPRE_Int ix, iy, iz;
   HYPRE_Int cnt, o_cnt;
   HYPRE_Int local_num_rows; 
   HYPRE_Int *col_map_offd;
   HYPRE_Int row_index, row, col;
   HYPRE_Int index, diag_index;
   HYPRE_Int i,j;

   HYPRE_Int nx_local, ny_local, nz_local;
   HYPRE_Int nx_size, ny_size, nz_size;
   HYPRE_Int num_cols_offd;
   HYPRE_Int grid_size;
   HYPRE_Int local_grid_size;
   HYPRE_Int first_j, j_ind;
   HYPRE_Int num_coeffs, num_offd_coeffs;

   HYPRE_Int *nx_part;
   HYPRE_Int *ny_part;
   HYPRE_Int *nz_part;

   HYPRE_Int num_procs, my_id;
   HYPRE_Int P_busy, Q_busy, R_busy;
   HYPRE_Real val;

  /* for indexing in values */
   HYPRE_Int sz = num_fun*num_fun;

   hypre_MPI_Comm_size(comm,&num_procs);
   hypre_MPI_Comm_rank(comm,&my_id);

   grid_size = nx*ny*nz;

   hypre_GeneratePartitioning(nx,P,&nx_part);
   hypre_GeneratePartitioning(ny,Q,&ny_part);
   hypre_GeneratePartitioning(nz,R,&nz_part);

   global_part = hypre_CTAlloc(HYPRE_Int,P*Q*R+1);

   global_part[0] = 0;
   cnt = 1;
   for (iz = 0; iz < R; iz++)
   {
      nz_size = nz_part[iz+1]-nz_part[iz];
      for (iy = 0; iy < Q; iy++)
      {
         ny_size = ny_part[iy+1]-ny_part[iy];
         for (ix = 0; ix < P; ix++)
         {
            nx_size = nx_part[ix+1] - nx_part[ix];
            global_part[cnt] = global_part[cnt-1];
            global_part[cnt++] += nx_size*ny_size*nz_size;
         }
      }
   }

   nx_local = nx_part[p+1] - nx_part[p];
   ny_local = ny_part[q+1] - ny_part[q];
   nz_local = nz_part[r+1] - nz_part[r];

   my_id = r*(P*Q) + q*P + p;
   num_procs = P*Q*R;

   local_grid_size = nx_local*ny_local*nz_local;
   local_num_rows = num_fun*local_grid_size;
   diag_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1);
   offd_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1);

   P_busy = hypre_min(nx,P);
   Q_busy = hypre_min(ny,Q);
   R_busy = hypre_min(nz,R);

   num_cols_offd = 0;
   if (p) num_cols_offd += ny_local*nz_local;
   if (p < P_busy-1) num_cols_offd += ny_local*nz_local;
   if (q) num_cols_offd += nx_local*nz_local;
   if (q < Q_busy-1) num_cols_offd += nx_local*nz_local;
   if (r) num_cols_offd += nx_local*ny_local;
   if (r < R_busy-1) num_cols_offd += nx_local*ny_local;
   num_cols_offd *= num_fun;

   if (!local_num_rows) num_cols_offd = 0;

   col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);

   cnt = 1;
   diag_i[0] = 0;
   offd_i[0] = 0;
   for (iz = nz_part[r]; iz < nz_part[r+1]; iz++)
   {
      for (iy = ny_part[q];  iy < ny_part[q+1]; iy++)
      {
         for (ix = nx_part[p]; ix < nx_part[p+1]; ix++)
         {
            diag_i[cnt] = diag_i[cnt-1];
            offd_i[cnt] = offd_i[cnt-1];
            diag_i[cnt] += num_fun;
            if (iz > nz_part[r]) 
               diag_i[cnt] += num_fun;
            else
            {
               if (iz) 
               {
                  offd_i[cnt] += num_fun;
               }
            }
            if (iy > ny_part[q]) 
               diag_i[cnt] += num_fun;
            else
            {
               if (iy) 
               {
                  offd_i[cnt] += num_fun;
               }
            }
            if (ix > nx_part[p]) 
               diag_i[cnt] += num_fun;
            else
            {
               if (ix) 
               {
                  offd_i[cnt] += num_fun; 
               }
            }
            if (ix+1 < nx_part[p+1]) 
               diag_i[cnt] += num_fun;
            else
            {
               if (ix+1 < nx) 
               {
                  offd_i[cnt] += num_fun; 
               }
            }
            if (iy+1 < ny_part[q+1]) 
               diag_i[cnt] += num_fun;
            else
            {
               if (iy+1 < ny) 
               {
                  offd_i[cnt] += num_fun;
               }
            }
            if (iz+1 < nz_part[r+1]) 
               diag_i[cnt] += num_fun;
            else
            {
               if (iz+1 < nz) 
               {
                  offd_i[cnt] += num_fun;
               }
            }
	    num_coeffs = diag_i[cnt]-diag_i[cnt-1];
	    num_offd_coeffs = offd_i[cnt]-offd_i[cnt-1];
            cnt++;
	    for (i=1; i < num_fun; i++)
            {
	       diag_i[cnt] = diag_i[cnt-1]+num_coeffs;
	       offd_i[cnt] = offd_i[cnt-1]+num_offd_coeffs;
               cnt++;
            }
         }
      }
   }

   diag_j = hypre_CTAlloc(HYPRE_Int, diag_i[local_num_rows]);
   diag_data = hypre_CTAlloc(HYPRE_Real, diag_i[local_num_rows]);

   if (num_procs > 1)
   {
      offd_j = hypre_CTAlloc(HYPRE_Int, offd_i[local_num_rows]);
      offd_data = hypre_CTAlloc(HYPRE_Real, offd_i[local_num_rows]);
   }

   row_index = 0;
   for (iz = nz_part[r]; iz < nz_part[r+1]; iz++)
   {
      for (iy = ny_part[q];  iy < ny_part[q+1]; iy++)
      {
         for (ix = nx_part[p]; ix < nx_part[p+1]; ix++)
         {
            cnt = diag_i[row_index];;
            o_cnt = offd_i[row_index];;
	    num_coeffs = diag_i[row_index+1]-diag_i[row_index];
	    num_offd_coeffs = offd_i[row_index+1]-offd_i[row_index];
            first_j = row_index;
            for (i=0; i < num_fun; i++)
	    {
               for (j=0; j < num_fun; j++)
	       {
                  j_ind = cnt+i*num_coeffs+j;
                  diag_j[j_ind] = first_j+j;
                  diag_data[j_ind] = value[0*sz + i*num_fun+j]*mtrx[i*num_fun+j];
	       }
	    }
            cnt += num_fun;
            if (iz > nz_part[r]) 
            {
               first_j = row_index-nx_local*ny_local*num_fun;
               for (i=0; i < num_fun; i++)
	       {
                  for (j=0; j < num_fun; j++)
	          {
                     j_ind = cnt+i*num_coeffs+j;
                     diag_j[j_ind] = first_j+j;
                     diag_data[j_ind] = value[3*sz + i*num_fun+j]*mtrx[i*num_fun+j];
	          }
	       }
               cnt += num_fun;
            }
            else
            {
               if (iz) 
               {
                  first_j = num_fun*hypre_map(ix,iy,iz-1,p,q,r-1,P,Q,R,
                                      nx_part,ny_part,nz_part,global_part);
                  for (i=0; i < num_fun; i++)
	          {
                     for (j=0; j < num_fun; j++)
	             {
                        j_ind = o_cnt+i*num_offd_coeffs+j;
                        offd_j[j_ind] = first_j+j;
                        offd_data[j_ind] = value[3*sz + i*num_fun+j]*mtrx[i*num_fun+j];
	             }
	          }
                  o_cnt += num_fun;
               }
            }
            if (iy > ny_part[q]) 
            {
               first_j = row_index-nx_local*num_fun;
               for (i=0; i < num_fun; i++)
	       {
                  for (j=0; j < num_fun; j++)
	          {
                     j_ind = cnt+i*num_coeffs+j;
                     diag_j[j_ind] = first_j+j;
                     diag_data[j_ind] = value[2*sz + i*num_fun+j]*mtrx[i*num_fun+j];
	          }
	       }
               cnt += num_fun;
            }
            else
            {
               if (iy) 
               {
                  first_j = num_fun*hypre_map(ix,iy-1,iz,p,q-1,r,P,Q,R,
                                      nx_part,ny_part,nz_part,global_part);
                  for (i=0; i < num_fun; i++)
	          {
                     for (j=0; j < num_fun; j++)
	             {
                        j_ind = o_cnt+i*num_offd_coeffs+j;
                        offd_j[j_ind] = first_j+j;
                        offd_data[j_ind] = value[2*sz + i*num_fun+j]*mtrx[i*num_fun+j];
	             }
	          }
                  o_cnt += num_fun;
               }
            }
            if (ix > nx_part[p]) 
            {
               first_j = row_index-num_fun;
               for (i=0; i < num_fun; i++)
	       {
                  for (j=0; j < num_fun; j++)
	          {
                     j_ind = cnt+i*num_coeffs+j;
                     diag_j[j_ind] = first_j+j;
                     diag_data[j_ind] = value[1*sz + i*num_fun+j]*mtrx[i*num_fun+j];
	          }
	       }
               cnt += num_fun;
            }
            else
            {
               if (ix) 
               {
                  first_j = num_fun*hypre_map(ix-1,iy,iz,p-1,q,r,P,Q,R,
                                      nx_part,ny_part,nz_part,global_part);
                  for (i=0; i < num_fun; i++)
	          {
                     for (j=0; j < num_fun; j++)
	             {
                        j_ind = o_cnt+i*num_offd_coeffs+j;
                        offd_j[j_ind] = first_j+j;
                        offd_data[j_ind] = value[1*sz + i*num_fun+j]*mtrx[i*num_fun+j];
	             }
	          }
                  o_cnt += num_fun;
               }
            }
            if (ix+1 < nx_part[p+1]) 
            {
               first_j = row_index+num_fun;
               for (i=0; i < num_fun; i++)
	       {
                  for (j=0; j < num_fun; j++)
	          {
                     j_ind = cnt+i*num_coeffs+j;
                     diag_j[j_ind] = first_j+j;
                     diag_data[j_ind] = value[1*sz + i*num_fun+j]*mtrx[i*num_fun+j];
	          }
	       }
               cnt += num_fun;
            }
            else
            {
               if (ix+1 < nx) 
               {
                  first_j = num_fun*hypre_map(ix+1,iy,iz,p+1,q,r,P,Q,R,
                                      nx_part,ny_part,nz_part,global_part);
                  for (i=0; i < num_fun; i++)
	          {
                     for (j=0; j < num_fun; j++)
	             {
                        j_ind = o_cnt+i*num_offd_coeffs+j;
                        offd_j[j_ind] = first_j+j;
                        offd_data[j_ind] = value[1*sz + i*num_fun+j]*mtrx[i*num_fun+j];
	             }
	          }
                  o_cnt += num_fun;
               }
            }
            if (iy+1 < ny_part[q+1]) 
            {
               first_j = row_index+nx_local*num_fun;
               for (i=0; i < num_fun; i++)
	       {
                  for (j=0; j < num_fun; j++)
	          {
                     j_ind = cnt+i*num_coeffs+j;
                     diag_j[j_ind] = first_j+j;
                     diag_data[j_ind] = value[2*sz + i*num_fun+j]*mtrx[i*num_fun+j];
	          }
	       }
               cnt += num_fun;
            }
            else
            {
               if (iy+1 < ny) 
               {
                  first_j = num_fun*hypre_map(ix,iy+1,iz,p,q+1,r,P,Q,R,
                                      nx_part,ny_part,nz_part,global_part);
                  for (i=0; i < num_fun; i++)
	          {
                     for (j=0; j < num_fun; j++)
	             {
                        j_ind = o_cnt+i*num_offd_coeffs+j;
                        offd_j[j_ind] = first_j+j;
                        offd_data[j_ind] = value[2*sz + i*num_fun+j]*mtrx[i*num_fun+j];
	             }
	          }
                  o_cnt += num_fun;
               }
            }
            if (iz+1 < nz_part[r+1]) 
            {
               first_j = row_index+nx_local*ny_local*num_fun;
               for (i=0; i < num_fun; i++)
	       {
                  for (j=0; j < num_fun; j++)
	          {
                     j_ind = cnt+i*num_coeffs+j;
                     diag_j[j_ind] = first_j+j;
                     diag_data[j_ind] = value[3*sz + i*num_fun+j]*mtrx[i*num_fun+j];
	          }
	       }
               cnt += num_fun;
            }
            else
            {
               if (iz+1 < nz) 
               {
                  first_j = num_fun*hypre_map(ix,iy,iz+1,p,q,r+1,P,Q,R,
                                      nx_part,ny_part,nz_part,global_part);
                  for (i=0; i < num_fun; i++)
	          {
                     for (j=0; j < num_fun; j++)
	             {
                        j_ind = o_cnt+i*num_offd_coeffs+j;
                        offd_j[j_ind] = first_j+j;
                        offd_data[j_ind] = value[3*sz + i*num_fun+j]*mtrx[i*num_fun+j];
	             }
	          }
                  o_cnt += num_fun;
               }
            }
            row_index += num_fun;
         }
      }
   }

   if (num_procs > 1)
   {
         cnt = 0;
         for (i=0; i < local_num_rows; i+=num_fun)
 	 {
	    for (j=offd_i[i]; j < offd_i[i+1]; j++)
            {
               col_map_offd[cnt++] = offd_j[j];
	    }
	 }  
   	
      hypre_qsort0(col_map_offd, 0, num_cols_offd-1);

      for (i=0; i < num_fun*num_cols_offd; i++)
         for (j=hypre_min(0,abs(i-num_fun)); j < num_cols_offd; j++)
            if (offd_j[i] == col_map_offd[j])
            {
               offd_j[i] = j;
               break;
            }
   }

   for (i=0; i < num_procs+1; i++)
      global_part[i] *= num_fun;

   for (j=1; j< num_fun; j++)
   {
      for (i=0; i<local_grid_size; i++)
      {
	  row = i*num_fun+j;
	  diag_index = diag_i[row];
	  index = diag_index+j;
	  val = diag_data[diag_index];
	  col = diag_j[diag_index];
	  diag_data[diag_index] = diag_data[index];
	  diag_j[diag_index] = diag_j[index];
	  diag_data[index] = val;
	  diag_j[index] = col;
      }
   }


#ifdef HYPRE_NO_GLOBAL_PARTITION
/* ideally we would use less storage earlier in this function, but this is fine
   for testing */
   {
      HYPRE_Int tmp1, tmp2;
      tmp1 = global_part[my_id];
      tmp2 = global_part[my_id + 1];
      hypre_TFree(global_part);
      global_part = hypre_CTAlloc(HYPRE_Int, 2);
      global_part[0] = tmp1;
      global_part[1] = tmp2;
   }
#endif

   A = hypre_ParCSRMatrixCreate(comm, num_fun*grid_size, num_fun*grid_size,
                                global_part, global_part, num_cols_offd,
                                diag_i[local_num_rows],
                                offd_i[local_num_rows]);

   hypre_ParCSRMatrixColMapOffd(A) = col_map_offd;

   diag = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrixI(diag) = diag_i;
   hypre_CSRMatrixJ(diag) = diag_j;
   hypre_CSRMatrixData(diag) = diag_data;

   offd = hypre_ParCSRMatrixOffd(A);
   hypre_CSRMatrixI(offd) = offd_i;
   if (num_cols_offd)
   {
      hypre_CSRMatrixJ(offd) = offd_j;
      hypre_CSRMatrixData(offd) = offd_data;
   }

   hypre_TFree(nx_part);
   hypre_TFree(ny_part);
   hypre_TFree(nz_part);

   return (HYPRE_ParCSRMatrix) A;
}
Beispiel #14
0
HYPRE_ParCSRMatrix 
GenerateLaplacian( MPI_Comm comm,
                   HYPRE_Int      nx,
                   HYPRE_Int      ny,
                   HYPRE_Int      nz, 
                   HYPRE_Int      P,
                   HYPRE_Int      Q,
                   HYPRE_Int      R,
                   HYPRE_Int      p,
                   HYPRE_Int      q,
                   HYPRE_Int      r,
                   HYPRE_Real  *value )
{
   hypre_ParCSRMatrix *A;
   hypre_CSRMatrix *diag;
   hypre_CSRMatrix *offd;

   HYPRE_Int    *diag_i;
   HYPRE_Int    *diag_j;
   HYPRE_Real *diag_data;

   HYPRE_Int    *offd_i;
   HYPRE_Int    *offd_j = NULL;
   HYPRE_Real *offd_data = NULL;

   HYPRE_Int *global_part;
   HYPRE_Int ix, iy, iz;
   HYPRE_Int cnt, o_cnt;
   HYPRE_Int local_num_rows; 
   HYPRE_Int *col_map_offd;
   HYPRE_Int row_index;
   HYPRE_Int i,j;

   HYPRE_Int nx_local, ny_local, nz_local;
   HYPRE_Int nx_size, ny_size, nz_size;
   HYPRE_Int num_cols_offd;
   HYPRE_Int grid_size;

   HYPRE_Int *nx_part;
   HYPRE_Int *ny_part;
   HYPRE_Int *nz_part;

   HYPRE_Int num_procs, my_id;
   HYPRE_Int P_busy, Q_busy, R_busy;

   hypre_MPI_Comm_size(comm,&num_procs);
   hypre_MPI_Comm_rank(comm,&my_id);

   grid_size = nx*ny*nz;


   hypre_GeneratePartitioning(nx,P,&nx_part);
   hypre_GeneratePartitioning(ny,Q,&ny_part);
   hypre_GeneratePartitioning(nz,R,&nz_part);

   global_part = hypre_CTAlloc(HYPRE_Int,P*Q*R+1);

   global_part[0] = 0;
   cnt = 1;
   for (iz = 0; iz < R; iz++)
   {
      nz_size = nz_part[iz+1]-nz_part[iz];
      for (iy = 0; iy < Q; iy++)
      {
         ny_size = ny_part[iy+1]-ny_part[iy];
         for (ix = 0; ix < P; ix++)
         {
            nx_size = nx_part[ix+1] - nx_part[ix];
            global_part[cnt] = global_part[cnt-1];
            global_part[cnt++] += nx_size*ny_size*nz_size;
         }
      }
   }

   nx_local = nx_part[p+1] - nx_part[p];
   ny_local = ny_part[q+1] - ny_part[q];
   nz_local = nz_part[r+1] - nz_part[r];

   my_id = r*(P*Q) + q*P + p;
   num_procs = P*Q*R;

   local_num_rows = nx_local*ny_local*nz_local;
   diag_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1);
   offd_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1);

   P_busy = hypre_min(nx,P);
   Q_busy = hypre_min(ny,Q);
   R_busy = hypre_min(nz,R);

   num_cols_offd = 0;
   if (p) num_cols_offd += ny_local*nz_local;
   if (p < P_busy-1) num_cols_offd += ny_local*nz_local;
   if (q) num_cols_offd += nx_local*nz_local;
   if (q < Q_busy-1) num_cols_offd += nx_local*nz_local;
   if (r) num_cols_offd += nx_local*ny_local;
   if (r < R_busy-1) num_cols_offd += nx_local*ny_local;

   if (!local_num_rows) num_cols_offd = 0;

   col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);

   cnt = 1;
   o_cnt = 1;
   diag_i[0] = 0;
   offd_i[0] = 0;
   for (iz = nz_part[r]; iz < nz_part[r+1]; iz++)
   {
      for (iy = ny_part[q];  iy < ny_part[q+1]; iy++)
      {
         for (ix = nx_part[p]; ix < nx_part[p+1]; ix++)
         {
            diag_i[cnt] = diag_i[cnt-1];
            offd_i[o_cnt] = offd_i[o_cnt-1];
            diag_i[cnt]++;
            if (iz > nz_part[r]) 
               diag_i[cnt]++;
            else
            {
               if (iz) 
               {
                  offd_i[o_cnt]++;
               }
            }
            if (iy > ny_part[q]) 
               diag_i[cnt]++;
            else
            {
               if (iy) 
               {
                  offd_i[o_cnt]++;
               }
            }
            if (ix > nx_part[p]) 
               diag_i[cnt]++;
            else
            {
               if (ix) 
               {
                  offd_i[o_cnt]++; 
               }
            }
            if (ix+1 < nx_part[p+1]) 
               diag_i[cnt]++;
            else
            {
               if (ix+1 < nx) 
               {
                  offd_i[o_cnt]++; 
               }
            }
            if (iy+1 < ny_part[q+1]) 
               diag_i[cnt]++;
            else
            {
               if (iy+1 < ny) 
               {
                  offd_i[o_cnt]++;
               }
            }
            if (iz+1 < nz_part[r+1]) 
               diag_i[cnt]++;
            else
            {
               if (iz+1 < nz) 
               {
                  offd_i[o_cnt]++;
               }
            }
            cnt++;
            o_cnt++;
         }
      }
   }

   diag_j = hypre_CTAlloc(HYPRE_Int, diag_i[local_num_rows]);
   diag_data = hypre_CTAlloc(HYPRE_Real, diag_i[local_num_rows]);

   if (num_procs > 1)
   {
      offd_j = hypre_CTAlloc(HYPRE_Int, offd_i[local_num_rows]);
      offd_data = hypre_CTAlloc(HYPRE_Real, offd_i[local_num_rows]);
   }

   row_index = 0;
   cnt = 0;
   o_cnt = 0;
   for (iz = nz_part[r]; iz < nz_part[r+1]; iz++)
   {
      for (iy = ny_part[q];  iy < ny_part[q+1]; iy++)
      {
         for (ix = nx_part[p]; ix < nx_part[p+1]; ix++)
         {
            diag_j[cnt] = row_index;
            diag_data[cnt++] = value[0];
            if (iz > nz_part[r]) 
            {
               diag_j[cnt] = row_index-nx_local*ny_local;
               diag_data[cnt++] = value[3];
            }
            else
            {
               if (iz) 
               {
                  offd_j[o_cnt] = hypre_map(ix,iy,iz-1,p,q,r-1,P,Q,R,
                                      nx_part,ny_part,nz_part,global_part);
                  offd_data[o_cnt++] = value[3];
               }
            }
            if (iy > ny_part[q]) 
            {
               diag_j[cnt] = row_index-nx_local;
               diag_data[cnt++] = value[2];
            }
            else
            {
               if (iy) 
               {
                  offd_j[o_cnt] = hypre_map(ix,iy-1,iz,p,q-1,r,P,Q,R,
                                      nx_part,ny_part,nz_part,global_part);
                  offd_data[o_cnt++] = value[2];
               }
            }
            if (ix > nx_part[p]) 
            {
               diag_j[cnt] = row_index-1;
               diag_data[cnt++] = value[1];
            }
            else
            {
               if (ix) 
               {
                  offd_j[o_cnt] = hypre_map(ix-1,iy,iz,p-1,q,r,P,Q,R,
                                      nx_part,ny_part,nz_part,global_part);
                  offd_data[o_cnt++] = value[1];
               }
            }
            if (ix+1 < nx_part[p+1]) 
            {
               diag_j[cnt] = row_index+1;
               diag_data[cnt++] = value[1];
            }
            else
            {
               if (ix+1 < nx) 
               {
                  offd_j[o_cnt] = hypre_map(ix+1,iy,iz,p+1,q,r,P,Q,R,
                                      nx_part,ny_part,nz_part,global_part);
                  offd_data[o_cnt++] = value[1];
               }
            }
            if (iy+1 < ny_part[q+1]) 
            {
               diag_j[cnt] = row_index+nx_local;
               diag_data[cnt++] = value[2];
            }
            else
            {
               if (iy+1 < ny) 
               {
                  offd_j[o_cnt] = hypre_map(ix,iy+1,iz,p,q+1,r,P,Q,R,
                                      nx_part,ny_part,nz_part,global_part);
                  offd_data[o_cnt++] = value[2];
               }
            }
            if (iz+1 < nz_part[r+1]) 
            {
               diag_j[cnt] = row_index+nx_local*ny_local;
               diag_data[cnt++] = value[3];
            }
            else
            {
               if (iz+1 < nz) 
               {
                  offd_j[o_cnt] = hypre_map(ix,iy,iz+1,p,q,r+1,P,Q,R,
                                      nx_part,ny_part,nz_part,global_part);
                  offd_data[o_cnt++] = value[3];
               }
            }
            row_index++;
         }
      }
   }

   if (num_procs > 1)
   {
      for (i=0; i < num_cols_offd; i++)
         col_map_offd[i] = offd_j[i];
   	
      hypre_qsort0(col_map_offd, 0, num_cols_offd-1);

      for (i=0; i < num_cols_offd; i++)
         for (j=0; j < num_cols_offd; j++)
            if (offd_j[i] == col_map_offd[j])
            {
               offd_j[i] = j;
               break;
            }
   }


#ifdef HYPRE_NO_GLOBAL_PARTITION
/* ideally we would use less storage earlier in this function, but this is fine
   for testing */
   {
      HYPRE_Int tmp1, tmp2;
      tmp1 = global_part[my_id];
      tmp2 = global_part[my_id + 1];
      hypre_TFree(global_part);
      global_part = hypre_CTAlloc(HYPRE_Int, 2);
      global_part[0] = tmp1;
      global_part[1] = tmp2;
   }
#endif



   A = hypre_ParCSRMatrixCreate(comm, grid_size, grid_size,
                                global_part, global_part, num_cols_offd,
                                diag_i[local_num_rows],
                                offd_i[local_num_rows]);

   hypre_ParCSRMatrixColMapOffd(A) = col_map_offd;

   diag = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrixI(diag) = diag_i;
   hypre_CSRMatrixJ(diag) = diag_j;
   hypre_CSRMatrixData(diag) = diag_data;

   offd = hypre_ParCSRMatrixOffd(A);
   hypre_CSRMatrixI(offd) = offd_i;
   if (num_cols_offd)
   {
      hypre_CSRMatrixJ(offd) = offd_j;
      hypre_CSRMatrixData(offd) = offd_data;
   }

   hypre_TFree(nx_part);
   hypre_TFree(ny_part);
   hypre_TFree(nz_part);

   return (HYPRE_ParCSRMatrix) A;
}
Beispiel #15
0
/**
 * Partition the input so that
 * a1[0:*out1) and a2[0:*out2) contain the smallest k elements
 */
static void kth_element(
   HYPRE_Int *out1, HYPRE_Int *out2,
   HYPRE_Int *a1, HYPRE_Int *a2, HYPRE_Int n1, HYPRE_Int n2, HYPRE_Int k)
{
   // either of the inputs is empty
   if (n1 == 0)
   {
      *out1 = 0; *out2 = k;
      return;
   }
   if (n2 == 0)
   {
      *out1 = k; *out2 = 0;
      return;
   }
   if (k >= n1 + n2)
   {
      *out1 = n1; *out2 = n2;
      return;
   }

   // one is greater than the other
   if (k < n1 && a1[k] <= a2[0])
   {
      *out1 = k; *out2 = 0;
      return;
   }
   if (k - n1 >= 0 && a2[k - n1] >= a1[n1 - 1])
   {
      *out1 = n1; *out2 = k - n1;
      return;
   }
   if (k < n2 && a2[k] <= a1[0])
   {
      *out1 = 0; *out2 = k;
      return;
   }
   if (k - n2 >= 0 && a1[k - n2] >= a2[n2 - 1])
   {
      *out1 = k - n2; *out2 = n2;
      return;
   }
   // now k > 0

   // faster to do binary search on the shorter sequence
   if (n1 > n2)
   {
      SWAP(HYPRE_Int, n1, n2);
      SWAP(HYPRE_Int *, a1, a2);
      SWAP(HYPRE_Int *, out1, out2);
   }

   if (k < (n1 + n2)/2)
   {
      kth_element_(out1, out2, a1, a2, 0, hypre_min(n1 - 1, k), n1, n2, k);
   }
   else
   {
      // when k is big, faster to find (n1 + n2 - k)th biggest element
      HYPRE_Int offset1 = hypre_max(k - n2, 0), offset2 = hypre_max(k - n1, 0);
      HYPRE_Int new_k = k - offset1 - offset2;

      HYPRE_Int new_n1 = hypre_min(n1 - offset1, new_k + 1);
      HYPRE_Int new_n2 = hypre_min(n2 - offset2, new_k + 1);
      kth_element_(out1, out2, a1 + offset1, a2 + offset2, 0, new_n1 - 1, new_n1, new_n2, new_k);

      *out1 += offset1;
      *out2 += offset2;
   }
#ifdef DBG_MERGE_SORT
   assert(*out1 + *out2 == k);
#endif
}
Beispiel #16
0
HYPRE_Int
hypre_PFMGSetup( void               *pfmg_vdata,
                 hypre_StructMatrix *A,
                 hypre_StructVector *b,
                 hypre_StructVector *x        )
{
   hypre_PFMGData       *pfmg_data = pfmg_vdata;

   MPI_Comm              comm = (pfmg_data -> comm);
                     
   HYPRE_Int             relax_type =       (pfmg_data -> relax_type);
   HYPRE_Int             usr_jacobi_weight= (pfmg_data -> usr_jacobi_weight);
   double                jacobi_weight    = (pfmg_data -> jacobi_weight);
   HYPRE_Int             skip_relax =       (pfmg_data -> skip_relax);
   double               *dxyz       =       (pfmg_data -> dxyz);
   HYPRE_Int             rap_type;
                     
   HYPRE_Int             max_iter;
   HYPRE_Int             max_levels;
                      
   HYPRE_Int             num_levels;
                     
   hypre_Index           cindex;
   hypre_Index           findex;
   hypre_Index           stride;

   hypre_Index           coarsen;

   HYPRE_Int            *cdir_l;
   HYPRE_Int            *active_l;
   hypre_StructGrid    **grid_l;
   hypre_StructGrid    **P_grid_l;
                    
   double               *data;
   HYPRE_Int             data_size = 0;
   double               *relax_weights;
   double               *mean, *deviation;
   double                alpha, beta;

   hypre_StructMatrix  **A_l;
   hypre_StructMatrix  **P_l;
   hypre_StructMatrix  **RT_l;
   hypre_StructVector  **b_l;
   hypre_StructVector  **x_l;

   /* temp vectors */
   hypre_StructVector  **tx_l;
   hypre_StructVector  **r_l;
   hypre_StructVector  **e_l;

   void                **relax_data_l;
   void                **matvec_data_l;
   void                **restrict_data_l;
   void                **interp_data_l;

   hypre_StructGrid     *grid;
   HYPRE_Int             dim;

   hypre_Box            *cbox;

   double                min_dxyz;
   HYPRE_Int             cdir, periodic, cmaxsize;
   HYPRE_Int             d, l;
   HYPRE_Int             dxyz_flag;
                       
   HYPRE_Int             b_num_ghost[]  = {0, 0, 0, 0, 0, 0};
   HYPRE_Int             x_num_ghost[]  = {1, 1, 1, 1, 1, 1};

   HYPRE_Int             ierr = 0;
#if DEBUG
   char                  filename[255];
#endif


   /*-----------------------------------------------------
    * Set up coarse grids
    *-----------------------------------------------------*/

   grid  = hypre_StructMatrixGrid(A);
   dim   = hypre_StructGridDim(grid);

   /* Compute a new max_levels value based on the grid */
   cbox = hypre_BoxDuplicate(hypre_StructGridBoundingBox(grid));
   max_levels =
      hypre_Log2(hypre_BoxSizeD(cbox, 0)) + 2 +
      hypre_Log2(hypre_BoxSizeD(cbox, 1)) + 2 +
      hypre_Log2(hypre_BoxSizeD(cbox, 2)) + 2;
   if ((pfmg_data -> max_levels) > 0)
   {
      max_levels = hypre_min(max_levels, (pfmg_data -> max_levels));
   }
   (pfmg_data -> max_levels) = max_levels;

   /* compute dxyz */
   if ((dxyz[0] == 0) || (dxyz[1] == 0) || (dxyz[2] == 0))
   {
      mean = hypre_CTAlloc(double, 3);
      deviation = hypre_CTAlloc(double, 3);
      hypre_PFMGComputeDxyz(A, dxyz, mean, deviation);
        
      dxyz_flag= 0;
      for (d = 0; d < dim; d++)
      {
         deviation[d] -= mean[d]*mean[d];
         /* square of coeff. of variation */
         if (deviation[d]/(mean[d]*mean[d]) > .1)
         {
            dxyz_flag= 1;
            break;
         }
      }
      hypre_TFree(mean);
      hypre_TFree(deviation);
   }
void hypre_BoomerAMGJacobiInterp_1( hypre_ParCSRMatrix * A,
                                    hypre_ParCSRMatrix ** P,
                                    hypre_ParCSRMatrix * S,
                                    HYPRE_Int * CF_marker, HYPRE_Int level,
                                    HYPRE_Real truncation_threshold,
                                    HYPRE_Real truncation_threshold_minus,
                                    HYPRE_Int * dof_func, HYPRE_Int * dof_func_offd,
                                    HYPRE_Real weight_AF)
/* One step of Jacobi interpolation:
   A is the linear system.
   P is an interpolation matrix, input and output
   CF_marker identifies coarse and fine points
   If we imagine P and A as split into coarse and fine submatrices,

       [ AFF  AFC ]   [ AF ]            [ IFC ]
   A = [          ] = [    ] ,      P = [     ]
       [ ACF  ACC ]   [ AC ]            [ ICC ]
   (note that ICC is an identity matrix, applied to coarse points only)
   then this function computes

   IFCnew = IFCold - DFF(-1) * ( AFF*IFCold + AFC )
          = IFCold - DFF(-1) * AF * Pold)
   where DFF is the diagonal of AFF, (-1) represents the inverse, and
   where "old" denotes a value on entry to this function, "new" a returned value.

*/
{
   hypre_ParCSRMatrix * Pnew;
   hypre_ParCSRMatrix * C;
   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(*P);
   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(*P);
   HYPRE_Real      *P_diag_data = hypre_CSRMatrixData(P_diag);
   HYPRE_Int             *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int             *P_diag_j = hypre_CSRMatrixJ(P_diag);
   HYPRE_Real      *P_offd_data = hypre_CSRMatrixData(P_offd);
   HYPRE_Int             *P_offd_i = hypre_CSRMatrixI(P_offd);
   hypre_CSRMatrix *C_diag;
   hypre_CSRMatrix *C_offd;
   hypre_CSRMatrix *Pnew_diag;
   hypre_CSRMatrix *Pnew_offd;
   HYPRE_Int	num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag);
   HYPRE_Int i;
   HYPRE_Int Jnochanges=0, Jchanges, Pnew_num_nonzeros;
   HYPRE_Int CF_coarse=0;
   HYPRE_Int * J_marker = hypre_CTAlloc( HYPRE_Int, num_rows_diag_P );
   HYPRE_Int nc, ncmax, ncmin, nc1;
   HYPRE_Int num_procs, my_id;
   MPI_Comm comm = hypre_ParCSRMatrixComm( A );
#ifdef HYPRE_JACINT_PRINT_ROW_SUMS
   HYPRE_Int m, nmav, npav;
   HYPRE_Real PIi, PIimax, PIimin, PIimav, PIipav, randthresh;
   HYPRE_Real eps = 1.0e-17;
#endif
#ifdef HYPRE_JACINT_PRINT_MATRICES
   char filename[80];
   HYPRE_Int i_dummy, j_dummy;
   HYPRE_Int *base_i_ptr = &i_dummy;
   HYPRE_Int *base_j_ptr = &j_dummy;
#endif
#ifdef HYPRE_JACINT_PRINT_SOME_ROWS
   HYPRE_Int sample_rows[50], n_sample_rows=0, isamp;
#endif

   hypre_MPI_Comm_size(comm, &num_procs);   
   hypre_MPI_Comm_rank(comm,&my_id);


   for ( i=0; i<num_rows_diag_P; ++i )
   {
      J_marker[i] = CF_marker[i];
      if (CF_marker[i]>=0) ++CF_coarse;
   }
#ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS
   hypre_printf("%i %i Jacobi_Interp_1, P has %i+%i=%i nonzeros, local sum %e\n", my_id, level,
          hypre_CSRMatrixNumNonzeros(P_diag), hypre_CSRMatrixNumNonzeros(P_offd),
          hypre_CSRMatrixNumNonzeros(P_diag)+hypre_CSRMatrixNumNonzeros(P_offd),
          hypre_ParCSRMatrixLocalSumElts(*P) );
#endif

   /* row sum computations, for output */
#ifdef HYPRE_JACINT_PRINT_ROW_SUMS
   PIimax=-1.0e12, PIimin=1.0e12, PIimav=0, PIipav=0;
   nmav=0, npav=0;
   for ( i=0; i<num_rows_diag_P; ++i )
   {
      PIi = 0;  /* i-th value of P*1, i.e. sum of row i of P */
      for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m )
         PIi += P_diag_data[m];
      for ( m=P_offd_i[i]; m<P_offd_i[i+1]; ++m )
         PIi += P_offd_data[m];
      if (CF_marker[i]<0)
      {
         PIimax = hypre_max( PIimax, PIi );
         PIimin = hypre_min( PIimin, PIi );
         if (PIi<=1-eps) { PIimav+=PIi; ++nmav; };
         if (PIi>=1+eps) { PIipav+=PIi; ++npav; };
      }
   }
   if ( nmav>0 ) PIimav = PIimav/nmav;
   if ( npav>0 ) PIipav = PIipav/npav;
   hypre_printf("%i %i P in max,min row sums %e %e\n", my_id, level, PIimax, PIimin );
#endif

   ncmax=0; ncmin=num_rows_diag_P; nc1=0;
   for ( i=0; i<num_rows_diag_P; ++i )
      if (CF_marker[i]<0)
      {
         nc = P_diag_i[i+1] - P_diag_i[i];
         if (nc<=1)
         {
            ++nc1;
         }
         ncmax = hypre_max( nc, ncmax );
         ncmin = hypre_min( nc, ncmin );
      }
#if 0
   /* a very agressive reduction in how much the Jacobi step does: */
   for ( i=0; i<num_rows_diag_P; ++i )
      if (CF_marker[i]<0)
      {
         nc = P_diag_i[i+1] - P_diag_i[i];
         if (nc>ncmin+1)
            /*if ( nc > ncmin + 0.5*(ncmax-ncmin) )*/
         {
            J_marker[i] = 1;
            ++Jnochanges;
         }
      }
#endif
   Jchanges = num_rows_diag_P - Jnochanges - CF_coarse;

#ifdef HYPRE_JACINT_PRINT_SOME_ROWS
   hypre_printf("some rows to be changed: ");
   randthresh = 15/(HYPRE_Real)Jchanges;
   for ( i=0; i<num_rows_diag_P; ++i )
   {
      if ( J_marker[i]<0 )
      {
         if ( ((HYPRE_Real)rand())/RAND_MAX < randthresh )
         {
            hypre_printf( "%i: ", i );
            for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m )
               hypre_printf( " %i %f, ", P_diag_j[m], P_diag_data[m] );
            hypre_printf(";  ");
            sample_rows[n_sample_rows] = i;
            ++n_sample_rows;
         }
      }
   }
   hypre_printf("\n");
#endif
#ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS
   hypre_printf("%i %i P has %i rows, %i changeable, %i don't change-good, %i coarse\n",
          my_id, level, num_rows_diag_P, Jchanges, Jnochanges, CF_coarse );
   hypre_printf("%i %i min,max diag cols per row: %i, %i;  no.rows w.<=1 col: %i\n", my_id, level, ncmin, ncmax, nc1 );
#endif
#ifdef HYPRE_JACINT_PRINT_MATRICES
   if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX )
   {
      hypre_sprintf( filename, "Ain%i", level );
      hypre_ParCSRMatrixPrintIJ( A,0,0,filename);
      hypre_sprintf( filename, "Sin%i", level );
      hypre_ParCSRMatrixPrintIJ( S,0,0,filename);
      hypre_sprintf( filename, "Pin%i", level );
      hypre_ParCSRMatrixPrintIJ( *P,0,0,filename);
   }
#endif

   C = hypre_ParMatmul_FC( A, *P, J_marker, dof_func, dof_func_offd );
   /* hypre_parMatmul_FC creates and returns C, a variation of the
      matrix product A*P in which only the "Fine"-designated rows have
      been computed.  (all columns are Coarse because all columns of P
      are).  "Fine" is defined solely by the marker array, and for
      example could be a proper subset of the fine points of a
      multigrid hierarchy.
      As a matrix, C is the size of A*P.  But only the marked rows have
      been computed.
   */
#ifdef HYPRE_JACINT_PRINT_MATRICES
   hypre_sprintf( filename, "C%i", level );
   if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX ) hypre_ParCSRMatrixPrintIJ( C,0,0,filename);
#endif
   C_diag = hypre_ParCSRMatrixDiag(C);
   C_offd = hypre_ParCSRMatrixOffd(C);
#ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS
   hypre_printf("%i %i Jacobi_Interp_1 after matmul, C has %i+%i=%i nonzeros, local sum %e\n",
          my_id, level, hypre_CSRMatrixNumNonzeros(C_diag),
          hypre_CSRMatrixNumNonzeros(C_offd),
          hypre_CSRMatrixNumNonzeros(C_diag)+hypre_CSRMatrixNumNonzeros(C_offd),
          hypre_ParCSRMatrixLocalSumElts(C) );
#endif

   hypre_ParMatScaleDiagInv_F( C, A, weight_AF, J_marker );
   /* hypre_ParMatScaleDiagInv scales of its first argument by premultiplying with
      a submatrix of the inverse of the diagonal of its second argument.
      The marker array determines which diagonal elements are used.  The marker
      array should select exactly the right number of diagonal elements (the number
      of rows of AP_FC).
   */
#ifdef HYPRE_JACINT_PRINT_MATRICES
   hypre_sprintf( filename, "Cout%i", level );
   if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX )  hypre_ParCSRMatrixPrintIJ( C,0,0,filename);
#endif

   Pnew = hypre_ParMatMinus_F( *P, C, J_marker );
   /* hypre_ParMatMinus_F subtracts rows of its second argument from selected rows
      of its first argument.  The marker array determines which rows of the first
      argument are affected, and they should exactly correspond to all the rows
      of the second argument.
   */
   Pnew_diag = hypre_ParCSRMatrixDiag(Pnew);
   Pnew_offd = hypre_ParCSRMatrixOffd(Pnew);
   Pnew_num_nonzeros = hypre_CSRMatrixNumNonzeros(Pnew_diag)+hypre_CSRMatrixNumNonzeros(Pnew_offd);
#ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS
   hypre_printf("%i %i Jacobi_Interp_1 after MatMinus, Pnew has %i+%i=%i nonzeros, local sum %e\n",
          my_id, level, hypre_CSRMatrixNumNonzeros(Pnew_diag),
          hypre_CSRMatrixNumNonzeros(Pnew_offd), Pnew_num_nonzeros,
          hypre_ParCSRMatrixLocalSumElts(Pnew) );
#endif

   /* Transfer ownership of col_starts from P to Pnew  ... */
   if ( hypre_ParCSRMatrixColStarts(*P) &&
        hypre_ParCSRMatrixColStarts(*P)==hypre_ParCSRMatrixColStarts(Pnew) )
   {
      if ( hypre_ParCSRMatrixOwnsColStarts(*P) && !hypre_ParCSRMatrixOwnsColStarts(Pnew) )
      {
         hypre_ParCSRMatrixSetColStartsOwner(*P,0);
         hypre_ParCSRMatrixSetColStartsOwner(Pnew,1);
      }
   }

   hypre_ParCSRMatrixDestroy( C );
   hypre_ParCSRMatrixDestroy( *P );

   /* Note that I'm truncating all the fine rows, not just the J-marked ones. */
#if 0
   if ( Pnew_num_nonzeros < 10000 )  /* a fixed number like this makes it no.procs.-depdendent */
   {  /* ad-hoc attempt to reduce zero-matrix problems seen in testing..*/
      truncation_threshold = 1.0e-6 * truncation_threshold; 
      truncation_threshold_minus = 1.0e-6 * truncation_threshold_minus;
  }
#endif
   hypre_BoomerAMGTruncateInterp( Pnew, truncation_threshold,
                                  truncation_threshold_minus, CF_marker );

   hypre_MatvecCommPkgCreate ( Pnew );


   *P = Pnew;

   P_diag = hypre_ParCSRMatrixDiag(*P);
   P_offd = hypre_ParCSRMatrixOffd(*P);
   P_diag_data = hypre_CSRMatrixData(P_diag);
   P_diag_i = hypre_CSRMatrixI(P_diag);
   P_diag_j = hypre_CSRMatrixJ(P_diag);
   P_offd_data = hypre_CSRMatrixData(P_offd);
   P_offd_i = hypre_CSRMatrixI(P_offd);

   /* row sum computations, for output */
#ifdef HYPRE_JACINT_PRINT_ROW_SUMS
   PIimax=-1.0e12, PIimin=1.0e12, PIimav=0, PIipav=0;
   nmav=0, npav=0;
   for ( i=0; i<num_rows_diag_P; ++i )
   {
      PIi = 0;  /* i-th value of P*1, i.e. sum of row i of P */
      for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m )
         PIi += P_diag_data[m];
      for ( m=P_offd_i[i]; m<P_offd_i[i+1]; ++m )
         PIi += P_offd_data[m];
      if (CF_marker[i]<0)
      {
         PIimax = hypre_max( PIimax, PIi );
         PIimin = hypre_min( PIimin, PIi );
         if (PIi<=1-eps) { PIimav+=PIi; ++nmav; };
         if (PIi>=1+eps) { PIipav+=PIi; ++npav; };
      }
   }
   if ( nmav>0 ) PIimav = PIimav/nmav;
   if ( npav>0 ) PIipav = PIipav/npav;
   hypre_printf("%i %i P out max,min row sums %e %e\n", my_id, level, PIimax, PIimin );
#endif

#ifdef HYPRE_JACINT_PRINT_SOME_ROWS
   hypre_printf("some changed rows: ");
   for ( isamp=0; isamp<n_sample_rows; ++isamp )
   {
      i = sample_rows[isamp];
      hypre_printf( "%i: ", i );
      for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m )
         hypre_printf( " %i %f, ", P_diag_j[m], P_diag_data[m] );
      hypre_printf(";  ");
   }
   hypre_printf("\n");
#endif
   ncmax=0; ncmin=num_rows_diag_P; nc1=0;
   for ( i=0; i<num_rows_diag_P; ++i )
      if (CF_marker[i]<0)
      {
         nc = P_diag_i[i+1] - P_diag_i[i];
         if (nc<=1) ++nc1;
         ncmax = hypre_max( nc, ncmax );
         ncmin = hypre_min( nc, ncmin );
      }
#ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS
   hypre_printf("%i %i P has %i rows, %i changeable, %i too good, %i coarse\n",
          my_id, level, num_rows_diag_P, num_rows_diag_P-Jnochanges-CF_coarse, Jnochanges, CF_coarse );
   hypre_printf("%i %i min,max diag cols per row: %i, %i;  no.rows w.<=1 col: %i\n", my_id, level, ncmin, ncmax, nc1 );

   hypre_printf("%i %i Jacobi_Interp_1 after truncation (%e), Pnew has %i+%i=%i nonzeros, local sum %e\n",
          my_id, level, truncation_threshold,
          hypre_CSRMatrixNumNonzeros(Pnew_diag), hypre_CSRMatrixNumNonzeros(Pnew_offd),
          hypre_CSRMatrixNumNonzeros(Pnew_diag)+hypre_CSRMatrixNumNonzeros(Pnew_offd),
          hypre_ParCSRMatrixLocalSumElts(Pnew) );
#endif

   /* Programming Notes:
      1. Judging by around line 299 of par_interp.c, they typical use of CF_marker
      is that CF_marker>=0 means Coarse, CF_marker<0 means Fine.
   */
#ifdef HYPRE_JACINT_PRINT_MATRICES
   hypre_sprintf( filename, "Pout%i", level );
   if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX )  hypre_ParCSRMatrixPrintIJ( *P,0,0,filename);
#endif

   hypre_TFree( J_marker );
      
}
Beispiel #18
0
int main (int argc, char *argv[])
{
   HYPRE_Int i;
   int myid, num_procs;
   int N, n;

   HYPRE_Int ilower, iupper;
   HYPRE_Int local_size, extra;

   int solver_id;
   int print_solution, print_system;

   double h, h2;

   HYPRE_IJMatrix A;
   HYPRE_ParCSRMatrix parcsr_A;
   HYPRE_IJVector b;
   HYPRE_ParVector par_b;
   HYPRE_IJVector x;
   HYPRE_ParVector par_x;

   HYPRE_Solver solver, precond;

   /* Initialize MPI */
   MPI_Init(&argc, &argv);
   MPI_Comm_rank(MPI_COMM_WORLD, &myid);
   MPI_Comm_size(MPI_COMM_WORLD, &num_procs);

   /* Default problem parameters */
   n = 33;
   solver_id = 0;
   print_solution  = 0;
   print_system = 0;


   /* Parse command line */
   {
      int arg_index = 0;
      int print_usage = 0;

      while (arg_index < argc)
      {
         if ( strcmp(argv[arg_index], "-n") == 0 )
         {
            arg_index++;
            n = atoi(argv[arg_index++]);
         }
         else if ( strcmp(argv[arg_index], "-solver") == 0 )
         {
            arg_index++;
            solver_id = atoi(argv[arg_index++]);
         }
         else if ( strcmp(argv[arg_index], "-print_solution") == 0 )
         {
            arg_index++;
            print_solution = 1;
         }
         else if ( strcmp(argv[arg_index], "-print_system") == 0 )
         {
            arg_index++;
            print_system = 1;
         }


         else if ( strcmp(argv[arg_index], "-help") == 0 )
         {
            print_usage = 1;
            break;
         }
         else
         {
            arg_index++;
         }
      }

      if ((print_usage) && (myid == 0))
      {
         printf("\n");
         printf("Usage: %s [<options>]\n", argv[0]);
         printf("\n");
         printf("  -n <n>              : problem size in each direction (default: 33)\n");
         printf("  -solver <ID>        : solver ID\n");
         printf("                        0  - AMG (default) \n");
         printf("                        1  - AMG-PCG\n");
         printf("                        8  - ParaSails-PCG\n");
         printf("                        50 - PCG\n");
         printf("                        61 - AMG-FlexGMRES\n");
         printf("  -print_solution     : print the solution vector\n");
         printf("  -print_system       : print the matrix and rhs\n");
         printf("\n");
      }

      if (print_usage)
      {
         MPI_Finalize();
         return (0);
      }
   }

   /* Preliminaries: want at least one processor per row */
   if (n*n < num_procs) n = sqrt(num_procs) + 1;
   N = n*n; /* global number of rows */
   h = 1.0/(n+1); /* mesh size*/
   h2 = h*h;

   /* Each processor knows only of its own rows - the range is denoted by ilower
      and upper.  Here we partition the rows. We account for the fact that
      N may not divide evenly by the number of processors. */
   local_size = N/num_procs;
   extra = N - local_size*num_procs;

   ilower = local_size*myid;
   ilower += hypre_min(myid, extra);

   iupper = local_size*(myid+1);
   iupper += hypre_min(myid+1, extra);
   iupper = iupper - 1;

   /* How many rows do I have? */
   local_size = iupper - ilower + 1;

   /* Create the matrix.
      Note that this is a square matrix, so we indicate the row partition
      size twice (since number of rows = number of cols) */
   HYPRE_IJMatrixCreate(MPI_COMM_WORLD, ilower, iupper, ilower, iupper, &A);

   /* Choose a parallel csr format storage (see the User's Manual) */
   HYPRE_IJMatrixSetObjectType(A, HYPRE_PARCSR);

   /* Initialize before setting coefficients */
   HYPRE_IJMatrixInitialize(A);

   /* Now go through my local rows and set the matrix entries.
      Each row has at most 5 entries. For example, if n=3:

      A = [M -I 0; -I M -I; 0 -I M]
      M = [4 -1 0; -1 4 -1; 0 -1 4]

      Note that here we are setting one row at a time, though
      one could set all the rows together (see the User's Manual).
   */
   {
      HYPRE_Int nnz;
      double values[5];
      HYPRE_Int cols[5];

      for (i = ilower; i <= iupper; i++)
      {
         nnz = 0;

         /* The left identity block:position i-n */
         if ((i-n)>=0)
         {
            cols[nnz] = i-n;
            values[nnz] = -1.0;
            nnz++;
         }

         /* The left -1: position i-1 */
         if (i%n)
         {
            cols[nnz] = i-1;
            values[nnz] = -1.0;
            nnz++;
         }

         /* Set the diagonal: position i */
         cols[nnz] = i;
         values[nnz] = 4.0;
         nnz++;

         /* The right -1: position i+1 */
         if ((i+1)%n)
         {
            cols[nnz] = i+1;
            values[nnz] = -1.0;
            nnz++;
         }

         /* The right identity block:position i+n */
         if ((i+n)< N)
         {
            cols[nnz] = i+n;
            values[nnz] = -1.0;
            nnz++;
         }

         /* Set the values for row i */
         HYPRE_IJMatrixSetValues(A, 1, &nnz, &i, cols, values);
      }
   }

   /* Assemble after setting the coefficients */
   HYPRE_IJMatrixAssemble(A);

   /* Note: for the testing of small problems, one may wish to read
      in a matrix in IJ format (for the format, see the output files
      from the -print_system option).
      In this case, one would use the following routine:
      HYPRE_IJMatrixRead( <filename>, MPI_COMM_WORLD,
                          HYPRE_PARCSR, &A );
      <filename>  = IJ.A.out to read in what has been printed out
      by -print_system (processor numbers are omitted).
      A call to HYPRE_IJMatrixRead is an *alternative* to the
      following sequence of HYPRE_IJMatrix calls:
      Create, SetObjectType, Initialize, SetValues, and Assemble
   */


   /* Get the parcsr matrix object to use */
   HYPRE_IJMatrixGetObject(A, (void**) &parcsr_A);


   /* Create the rhs and solution */
   HYPRE_IJVectorCreate(MPI_COMM_WORLD, ilower, iupper,&b);
   HYPRE_IJVectorSetObjectType(b, HYPRE_PARCSR);
   HYPRE_IJVectorInitialize(b);

   HYPRE_IJVectorCreate(MPI_COMM_WORLD, ilower, iupper,&x);
   HYPRE_IJVectorSetObjectType(x, HYPRE_PARCSR);
   HYPRE_IJVectorInitialize(x);

   /* Set the rhs values to h^2 and the solution to zero */
   {
      double *rhs_values, *x_values;
      HYPRE_Int *rows;

      rhs_values = calloc(local_size, sizeof(double));
      x_values = calloc(local_size, sizeof(double));
      rows = calloc(local_size, sizeof(HYPRE_Int));

      for (i=0; i<local_size; i++)
      {
         rhs_values[i] = h2;
         x_values[i] = 0.0;
         rows[i] = ilower + i;
      }

      HYPRE_IJVectorSetValues(b, local_size, rows, rhs_values);
      HYPRE_IJVectorSetValues(x, local_size, rows, x_values);

      free(x_values);
      free(rhs_values);
      free(rows);
   }


   HYPRE_IJVectorAssemble(b);
   /*  As with the matrix, for testing purposes, one may wish to read in a rhs:
       HYPRE_IJVectorRead( <filename>, MPI_COMM_WORLD,
                                 HYPRE_PARCSR, &b );
       as an alternative to the
       following sequence of HYPRE_IJVectors calls:
       Create, SetObjectType, Initialize, SetValues, and Assemble
   */
   HYPRE_IJVectorGetObject(b, (void **) &par_b);

   HYPRE_IJVectorAssemble(x);
   HYPRE_IJVectorGetObject(x, (void **) &par_x);


  /*  Print out the system  - files names will be IJ.out.A.XXXXX
       and IJ.out.b.XXXXX, where XXXXX = processor id */
   if (print_system)
   {
      HYPRE_IJMatrixPrint(A, "IJ.out.A");
      HYPRE_IJVectorPrint(b, "IJ.out.b");
   }


   /* Choose a solver and solve the system */

   /* AMG */
   if (solver_id == 0)
   {
      HYPRE_Int num_iterations;
      double final_res_norm;

      /* Create solver */
      HYPRE_BoomerAMGCreate(&solver);

      /* Set some parameters (See Reference Manual for more parameters) */
      HYPRE_BoomerAMGSetPrintLevel(solver, 3);  /* print solve info + parameters */
      HYPRE_BoomerAMGSetCoarsenType(solver, 6); /* Falgout coarsening */
      HYPRE_BoomerAMGSetRelaxType(solver, 3);   /* G-S/Jacobi hybrid relaxation */
      HYPRE_BoomerAMGSetNumSweeps(solver, 1);   /* Sweeeps on each level */
      HYPRE_BoomerAMGSetMaxLevels(solver, 20);  /* maximum number of levels */
      HYPRE_BoomerAMGSetTol(solver, 1e-7);      /* conv. tolerance */

      /* Now setup and solve! */
      HYPRE_BoomerAMGSetup(solver, parcsr_A, par_b, par_x);
      HYPRE_BoomerAMGSolve(solver, parcsr_A, par_b, par_x);

      /* Run info - needed logging turned on */
      HYPRE_BoomerAMGGetNumIterations(solver, &num_iterations);
      HYPRE_BoomerAMGGetFinalRelativeResidualNorm(solver, &final_res_norm);
      if (myid == 0)
      {
         printf("\n");
         printf("Iterations = %lld\n", num_iterations);
         printf("Final Relative Residual Norm = %e\n", final_res_norm);
         printf("\n");
      }

      /* Destroy solver */
      HYPRE_BoomerAMGDestroy(solver);
   }
   /* PCG */
   else if (solver_id == 50)
   {
      HYPRE_Int num_iterations;
      double final_res_norm;

      /* Create solver */
      HYPRE_ParCSRPCGCreate(MPI_COMM_WORLD, &solver);

      /* Set some parameters (See Reference Manual for more parameters) */
      HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */
      HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */
      HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */
      HYPRE_PCGSetPrintLevel(solver, 2); /* prints out the iteration info */
      HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */

      /* Now setup and solve! */
      HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x);
      HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x);

      /* Run info - needed logging turned on */
      HYPRE_PCGGetNumIterations(solver, &num_iterations);
      HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm);
      if (myid == 0)
      {
         printf("\n");
         printf("Iterations = %lld\n", num_iterations);
         printf("Final Relative Residual Norm = %e\n", final_res_norm);
         printf("\n");
      }

      /* Destroy solver */
      HYPRE_ParCSRPCGDestroy(solver);
   }
   /* PCG with AMG preconditioner */
   else if (solver_id == 1)
   {
      HYPRE_Int num_iterations;
      double final_res_norm;

      /* Create solver */
      HYPRE_ParCSRPCGCreate(MPI_COMM_WORLD, &solver);

      /* Set some parameters (See Reference Manual for more parameters) */
      HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */
      HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */
      HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */
      HYPRE_PCGSetPrintLevel(solver, 2); /* print solve info */
      HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */

      /* Now set up the AMG preconditioner and specify any parameters */
      HYPRE_BoomerAMGCreate(&precond);
      HYPRE_BoomerAMGSetPrintLevel(precond, 1); /* print amg solution info */
      HYPRE_BoomerAMGSetCoarsenType(precond, 6);
      HYPRE_BoomerAMGSetRelaxType(precond, 6); /* Sym G.S./Jacobi hybrid */
      HYPRE_BoomerAMGSetNumSweeps(precond, 1);
      HYPRE_BoomerAMGSetTol(precond, 0.0); /* conv. tolerance zero */
      HYPRE_BoomerAMGSetMaxIter(precond, 1); /* do only one iteration! */

      /* Set the PCG preconditioner */
      HYPRE_PCGSetPrecond(solver, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve,
                          (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup, precond);

      /* Now setup and solve! */
      HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x);
      HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x);

      /* Run info - needed logging turned on */
      HYPRE_PCGGetNumIterations(solver, &num_iterations);
      HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm);
      if (myid == 0)
      {
         printf("\n");
         printf("Iterations = %lld\n", num_iterations);
         printf("Final Relative Residual Norm = %e\n", final_res_norm);
         printf("\n");
      }

      /* Destroy solver and preconditioner */
      HYPRE_ParCSRPCGDestroy(solver);
      HYPRE_BoomerAMGDestroy(precond);
   }
   /* PCG with Parasails Preconditioner */
   else if (solver_id == 8)
   {
      HYPRE_Int num_iterations;
      double final_res_norm;

      int      sai_max_levels = 1;
      double   sai_threshold = 0.1;
      double   sai_filter = 0.05;
      int      sai_sym = 1;

      /* Create solver */
      HYPRE_ParCSRPCGCreate(MPI_COMM_WORLD, &solver);

      /* Set some parameters (See Reference Manual for more parameters) */
      HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */
      HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */
      HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */
      HYPRE_PCGSetPrintLevel(solver, 2); /* print solve info */
      HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */

      /* Now set up the ParaSails preconditioner and specify any parameters */
      HYPRE_ParaSailsCreate(MPI_COMM_WORLD, &precond);

      /* Set some parameters (See Reference Manual for more parameters) */
      HYPRE_ParaSailsSetParams(precond, sai_threshold, sai_max_levels);
      HYPRE_ParaSailsSetFilter(precond, sai_filter);
      HYPRE_ParaSailsSetSym(precond, sai_sym);
      HYPRE_ParaSailsSetLogging(precond, 3);

      /* Set the PCG preconditioner */
      HYPRE_PCGSetPrecond(solver, (HYPRE_PtrToSolverFcn) HYPRE_ParaSailsSolve,
                          (HYPRE_PtrToSolverFcn) HYPRE_ParaSailsSetup, precond);

      /* Now setup and solve! */
      HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x);
      HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x);


      /* Run info - needed logging turned on */
      HYPRE_PCGGetNumIterations(solver, &num_iterations);
      HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm);
      if (myid == 0)
      {
         printf("\n");
         printf("Iterations = %lld\n", num_iterations);
         printf("Final Relative Residual Norm = %e\n", final_res_norm);
         printf("\n");
      }

      /* Destory solver and preconditioner */
      HYPRE_ParCSRPCGDestroy(solver);
      HYPRE_ParaSailsDestroy(precond);
   }
   /* Flexible GMRES with  AMG Preconditioner */
   else if (solver_id == 61)
   {
      HYPRE_Int num_iterations;
      double final_res_norm;
      int    restart = 30;
      int    modify = 1;


      /* Create solver */
      HYPRE_ParCSRFlexGMRESCreate(MPI_COMM_WORLD, &solver);

      /* Set some parameters (See Reference Manual for more parameters) */
      HYPRE_FlexGMRESSetKDim(solver, restart);
      HYPRE_FlexGMRESSetMaxIter(solver, 1000); /* max iterations */
      HYPRE_FlexGMRESSetTol(solver, 1e-7); /* conv. tolerance */
      HYPRE_FlexGMRESSetPrintLevel(solver, 2); /* print solve info */
      HYPRE_FlexGMRESSetLogging(solver, 1); /* needed to get run info later */


      /* Now set up the AMG preconditioner and specify any parameters */
      HYPRE_BoomerAMGCreate(&precond);
      HYPRE_BoomerAMGSetPrintLevel(precond, 1); /* print amg solution info */
      HYPRE_BoomerAMGSetCoarsenType(precond, 6);
      HYPRE_BoomerAMGSetRelaxType(precond, 6); /* Sym G.S./Jacobi hybrid */
      HYPRE_BoomerAMGSetNumSweeps(precond, 1);
      HYPRE_BoomerAMGSetTol(precond, 0.0); /* conv. tolerance zero */
      HYPRE_BoomerAMGSetMaxIter(precond, 1); /* do only one iteration! */

      /* Set the FlexGMRES preconditioner */
      HYPRE_FlexGMRESSetPrecond(solver, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve,
                          (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup, precond);


      if (modify)
      /* this is an optional call  - if you don't call it, hypre_FlexGMRESModifyPCDefault
         is used - which does nothing.  Otherwise, you can define your own, similar to
         the one used here */
         HYPRE_FlexGMRESSetModifyPC( solver,
                                     (HYPRE_PtrToModifyPCFcn) hypre_FlexGMRESModifyPCAMGExample);


      /* Now setup and solve! */
      HYPRE_ParCSRFlexGMRESSetup(solver, parcsr_A, par_b, par_x);
      HYPRE_ParCSRFlexGMRESSolve(solver, parcsr_A, par_b, par_x);

      /* Run info - needed logging turned on */
      HYPRE_FlexGMRESGetNumIterations(solver, &num_iterations);
      HYPRE_FlexGMRESGetFinalRelativeResidualNorm(solver, &final_res_norm);
      if (myid == 0)
      {
         printf("\n");
         printf("Iterations = %lld\n", num_iterations);
         printf("Final Relative Residual Norm = %e\n", final_res_norm);
         printf("\n");
      }

      /* Destory solver and preconditioner */
      HYPRE_ParCSRFlexGMRESDestroy(solver);
      HYPRE_BoomerAMGDestroy(precond);

   }
   else
   {
      if (myid ==0) printf("Invalid solver id specified.\n");
   }

   /* Print the solution */
   if (print_solution)
      HYPRE_IJVectorPrint(x, "ij.out.x");

   /* Clean up */
   HYPRE_IJMatrixDestroy(A);
   HYPRE_IJVectorDestroy(b);
   HYPRE_IJVectorDestroy(x);

   /* Finalize MPI*/
   MPI_Finalize();

   return(0);
}
Beispiel #19
0
HYPRE_ParCSRMatrix 
GenerateRotate7pt( MPI_Comm comm,
                      HYPRE_Int      nx,
                      HYPRE_Int      ny,
                      HYPRE_Int      P,
                      HYPRE_Int      Q,
                      HYPRE_Int      p,
                      HYPRE_Int      q,
                      double   alpha,
                      double   eps )
{
   hypre_ParCSRMatrix *A;
   hypre_CSRMatrix *diag;
   hypre_CSRMatrix *offd;

   HYPRE_Int    *diag_i;
   HYPRE_Int    *diag_j;
   double *diag_data;

   HYPRE_Int    *offd_i;
   HYPRE_Int    *offd_j;
   double *offd_data;

   double *value;
   double ac, bc, cc, s, c, pi, x;
   HYPRE_Int *global_part;
   HYPRE_Int ix, iy;
   HYPRE_Int cnt, o_cnt;
   HYPRE_Int local_num_rows; 
   HYPRE_Int *col_map_offd;
   HYPRE_Int *work;
   HYPRE_Int row_index;
   HYPRE_Int i,j;

   HYPRE_Int nx_local, ny_local;
   HYPRE_Int nx_size, ny_size;
   HYPRE_Int num_cols_offd;
   HYPRE_Int grid_size;

   HYPRE_Int *nx_part;
   HYPRE_Int *ny_part;

   HYPRE_Int num_procs, my_id;
   HYPRE_Int P_busy, Q_busy;

   hypre_MPI_Comm_size(comm,&num_procs);
   hypre_MPI_Comm_rank(comm,&my_id);

   grid_size = nx*ny;

   value = hypre_CTAlloc(double,4);
   pi = 4.0*atan(1.0);
   x = pi*alpha/180.0;
   s = sin(x);
   c = cos(x);
   ac = -(c*c + eps*s*s);
   bc = 2.0*(1.0 - eps)*s*c;
   cc = -(s*s + eps*c*c);
   value[0] = -2*(2*ac+bc+2*cc);
   value[1] = 2*ac+bc;
   value[2] = bc+2*cc;
   value[3] = -bc;

   hypre_GeneratePartitioning(nx,P,&nx_part);
   hypre_GeneratePartitioning(ny,Q,&ny_part);

   global_part = hypre_CTAlloc(HYPRE_Int,P*Q+1);

   global_part[0] = 0;
   cnt = 1;
   for (iy = 0; iy < Q; iy++)
   {
      ny_size = ny_part[iy+1]-ny_part[iy];
      for (ix = 0; ix < P; ix++)
      {
         nx_size = nx_part[ix+1] - nx_part[ix];
         global_part[cnt] = global_part[cnt-1];
         global_part[cnt++] += nx_size*ny_size;
      }
   }

   nx_local = nx_part[p+1] - nx_part[p];
   ny_local = ny_part[q+1] - ny_part[q];

   my_id = q*P + p;
   num_procs = P*Q;

   local_num_rows = nx_local*ny_local;
   diag_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1);
   offd_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1);

   P_busy = hypre_min(nx,P);
   Q_busy = hypre_min(ny,Q);

   num_cols_offd = 0;
   if (p) num_cols_offd += ny_local;
   if (p < P_busy-1) num_cols_offd += ny_local;
   if (q) num_cols_offd += nx_local;
   if (q < Q_busy-1) num_cols_offd += nx_local;
   if (p && q) num_cols_offd++;
   if (p && q < Q_busy-1 ) num_cols_offd++;
   if (p < P_busy-1 && q ) num_cols_offd++;
   if (p < P_busy-1 && q < Q_busy-1 ) num_cols_offd++;

   if (!local_num_rows) num_cols_offd = 0;

   col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);

   cnt = 0;
   o_cnt = 0;
   diag_i[0] = 0;
   offd_i[0] = 0;
   for (iy = ny_part[q];  iy < ny_part[q+1]; iy++)
   {
      for (ix = nx_part[p]; ix < nx_part[p+1]; ix++)
      {
         cnt++;
         o_cnt++;
         diag_i[cnt] = diag_i[cnt-1];
         offd_i[o_cnt] = offd_i[o_cnt-1];
         diag_i[cnt]++;
         if (iy > ny_part[q]) 
         {
            diag_i[cnt]++;
	    if (ix > nx_part[p])
	    {
	       diag_i[cnt]++;
	    }
	    else
	    {
	       if (ix) 
		  offd_i[o_cnt]++;
	    }
         }
         else
         {
            if (iy) 
            {
               offd_i[o_cnt]++;
	       if (ix > nx_part[p])
	       {
	          offd_i[o_cnt]++;
	       }
	       else if (ix)
	       {
	          offd_i[o_cnt]++;
	       }
            }
         }
         if (ix > nx_part[p]) 
            diag_i[cnt]++;
         else
         {
            if (ix) 
            {
               offd_i[o_cnt]++; 
            }
         }
         if (ix+1 < nx_part[p+1]) 
            diag_i[cnt]++;
         else
         {
            if (ix+1 < nx) 
            {
               offd_i[o_cnt]++; 
            }
         }
         if (iy+1 < ny_part[q+1]) 
         {
            diag_i[cnt]++;
	    if (ix < nx_part[p+1]-1)
	    {
	       diag_i[cnt]++;
	    }
	    else
	    {
	       if (ix+1 < nx) 
		  offd_i[o_cnt]++;
	    }
         }
         else
         {
            if (iy+1 < ny) 
            {
               offd_i[o_cnt]++;
	       if (ix < nx_part[p+1]-1)
	       {
	          offd_i[o_cnt]++;
	       }
	       else if (ix < nx-1)
	       {
	          offd_i[o_cnt]++;
	       }
            }
         }
      }
   }

   diag_j = hypre_CTAlloc(HYPRE_Int, diag_i[local_num_rows]);
   diag_data = hypre_CTAlloc(double, diag_i[local_num_rows]);

   if (num_procs > 1)
   {
      offd_j = hypre_CTAlloc(HYPRE_Int, offd_i[local_num_rows]);
      offd_data = hypre_CTAlloc(double, offd_i[local_num_rows]);
   }
Beispiel #20
0
HYPRE_ParCSRMatrix
GenerateRotate7pt( MPI_Comm comm,
                   HYPRE_Int      nx,
                   HYPRE_Int      ny,
                   HYPRE_Int      P,
                   HYPRE_Int      Q,
                   HYPRE_Int      p,
                   HYPRE_Int      q,
                   HYPRE_Real   alpha,
                   HYPRE_Real   eps )
{
    hypre_ParCSRMatrix *A;
    hypre_CSRMatrix *diag;
    hypre_CSRMatrix *offd;

    HYPRE_Int    *diag_i;
    HYPRE_Int    *diag_j;
    HYPRE_Real *diag_data;

    HYPRE_Int    *offd_i;
    HYPRE_Int    *offd_j;
    HYPRE_Real *offd_data;

    HYPRE_Real *value;
    HYPRE_Real ac, bc, cc, s, c, pi, x;
    HYPRE_Int *global_part;
    HYPRE_Int ix, iy;
    HYPRE_Int cnt, o_cnt;
    HYPRE_Int local_num_rows;
    HYPRE_Int *col_map_offd;
    HYPRE_Int *work;
    HYPRE_Int row_index;
    HYPRE_Int i,j;

    HYPRE_Int nx_local, ny_local;
    HYPRE_Int nx_size, ny_size;
    HYPRE_Int num_cols_offd;
    HYPRE_Int grid_size;

    HYPRE_Int *nx_part;
    HYPRE_Int *ny_part;

    HYPRE_Int num_procs, my_id;
    HYPRE_Int P_busy, Q_busy;

    hypre_MPI_Comm_size(comm,&num_procs);
    hypre_MPI_Comm_rank(comm,&my_id);

    grid_size = nx*ny;

    value = hypre_CTAlloc(HYPRE_Real,4);
    pi = 4.0*atan(1.0);
    x = pi*alpha/180.0;
    s = sin(x);
    c = cos(x);
    ac = -(c*c + eps*s*s);
    bc = 2.0*(1.0 - eps)*s*c;
    cc = -(s*s + eps*c*c);
    value[0] = -2*(2*ac+bc+2*cc);
    value[1] = 2*ac+bc;
    value[2] = bc+2*cc;
    value[3] = -bc;

    hypre_GeneratePartitioning(nx,P,&nx_part);
    hypre_GeneratePartitioning(ny,Q,&ny_part);

    global_part = hypre_CTAlloc(HYPRE_Int,P*Q+1);

    global_part[0] = 0;
    cnt = 1;
    for (iy = 0; iy < Q; iy++)
    {
        ny_size = ny_part[iy+1]-ny_part[iy];
        for (ix = 0; ix < P; ix++)
        {
            nx_size = nx_part[ix+1] - nx_part[ix];
            global_part[cnt] = global_part[cnt-1];
            global_part[cnt++] += nx_size*ny_size;
        }
    }

    nx_local = nx_part[p+1] - nx_part[p];
    ny_local = ny_part[q+1] - ny_part[q];

    my_id = q*P + p;
    num_procs = P*Q;

    local_num_rows = nx_local*ny_local;
    diag_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1);
    offd_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1);

    P_busy = hypre_min(nx,P);
    Q_busy = hypre_min(ny,Q);

    num_cols_offd = 0;
    if (p) num_cols_offd += ny_local;
    if (p < P_busy-1) num_cols_offd += ny_local;
    if (q) num_cols_offd += nx_local;
    if (q < Q_busy-1) num_cols_offd += nx_local;
    if (p && q) num_cols_offd++;
    if (p && q < Q_busy-1 ) num_cols_offd++;
    if (p < P_busy-1 && q ) num_cols_offd++;
    if (p < P_busy-1 && q < Q_busy-1 ) num_cols_offd++;

    if (!local_num_rows) num_cols_offd = 0;

    col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);

    cnt = 0;
    o_cnt = 0;
    diag_i[0] = 0;
    offd_i[0] = 0;
    for (iy = ny_part[q];  iy < ny_part[q+1]; iy++)
    {
        for (ix = nx_part[p]; ix < nx_part[p+1]; ix++)
        {
            cnt++;
            o_cnt++;
            diag_i[cnt] = diag_i[cnt-1];
            offd_i[o_cnt] = offd_i[o_cnt-1];
            diag_i[cnt]++;
            if (iy > ny_part[q])
            {
                diag_i[cnt]++;
                if (ix > nx_part[p])
                {
                    diag_i[cnt]++;
                }
                else
                {
                    if (ix)
                        offd_i[o_cnt]++;
                }
            }
            else
            {
                if (iy)
                {
                    offd_i[o_cnt]++;
                    if (ix > nx_part[p])
                    {
                        offd_i[o_cnt]++;
                    }
                    else if (ix)
                    {
                        offd_i[o_cnt]++;
                    }
                }
            }
            if (ix > nx_part[p])
                diag_i[cnt]++;
            else
            {
                if (ix)
                {
                    offd_i[o_cnt]++;
                }
            }
            if (ix+1 < nx_part[p+1])
                diag_i[cnt]++;
            else
            {
                if (ix+1 < nx)
                {
                    offd_i[o_cnt]++;
                }
            }
            if (iy+1 < ny_part[q+1])
            {
                diag_i[cnt]++;
                if (ix < nx_part[p+1]-1)
                {
                    diag_i[cnt]++;
                }
                else
                {
                    if (ix+1 < nx)
                        offd_i[o_cnt]++;
                }
            }
            else
            {
                if (iy+1 < ny)
                {
                    offd_i[o_cnt]++;
                    if (ix < nx_part[p+1]-1)
                    {
                        offd_i[o_cnt]++;
                    }
                    else if (ix < nx-1)
                    {
                        offd_i[o_cnt]++;
                    }
                }
            }
        }
    }

    diag_j = hypre_CTAlloc(HYPRE_Int, diag_i[local_num_rows]);
    diag_data = hypre_CTAlloc(HYPRE_Real, diag_i[local_num_rows]);

    if (num_procs > 1)
    {
        offd_j = hypre_CTAlloc(HYPRE_Int, offd_i[local_num_rows]);
        offd_data = hypre_CTAlloc(HYPRE_Real, offd_i[local_num_rows]);
    }

    row_index = 0;
    cnt = 0;
    o_cnt = 0;
    for (iy = ny_part[q];  iy < ny_part[q+1]; iy++)
    {
        for (ix = nx_part[p]; ix < nx_part[p+1]; ix++)
        {
            diag_j[cnt] = row_index;
            diag_data[cnt++] = value[0];
            if (iy > ny_part[q])
            {
                if (ix > nx_part[p])
                {
                    diag_j[cnt] = row_index-nx_local-1 ;
                    diag_data[cnt++] = value[3];
                }
                else
                {
                    if (ix)
                    {
                        offd_j[o_cnt] = hypre_map2(ix-1,iy-1,p-1,q,P,Q,
                                                   nx_part,ny_part,global_part);
                        offd_data[o_cnt++] = value[3];
                    }
                }
                diag_j[cnt] = row_index-nx_local;
                diag_data[cnt++] = value[2];
            }
            else
            {
                if (iy)
                {
                    if (ix > nx_part[p])
                    {
                        offd_j[o_cnt] = hypre_map2(ix-1,iy-1,p,q-1,P,Q,
                                                   nx_part,ny_part,global_part);
                        offd_data[o_cnt++] = value[3];
                    }
                    else if (ix)
                    {
                        offd_j[o_cnt] = hypre_map2(ix-1,iy-1,p-1,q-1,P,Q,
                                                   nx_part,ny_part,global_part);
                        offd_data[o_cnt++] = value[3];
                    }
                    offd_j[o_cnt] = hypre_map2(ix,iy-1,p,q-1,P,Q,
                                               nx_part,ny_part,global_part);
                    offd_data[o_cnt++] = value[2];
                }
            }
            if (ix > nx_part[p])
            {
                diag_j[cnt] = row_index-1;
                diag_data[cnt++] = value[1];
            }
            else
            {
                if (ix)
                {
                    offd_j[o_cnt] = hypre_map2(ix-1,iy,p-1,q,P,Q,
                                               nx_part,ny_part,global_part);
                    offd_data[o_cnt++] = value[1];
                }
            }
            if (ix+1 < nx_part[p+1])
            {
                diag_j[cnt] = row_index+1;
                diag_data[cnt++] = value[1];
            }
            else
            {
                if (ix+1 < nx)
                {
                    offd_j[o_cnt] = hypre_map2(ix+1,iy,p+1,q,P,Q,
                                               nx_part,ny_part,global_part);
                    offd_data[o_cnt++] = value[1];
                }
            }
            if (iy+1 < ny_part[q+1])
            {
                diag_j[cnt] = row_index+nx_local;
                diag_data[cnt++] = value[2];
                if (ix < nx_part[p+1]-1)
                {
                    diag_j[cnt] = row_index+nx_local+1 ;
                    diag_data[cnt++] = value[3];
                }
                else
                {
                    if (ix+1 < nx)
                    {
                        offd_j[o_cnt] = hypre_map2(ix+1,iy+1,p+1,q,P,Q,
                                                   nx_part,ny_part,global_part);
                        offd_data[o_cnt++] = value[3];
                    }
                }
            }
            else
            {
                if (iy+1 < ny)
                {
                    offd_j[o_cnt] = hypre_map2(ix,iy+1,p,q+1,P,Q,
                                               nx_part,ny_part,global_part);
                    offd_data[o_cnt++] = value[2];
                    if (ix < nx_part[p+1]-1)
                    {
                        offd_j[o_cnt] = hypre_map2(ix+1,iy+1,p,q+1,P,Q,
                                                   nx_part,ny_part,global_part);
                        offd_data[o_cnt++] = value[3];
                    }
                    else if (ix < nx-1)
                    {
                        offd_j[o_cnt] = hypre_map2(ix+1,iy+1,p+1,q+1,P,Q,
                                                   nx_part,ny_part,global_part);
                        offd_data[o_cnt++] = value[3];
                    }
                }
            }
            row_index++;
        }
    }

    if (num_procs > 1)
    {
        work = hypre_CTAlloc(HYPRE_Int,o_cnt);

        for (i=0; i < o_cnt; i++)
            work[i] = offd_j[i];

        qsort0(work, 0, o_cnt-1);

        col_map_offd[0] = work[0];
        cnt = 0;
        for (i=0; i < o_cnt; i++)
        {
            if (work[i] > col_map_offd[cnt])
            {
                cnt++;
                col_map_offd[cnt] = work[i];
            }
        }

        for (i=0; i < o_cnt; i++)
        {
            for (j=0; j < num_cols_offd; j++)
            {
                if (offd_j[i] == col_map_offd[j])
                {
                    offd_j[i] = j;
                    break;
                }
            }
        }

        hypre_TFree(work);
    }

    A = hypre_ParCSRMatrixCreate(comm, grid_size, grid_size,
                                 global_part, global_part, num_cols_offd,
                                 diag_i[local_num_rows],
                                 offd_i[local_num_rows]);

    hypre_ParCSRMatrixColMapOffd(A) = col_map_offd;

    diag = hypre_ParCSRMatrixDiag(A);
    hypre_CSRMatrixI(diag) = diag_i;
    hypre_CSRMatrixJ(diag) = diag_j;
    hypre_CSRMatrixData(diag) = diag_data;

    offd = hypre_ParCSRMatrixOffd(A);
    hypre_CSRMatrixI(offd) = offd_i;
    if (num_cols_offd)
    {
        hypre_CSRMatrixJ(offd) = offd_j;
        hypre_CSRMatrixData(offd) = offd_data;
    }

    hypre_TFree(nx_part);
    hypre_TFree(ny_part);
    hypre_TFree(value);

    return (HYPRE_ParCSRMatrix) A;
}
Beispiel #21
0
int
hypre_BoomerAMGSetupStats( void               *amg_vdata,
                        hypre_ParCSRMatrix *A         )
{
   MPI_Comm 	      comm = hypre_ParCSRMatrixComm(A);   

   hypre_ParAMGData *amg_data = (hypre_ParAMGData*)amg_vdata;

   /*hypre_SeqAMGData *seq_data = hypre_ParAMGDataSeqData(amg_data);*/

   /* Data Structure variables */

   hypre_ParCSRMatrix **A_array;
   hypre_ParCSRMatrix **P_array;

   hypre_CSRMatrix *A_diag;
   double          *A_diag_data;
   int             *A_diag_i;

   hypre_CSRMatrix *A_offd;   
   double          *A_offd_data;
   int             *A_offd_i;

   hypre_CSRMatrix *P_diag;
   double          *P_diag_data;
   int             *P_diag_i;

   hypre_CSRMatrix *P_offd;   
   double          *P_offd_data;
   int             *P_offd_i;


   int	    numrows;

   HYPRE_BigInt	    *row_starts;

 
   int      num_levels; 
   int      coarsen_type;
   int      interp_type;
   int      measure_type;
   double   global_nonzeros;

   double  *send_buff;
   double  *gather_buff;
 
   /* Local variables */

   int       level;
   int       j;
   HYPRE_BigInt fine_size;
 
   int       min_entries;
   int       max_entries;

   int       num_procs,my_id, num_threads;


   double    min_rowsum;
   double    max_rowsum;
   double    sparse;


   int       i;
   

   HYPRE_BigInt coarse_size;
   int       entries;

   double    avg_entries;
   double    rowsum;

   double    min_weight;
   double    max_weight;

   int       global_min_e;
   int       global_max_e;
   double    global_min_rsum;
   double    global_max_rsum;
   double    global_min_wt;
   double    global_max_wt;

   double  *num_coeffs;
   double  *num_variables;
   double   total_variables; 
   double   operat_cmplxty;
   double   grid_cmplxty;

   /* amg solve params */
   int      max_iter;
   int      cycle_type;    
   int     *num_grid_sweeps;  
   int     *grid_relax_type;   
   int      relax_order;
   int    **grid_relax_points; 
   double  *relax_weight;
   double  *omega;
   double   tol;


   int one = 1;
   int minus_one = -1;
   int zero = 0;
   int smooth_type;
   int smooth_num_levels;
   int agg_num_levels;
   /*int seq_cg = 0;*/
   
   /*if (seq_data)
      seq_cg = 1;*/


   MPI_Comm_size(comm, &num_procs);   
   MPI_Comm_rank(comm,&my_id);
   num_threads = hypre_NumThreads();

   if (my_id == 0)
      printf("\nNumber of MPI processes: %d , Number of OpenMP threads: %d\n", num_procs, num_threads);
   A_array = hypre_ParAMGDataAArray(amg_data);
   P_array = hypre_ParAMGDataPArray(amg_data);
   num_levels = hypre_ParAMGDataNumLevels(amg_data);
   coarsen_type = hypre_ParAMGDataCoarsenType(amg_data);
   interp_type = hypre_ParAMGDataInterpType(amg_data);
   measure_type = hypre_ParAMGDataMeasureType(amg_data);
   smooth_type = hypre_ParAMGDataSmoothType(amg_data);
   smooth_num_levels = hypre_ParAMGDataSmoothNumLevels(amg_data);
   agg_num_levels = hypre_ParAMGDataAggNumLevels(amg_data);


   /*----------------------------------------------------------
    * Get the amg_data data
    *----------------------------------------------------------*/

   num_levels = hypre_ParAMGDataNumLevels(amg_data);
   max_iter   = hypre_ParAMGDataMaxIter(amg_data);
   cycle_type = hypre_ParAMGDataCycleType(amg_data);    
   num_grid_sweeps = hypre_ParAMGDataNumGridSweeps(amg_data);  
   grid_relax_type = hypre_ParAMGDataGridRelaxType(amg_data);
   grid_relax_points = hypre_ParAMGDataGridRelaxPoints(amg_data);
   relax_weight = hypre_ParAMGDataRelaxWeight(amg_data); 
   relax_order = hypre_ParAMGDataRelaxOrder(amg_data); 
   omega = hypre_ParAMGDataOmega(amg_data); 
   tol = hypre_ParAMGDataTol(amg_data);

   /*block_mode = hypre_ParAMGDataBlockMode(amg_data);*/

   send_buff     = hypre_CTAlloc(double, 6);
#ifdef HYPRE_NO_GLOBAL_PARTITION
   gather_buff = hypre_CTAlloc(double,6);    
#else
   gather_buff = hypre_CTAlloc(double,6*num_procs);    
#endif

   if (my_id==0)
   {
      printf("\nBoomerAMG SETUP PARAMETERS:\n\n");
      printf(" Max levels = %d\n",hypre_ParAMGDataMaxLevels(amg_data));
      printf(" Num levels = %d\n\n",num_levels);
      printf(" Strength Threshold = %f\n", 
                         hypre_ParAMGDataStrongThreshold(amg_data));
      printf(" Interpolation Truncation Factor = %f\n", 
                         hypre_ParAMGDataTruncFactor(amg_data));
      printf(" Maximum Row Sum Threshold for Dependency Weakening = %f\n\n", 
                         hypre_ParAMGDataMaxRowSum(amg_data));

      if (coarsen_type == 0)
      {
	printf(" Coarsening Type = Cleary-Luby-Jones-Plassman\n");
      }
      else if (abs(coarsen_type) == 1) 
      {
	printf(" Coarsening Type = Ruge\n");
      }
      else if (abs(coarsen_type) == 2) 
      {
	printf(" Coarsening Type = Ruge2B\n");
      }
      else if (abs(coarsen_type) == 3) 
      {
	printf(" Coarsening Type = Ruge3\n");
      }
      else if (abs(coarsen_type) == 4) 
      {
	printf(" Coarsening Type = Ruge 3c \n");
      }
      else if (abs(coarsen_type) == 5) 
      {
	printf(" Coarsening Type = Ruge relax special points \n");
      }
      else if (abs(coarsen_type) == 6) 
      {
	printf(" Coarsening Type = Falgout-CLJP \n");
      }
      else if (abs(coarsen_type) == 8) 
      {
	printf(" Coarsening Type = PMIS \n");
      }
      else if (abs(coarsen_type) == 10) 
      {
	printf(" Coarsening Type = HMIS \n");
      }
      else if (abs(coarsen_type) == 11) 
      {
	printf(" Coarsening Type = Ruge 1st pass only \n");
      }
      else if (abs(coarsen_type) == 9) 
      {
	printf(" Coarsening Type = PMIS fixed random \n");
      }
      else if (abs(coarsen_type) == 7) 
      {
	printf(" Coarsening Type = CLJP, fixed random \n");
      }
      if (coarsen_type > 0) 
      {
	printf(" Hybrid Coarsening (switch to CLJP when coarsening slows)\n");
      }
      

      if (coarsen_type)
      	printf(" measures are determined %s\n\n", 
                  (measure_type ? "globally" : "locally"));

      if (agg_num_levels)
	printf(" no. of levels of aggressive coarsening: %d\n\n", agg_num_levels);

#ifdef HYPRE_NO_GLOBAL_PARTITION
      printf( "\n No global partition option chosen.\n\n");
#endif

      if (interp_type == 0)
      {
	printf(" Interpolation = modified classical interpolation\n");
      }
      else if (interp_type == 1) 
      {
	printf(" Interpolation = LS interpolation \n");
      }
      else if (interp_type == 2) 
      {
	printf(" Interpolation = modified classical interpolation for hyperbolic PDEs\n");
      }
      else if (interp_type == 3) 
      {
	printf(" Interpolation = direct interpolation with separation of weights\n");
      }
      else if (interp_type == 4) 
      {
	printf(" Interpolation = multipass interpolation\n");
      }
      else if (interp_type == 5) 
      {
	printf(" Interpolation = multipass interpolation with separation of weights\n");
      }
      else if (interp_type == 6) 
      {
	printf(" Interpolation = extended+i interpolation\n");
      }
      else if (interp_type == 7) 
      {
	printf(" Interpolation = extended+i interpolation (only when needed)\n");
      }
      else if (interp_type == 8) 
      {
	printf(" Interpolation = standard interpolation\n");
      }
      else if (interp_type == 9) 
      {
	printf(" Interpolation = standard interpolation with separation of weights\n");
      }
      else if (interp_type == 12) 
      {
	printf(" FF interpolation \n");
      }
      else if (interp_type == 13) 
      {
	printf(" FF1 interpolation \n");
      }

      {
         printf( "\nOperator Matrix Information:\n\n");
      }
#if HYPRE_LONG_LONG
      printf("                  nonzero         entries p");
      printf("er row        row sums\n");
      printf("lev        rows   entries  sparse  min  max   ");
      printf("avg       min         max\n");
      printf("=======================================");
      printf("==================================\n");
#else      
      printf("            nonzero         entries p");
      printf("er row        row sums\n");
      printf("lev   rows  entries  sparse  min  max   ");
      printf("avg       min         max\n");
      printf("=======================================");
      printf("============================\n");
#endif
   }
  
   /*-----------------------------------------------------
    *  Enter Statistics Loop
    *-----------------------------------------------------*/

   num_coeffs = hypre_CTAlloc(double,num_levels);

   num_variables = hypre_CTAlloc(double,num_levels);

   for (level = 0; level < num_levels; level++)
   { 

      {
         A_diag = hypre_ParCSRMatrixDiag(A_array[level]);
         A_diag_data = hypre_CSRMatrixData(A_diag);
         A_diag_i = hypre_CSRMatrixI(A_diag);
         
         A_offd = hypre_ParCSRMatrixOffd(A_array[level]);   
         A_offd_data = hypre_CSRMatrixData(A_offd);
         A_offd_i = hypre_CSRMatrixI(A_offd);
         
         row_starts = hypre_ParCSRMatrixRowStarts(A_array[level]);
         
         fine_size = hypre_ParCSRMatrixGlobalNumRows(A_array[level]);
         global_nonzeros = hypre_ParCSRMatrixDNumNonzeros(A_array[level]);
         num_coeffs[level] = global_nonzeros;
         num_variables[level] = (double) fine_size;
         
         sparse = global_nonzeros /((double) fine_size * (double) fine_size);

         min_entries = 0;
         max_entries = 0;
         min_rowsum = 0.0;
         max_rowsum = 0.0;
         
         if (hypre_CSRMatrixNumRows(A_diag))
         {
            min_entries = (A_diag_i[1]-A_diag_i[0])+(A_offd_i[1]-A_offd_i[0]);
            for (j = A_diag_i[0]; j < A_diag_i[1]; j++)
               min_rowsum += A_diag_data[j];
            for (j = A_offd_i[0]; j < A_offd_i[1]; j++)
               min_rowsum += A_offd_data[j];
            
            max_rowsum = min_rowsum;
            
            for (j = 0; j < hypre_CSRMatrixNumRows(A_diag); j++)
            {
               entries = (A_diag_i[j+1]-A_diag_i[j])+(A_offd_i[j+1]-A_offd_i[j]);
               min_entries = hypre_min(entries, min_entries);
               max_entries = hypre_max(entries, max_entries);
               
               rowsum = 0.0;
               for (i = A_diag_i[j]; i < A_diag_i[j+1]; i++)
                  rowsum += A_diag_data[i];
               
               for (i = A_offd_i[j]; i < A_offd_i[j+1]; i++)
                  rowsum += A_offd_data[i];
               
               min_rowsum = hypre_min(rowsum, min_rowsum);
               max_rowsum = hypre_max(rowsum, max_rowsum);
            }
         }
         avg_entries = global_nonzeros / ((double) fine_size);
      }
      
#ifdef HYPRE_NO_GLOBAL_PARTITION       

       numrows = (int)(row_starts[1]-row_starts[0]);
       if (!numrows) /* if we don't have any rows, then don't have this count toward
                         min row sum or min num entries */
       {
          min_entries = 1000000;
          min_rowsum =  1.0e7;
       }
       
       send_buff[0] = - (double) min_entries;
       send_buff[1] = (double) max_entries;
       send_buff[2] = - min_rowsum;
       send_buff[3] = max_rowsum;

       MPI_Reduce(send_buff, gather_buff, 4, MPI_DOUBLE, MPI_MAX, 0, comm);
       
       if (my_id ==0)
       {
          global_min_e = - gather_buff[0];
          global_max_e = gather_buff[1];
          global_min_rsum = - gather_buff[2];
          global_max_rsum = gather_buff[3];
#ifdef HYPRE_LONG_LONG
          printf( "%2d %12lld %8.0f  %0.3f  %4d %4d",
                  level, fine_size, global_nonzeros, sparse, global_min_e, 
                  global_max_e);
#else          
          printf( "%2d %7d %8.0f  %0.3f  %4d %4d",
                  level, fine_size, global_nonzeros, sparse, global_min_e, 
                  global_max_e);
#endif          
          printf("  %4.1f  %10.3e  %10.3e\n", avg_entries,
                 global_min_rsum, global_max_rsum);
       }
       
#else

       send_buff[0] = (double) min_entries;
       send_buff[1] = (double) max_entries;
       send_buff[2] = min_rowsum;
       send_buff[3] = max_rowsum;
       
       MPI_Gather(send_buff,4,MPI_DOUBLE,gather_buff,4,MPI_DOUBLE,0,comm);

       if (my_id == 0)
       {
          global_min_e = 1000000;
          global_max_e = 0;
          global_min_rsum = 1.0e7;
          global_max_rsum = 0.0;
          for (j = 0; j < num_procs; j++)
          {
             numrows = row_starts[j+1]-row_starts[j];
             if (numrows)
             {
                global_min_e = hypre_min(global_min_e, (int) gather_buff[j*4]);
                global_min_rsum = hypre_min(global_min_rsum, gather_buff[j*4 +2]);
             }
             global_max_e = hypre_max(global_max_e, (int) gather_buff[j*4 +1]);
             global_max_rsum = hypre_max(global_max_rsum, gather_buff[j*4 +3]);
          }

#ifdef HYPRE_LONG_LONG
          printf( "%2d %12lld %8.0f  %0.3f  %4d %4d",
                  level, fine_size, global_nonzeros, sparse, global_min_e, 
                  global_max_e);
#else          
          printf( "%2d %7d %8.0f  %0.3f  %4d %4d",
                  level, fine_size, global_nonzeros, sparse, global_min_e, 
                  global_max_e);
#endif          
          printf("  %4.1f  %10.3e  %10.3e\n", avg_entries,
                 global_min_rsum, global_max_rsum);
       }

#endif

        
   }

       
   if (my_id == 0)
   {
      {
         printf( "\n\nInterpolation Matrix Information:\n\n");
      }
#if HYPRE_LONG_LONG
      printf("                             entries/row    min     max");
      printf("         row sums\n");
      printf("lev        rows x cols          min max  ");
      printf("   weight   weight     min       max \n");
      printf("=======================================");
      printf("======================================\n");
#else      
      printf("                 entries/row    min     max");
      printf("         row sums\n");
      printf("lev  rows cols    min max  ");
      printf("   weight   weight     min       max \n");
      printf("=======================================");
      printf("==========================\n");
#endif
   }
  
   /*-----------------------------------------------------
    *  Enter Statistics Loop
    *-----------------------------------------------------*/


   for (level = 0; level < num_levels-1; level++)
   {
    
      {
         P_diag = hypre_ParCSRMatrixDiag(P_array[level]);
         P_diag_data = hypre_CSRMatrixData(P_diag);
         P_diag_i = hypre_CSRMatrixI(P_diag);
         
         P_offd = hypre_ParCSRMatrixOffd(P_array[level]);   
         P_offd_data = hypre_CSRMatrixData(P_offd);
         P_offd_i = hypre_CSRMatrixI(P_offd);
         
         row_starts = hypre_ParCSRMatrixRowStarts(P_array[level]);
         
         fine_size = hypre_ParCSRMatrixGlobalNumRows(P_array[level]);
         coarse_size = hypre_ParCSRMatrixGlobalNumCols(P_array[level]);
         global_nonzeros = hypre_ParCSRMatrixNumNonzeros(P_array[level]);
         
         min_weight = 1.0;
         max_weight = 0.0;
         max_rowsum = 0.0;
         min_rowsum = 0.0;
         min_entries = 0;
         max_entries = 0;
         
         if (hypre_CSRMatrixNumRows(P_diag))
         {
            if (hypre_CSRMatrixNumCols(P_diag)) min_weight = P_diag_data[0];
            for (j = P_diag_i[0]; j < P_diag_i[1]; j++)
            {
               min_weight = hypre_min(min_weight, P_diag_data[j]);
               if (P_diag_data[j] != 1.0)
                  max_weight = hypre_max(max_weight, P_diag_data[j]);
               min_rowsum += P_diag_data[j];
            }
            for (j = P_offd_i[0]; j < P_offd_i[1]; j++)
            {        
               min_weight = hypre_min(min_weight, P_offd_data[j]); 
               if (P_offd_data[j] != 1.0)
                  max_weight = hypre_max(max_weight, P_offd_data[j]);     
               min_rowsum += P_offd_data[j];
            }
            
            max_rowsum = min_rowsum;
            
            min_entries = (P_diag_i[1]-P_diag_i[0])+(P_offd_i[1]-P_offd_i[0]); 
            max_entries = 0;
            
            for (j = 0; j < hypre_CSRMatrixNumRows(P_diag); j++)
            {
               entries = (P_diag_i[j+1]-P_diag_i[j])+(P_offd_i[j+1]-P_offd_i[j]);
               min_entries = hypre_min(entries, min_entries);
               max_entries = hypre_max(entries, max_entries);
               
               rowsum = 0.0;
               for (i = P_diag_i[j]; i < P_diag_i[j+1]; i++)
               {
                  min_weight = hypre_min(min_weight, P_diag_data[i]);
                  if (P_diag_data[i] != 1.0)
                     max_weight = hypre_max(max_weight, P_diag_data[i]);
                  rowsum += P_diag_data[i];
               }
               
               for (i = P_offd_i[j]; i < P_offd_i[j+1]; i++)
               {
                  min_weight = hypre_min(min_weight, P_offd_data[i]);
                  if (P_offd_data[i] != 1.0) 
                     max_weight = hypre_max(max_weight, P_offd_data[i]);
                  rowsum += P_offd_data[i];
               }
               
               min_rowsum = hypre_min(rowsum, min_rowsum);
               max_rowsum = hypre_max(rowsum, max_rowsum);
            }
         
         }
         avg_entries = ((double) global_nonzeros) / ((double) fine_size);
      }

#ifdef HYPRE_NO_GLOBAL_PARTITION

      numrows = (int)(row_starts[1]-row_starts[0]);
      if (!numrows) /* if we don't have any rows, then don't have this count toward
                       min row sum or min num entries */
      {
         min_entries = 1000000;
         min_rowsum =  1.0e7;
         min_weight = 1.0e7;
       }
       
      send_buff[0] = - (double) min_entries;
      send_buff[1] = (double) max_entries;
      send_buff[2] = - min_rowsum;
      send_buff[3] = max_rowsum;
      send_buff[4] = - min_weight;
      send_buff[5] = max_weight;

      MPI_Reduce(send_buff, gather_buff, 6, MPI_DOUBLE, MPI_MAX, 0, comm);

      if (my_id == 0)
      {
         global_min_e = - gather_buff[0];
         global_max_e = gather_buff[1];
         global_min_rsum = -gather_buff[2];
         global_max_rsum = gather_buff[3];
         global_min_wt = -gather_buff[4];
         global_max_wt = gather_buff[5];

#ifdef HYPRE_LONG_LONG
          printf( "%2d %12lld x %-12lld %3d %3d",
                 level, fine_size, coarse_size,  global_min_e, global_max_e);
#else          
          printf( "%2d %5d x %-5d %3d %3d",
                 level, fine_size, coarse_size,  global_min_e, global_max_e);
#endif          
         printf("  %10.3e %9.3e %9.3e %9.3e\n",
                global_min_wt, global_max_wt, 
                global_min_rsum, global_max_rsum);
      }


#else
      
      send_buff[0] = (double) min_entries;
      send_buff[1] = (double) max_entries;
      send_buff[2] = min_rowsum;
      send_buff[3] = max_rowsum;
      send_buff[4] = min_weight;
      send_buff[5] = max_weight;
      
      MPI_Gather(send_buff,6,MPI_DOUBLE,gather_buff,6,MPI_DOUBLE,0,comm);
      
      if (my_id == 0)
      {
         global_min_e = 1000000;
         global_max_e = 0;
         global_min_rsum = 1.0e7;
         global_max_rsum = 0.0;
         global_min_wt = 1.0e7;
         global_max_wt = 0.0;
         
         for (j = 0; j < num_procs; j++)
         {
            numrows = row_starts[j+1] - row_starts[j];
            if (numrows)
            {
               global_min_e = hypre_min(global_min_e, (int) gather_buff[j*6]);
               global_min_rsum = hypre_min(global_min_rsum, gather_buff[j*6+2]);
               global_min_wt = hypre_min(global_min_wt, gather_buff[j*6+4]);
            }
            global_max_e = hypre_max(global_max_e, (int) gather_buff[j*6+1]);
            global_max_rsum = hypre_max(global_max_rsum, gather_buff[j*6+3]);
            global_max_wt = hypre_max(global_max_wt, gather_buff[j*6+5]);
         }
         
#ifdef HYPRE_LONG_LONG
         printf( "%2d %12lld x %-12lld %3d %3d",
                 level, fine_size, coarse_size,  global_min_e, global_max_e);
#else          
         printf( "%2d %5d x %-5d %3d %3d",
                 level, fine_size, coarse_size,  global_min_e, global_max_e);
#endif          
         printf("  %10.3e %9.3e %9.3e %9.3e\n",
                global_min_wt, global_max_wt, 
                global_min_rsum, global_max_rsum);
      }

#endif

   }


   total_variables = 0;
   operat_cmplxty = 0;
   for (j=0;j<hypre_ParAMGDataNumLevels(amg_data);j++)
   {
      operat_cmplxty +=  num_coeffs[j] / num_coeffs[0];
      total_variables += num_variables[j];
   }
   if (num_variables[0] != 0)
      grid_cmplxty = total_variables / num_variables[0];
 
   if (my_id == 0 )
   {
      printf("\n\n     Complexity:    grid = %f\n",grid_cmplxty);
      printf("                operator = %f\n",operat_cmplxty);
   }

   if (my_id == 0) printf("\n\n");

   if (my_id == 0)
   { 
      printf("\n\nBoomerAMG SOLVER PARAMETERS:\n\n");
      printf( "  Maximum number of cycles:         %d \n",max_iter);
      printf( "  Stopping Tolerance:               %e \n",tol); 
      printf( "  Cycle type (1 = V, 2 = W, etc.):  %d\n\n", cycle_type);
      printf( "  Relaxation Parameters:\n");
      printf( "   Visiting Grid:                     down   up  coarse\n");
      printf( "            Number of partial sweeps: %4d   %2d  %4d \n",
              num_grid_sweeps[1],
              num_grid_sweeps[2],num_grid_sweeps[3]);
      printf( "   Type 0=Jac, 3=hGS, 6=hSGS, 9=GE:   %4d   %2d  %4d \n",
              grid_relax_type[1],
              grid_relax_type[2],grid_relax_type[3]);
#if 1 /* TO DO: may not want this to print if CG in the coarse grid */
      printf( "   Point types, partial sweeps (1=C, -1=F):\n");
      if (grid_relax_points)
      {
         printf( "                  Pre-CG relaxation (down):");
         for (j = 0; j < num_grid_sweeps[1]; j++)
              printf("  %2d", grid_relax_points[1][j]);
         printf( "\n");
         printf( "                   Post-CG relaxation (up):");
         for (j = 0; j < num_grid_sweeps[2]; j++)
              printf("  %2d", grid_relax_points[2][j]);
         printf( "\n");
         printf( "                             Coarsest grid:");
         for (j = 0; j < num_grid_sweeps[3]; j++)
              printf("  %2d", grid_relax_points[3][j]);
         printf( "\n\n");
      }
      else if (relax_order == 1)
      {
         printf( "                  Pre-CG relaxation (down):");
         for (j = 0; j < num_grid_sweeps[1]; j++)
              printf("  %2d  %2d", one, minus_one);
         printf( "\n");
         printf( "                   Post-CG relaxation (up):");
         for (j = 0; j < num_grid_sweeps[2]; j++)
              printf("  %2d  %2d", minus_one, one);
         printf( "\n");
         printf( "                             Coarsest grid:");
         for (j = 0; j < num_grid_sweeps[3]; j++)
              printf("  %2d", zero);
         printf( "\n\n");
      }
      else 
      {
         printf( "                  Pre-CG relaxation (down):");
         for (j = 0; j < num_grid_sweeps[1]; j++)
              printf("  %2d", zero);
         printf( "\n");
         printf( "                   Post-CG relaxation (up):");
         for (j = 0; j < num_grid_sweeps[2]; j++)
              printf("  %2d", zero);
         printf( "\n");
         printf( "                             Coarsest grid:");
         for (j = 0; j < num_grid_sweeps[3]; j++)
              printf("  %2d", zero);
         printf( "\n\n");
      }
#endif
      if (smooth_type == 6)
         for (j=0; j < smooth_num_levels; j++)
            printf( " Schwarz Relaxation Weight %f level %d\n",
			hypre_ParAMGDataSchwarzRlxWeight(amg_data),j);
      for (j=0; j < num_levels; j++)
         if (relax_weight[j] != 1)
	       printf( " Relaxation Weight %f level %d\n",relax_weight[j],j);
      for (j=0; j < num_levels; j++)
         if (omega[j] != 1)
               printf( " Outer relaxation weight %f level %d\n",omega[j],j);
   }


   /*if (seq_cg) 
   {
      hypre_seqAMGSetupStats(amg_data,num_coeffs[0],num_variables[0], 
                             operat_cmplxty, grid_cmplxty );
   }*/
   




   hypre_TFree(num_coeffs);
   hypre_TFree(num_variables);
   hypre_TFree(send_buff);
   hypre_TFree(gather_buff);
   
   return(0);
}  
Beispiel #22
0
HYPRE_Int
hypre_AMGSetupStats( void *amg_vdata )
{
   hypre_AMGData *amg_data = amg_vdata;

   /* Data Structure variables */

   hypre_CSRMatrix **A_array;
   hypre_CSRMatrix **P_array;

   HYPRE_Int      num_levels; 
   HYPRE_Int      num_nonzeros;
/*   HYPRE_Int      amg_ioutdat;
   char    *log_file_name;
*/ 

   /* Local variables */

   HYPRE_Int      *A_i;
   double   *A_data;

   HYPRE_Int      *P_i;
   double   *P_data;

   HYPRE_Int       level;
   HYPRE_Int       i,j;
   HYPRE_Int       fine_size;
   HYPRE_Int       coarse_size;
   HYPRE_Int       entries;
   HYPRE_Int       total_entries;
   HYPRE_Int       min_entries;
   HYPRE_Int       max_entries;
   double    avg_entries;
   double    rowsum;
   double    min_rowsum;
   double    max_rowsum;
   double    sparse;
   double    min_weight;
   double    max_weight;
   double    op_complxty=0;
   double    grid_complxty=0;
   double    num_nz0;
   double    num_var0;

   A_array = hypre_AMGDataAArray(amg_data);
   P_array = hypre_AMGDataPArray(amg_data);
   num_levels = hypre_AMGDataNumLevels(amg_data);
/*   amg_ioutdat = hypre_AMGDataIOutDat(amg_data);
   log_file_name = hypre_AMGDataLogFileName(amg_data);
*/    
   hypre_printf("\n  AMG SETUP PARAMETERS:\n\n");
   hypre_printf(" Strength threshold = %f\n",hypre_AMGDataStrongThreshold(amg_data));
   hypre_printf(" Max levels = %d\n",hypre_AMGDataMaxLevels(amg_data));
   hypre_printf(" Num levels = %d\n\n",num_levels);

   hypre_printf( "\nOperator Matrix Information:\n\n");

   hypre_printf("         nonzero         entries p");
   hypre_printf("er row        row sums\n");
   hypre_printf("lev rows entries  sparse  min max  ");
   hypre_printf("avg       min         max\n");
   hypre_printf("=======================================");
   hypre_printf("==========================\n");

  
   /*-----------------------------------------------------
    *  Enter Statistics Loop
    *-----------------------------------------------------*/

   num_var0 = (double) hypre_CSRMatrixNumRows(A_array[0]);
   num_nz0 = (double) hypre_CSRMatrixNumNonzeros(A_array[0]);
 
   for (level = 0; level < num_levels; level++)
   {
       A_i = hypre_CSRMatrixI(A_array[level]);
       A_data = hypre_CSRMatrixData(A_array[level]);

       fine_size = hypre_CSRMatrixNumRows(A_array[level]);
       num_nonzeros = hypre_CSRMatrixNumNonzeros(A_array[level]);
       sparse = num_nonzeros /((double) fine_size * (double) fine_size);
       op_complxty += ((double)num_nonzeros/num_nz0);
       grid_complxty += ((double)fine_size/num_var0);

       min_entries = A_i[1]-A_i[0];
       max_entries = 0;
       total_entries = 0;
       min_rowsum = 0.0;
       max_rowsum = 0.0;

       for (j = A_i[0]; j < A_i[1]; j++)
                    min_rowsum += A_data[j];

       max_rowsum = min_rowsum;

       for (j = 0; j < fine_size; j++)
       {
           entries = A_i[j+1] - A_i[j];
           min_entries = hypre_min(entries, min_entries);
           max_entries = hypre_max(entries, max_entries);
           total_entries += entries;

           rowsum = 0.0;
           for (i = A_i[j]; i < A_i[j+1]; i++)
               rowsum += A_data[i];

           min_rowsum = hypre_min(rowsum, min_rowsum);
           max_rowsum = hypre_max(rowsum, max_rowsum);
       }

       avg_entries = ((double) total_entries) / ((double) fine_size);

       hypre_printf( "%2d %5d %7d  %0.3f  %3d %3d",
                 level, fine_size, num_nonzeros, sparse, min_entries, 
                 max_entries);
       hypre_printf("  %4.1f  %10.3e  %10.3e\n", avg_entries,
                                 min_rowsum, max_rowsum);
   }
       
   hypre_printf( "\n\nInterpolation Matrix Information:\n\n");

   hypre_printf("                 entries/row    min     max");
   hypre_printf("         row sums\n");
   hypre_printf("lev  rows cols    min max  ");
   hypre_printf("   weight   weight     min       max \n");
   hypre_printf("=======================================");
   hypre_printf("==========================\n");

  
   /*-----------------------------------------------------
    *  Enter Statistics Loop
    *-----------------------------------------------------*/

   for (level = 0; level < num_levels-1; level++)
   {
       P_i = hypre_CSRMatrixI(P_array[level]);
       P_data = hypre_CSRMatrixData(P_array[level]);

       fine_size = hypre_CSRMatrixNumRows(P_array[level]);
       coarse_size = hypre_CSRMatrixNumCols(P_array[level]);
       num_nonzeros = hypre_CSRMatrixNumNonzeros(P_array[level]);

       min_entries = P_i[1]-P_i[0];
       max_entries = 0;
       total_entries = 0;
       min_rowsum = 0.0;
       max_rowsum = 0.0;
       min_weight = P_data[0];
       max_weight = 0.0;

       for (j = P_i[0]; j < P_i[1]; j++)
                    min_rowsum += P_data[j];

       max_rowsum = min_rowsum;

       for (j = 0; j < num_nonzeros; j++)
       {
          if (P_data[j] != 1.0)
          {
             min_weight = hypre_min(min_weight,P_data[j]);
             max_weight = hypre_max(max_weight,P_data[j]);
          }
       }

       for (j = 0; j < fine_size; j++)
       {
           entries = P_i[j+1] - P_i[j];
           min_entries = hypre_min(entries, min_entries);
           max_entries = hypre_max(entries, max_entries);
           total_entries += entries;

           rowsum = 0.0;
           for (i = P_i[j]; i < P_i[j+1]; i++)
               rowsum += P_data[i];

           min_rowsum = hypre_min(rowsum, min_rowsum);
           max_rowsum = hypre_max(rowsum, max_rowsum);
       }

       hypre_printf( "%2d %5d x %-5d %3d %3d",
             level, fine_size, coarse_size,  min_entries, max_entries);
       hypre_printf("  %5.3e  %5.3e %5.3e  %5.3e\n",
                 min_weight, max_weight, min_rowsum, max_rowsum);
   }
     
   hypre_printf("\n Operator Complexity: %8.3f\n", op_complxty); 
   hypre_printf(" Grid Complexity:     %8.3f\n", grid_complxty); 
   hypre_WriteSolverParams(amg_data);  
   
   return(0);
}  
Beispiel #23
0
int
hypre_StructCoarsen( hypre_StructGrid  *fgrid,
                     hypre_Index        index,
                     hypre_Index        stride,
                     int                prune,
                     hypre_StructGrid **cgrid_ptr )
{
   int ierr = 0;

   hypre_StructGrid   *cgrid;
                      
   MPI_Comm            comm;
   int                 dim;
   hypre_BoxNeighbors *neighbors;
   hypre_BoxArray     *hood_boxes;
   int                 num_hood;
   int                *hood_procs;
   int                *hood_ids;
   int                 first_local;
   int                 num_local;
   int                 num_periodic;
   int                 max_distance;
   hypre_Box          *bounding_box;
   hypre_Index         periodic;

   MPI_Request        *send_requests;
   MPI_Status         *send_status;
   int                *send_buffer;
   int                 send_size;
   MPI_Request        *recv_requests;
   MPI_Status         *recv_status;
   int               **recv_buffers;
   int                *recv_sizes;
   int                 my_rank;

   int                *send_procs;
   int                *recv_procs;
   int                 num_sends;
   int                 num_recvs;
                      
   hypre_BoxArray     *new_hood_boxes;
   int                 new_num_hood;
   int                *new_hood_procs;
   int                *new_hood_ids;
   int                 new_first_local;
   int                 new_num_local;
   int                 new_num_periodic;

   hypre_Box          *box;
   hypre_Box          *local_box;
   hypre_Box          *neighbor_box;
   hypre_Box          *local_cbox;
   hypre_Box          *neighbor_cbox;
   hypre_Index         imin;
   hypre_Index         imax;
   int                 alloc_size;

   double              perimeter_count, cperimeter_count;
   /*double              diff, distance, perimeter_count, cperimeter_count;*/
                      
   int                *iarray;
   int                *jrecv;
   int                 i, j, d, ilocal;
   int                 data_id, min_id, jj;

   /*-----------------------------------------
    * Copy needed info from fgrid
    *-----------------------------------------*/

   comm         = hypre_StructGridComm(fgrid);
   dim          = hypre_StructGridDim(fgrid);
   neighbors    = hypre_StructGridNeighbors(fgrid);
   hood_boxes   = hypre_BoxArrayDuplicate(hypre_BoxNeighborsBoxes(neighbors));
   num_hood     = hypre_BoxArraySize(hood_boxes);

   iarray  = hypre_BoxNeighborsProcs(neighbors);
   hood_procs = hypre_TAlloc(int, num_hood);
   for (i = 0; i < num_hood; i++)
   {
      hood_procs[i] = iarray[i];
   }

   iarray = hypre_BoxNeighborsIDs(neighbors);
   hood_ids  = hypre_TAlloc(int, num_hood);
   for (i = 0; i < num_hood; i++)
   {
      hood_ids[i] = iarray[i];
   }

   first_local  = hypre_BoxNeighborsFirstLocal(neighbors);
   num_local    = hypre_BoxNeighborsNumLocal(neighbors);
   num_periodic = hypre_BoxNeighborsNumPeriodic(neighbors);

   max_distance = hypre_StructGridMaxDistance(fgrid);
   bounding_box = hypre_BoxDuplicate(hypre_StructGridBoundingBox(fgrid));
   hypre_CopyIndex(hypre_StructGridPeriodic(fgrid), periodic);

   MPI_Comm_rank(comm, &my_rank);

#if DEBUG
   sprintf(filename, "zcoarsen.%05d", my_rank);

   if ((file = fopen(filename, "a")) == NULL)
   {
      printf("Error: can't open output file %s\n", filename);
      exit(1);
   }

   fprintf(file, "\n\n============================\n\n");
   fprintf(file, "\n\n%d\n\n", debug_count++);
   fprintf(file, "num_hood = %d\n", num_hood);
   for (i = 0; i < num_hood; i++)
   {
      box = hypre_BoxArrayBox(hood_boxes, i);
      fprintf(file, "(%d,%d,%d) X (%d,%d,%d) ; (%d,%d); %d\n",
              hypre_BoxIMinX(box),hypre_BoxIMinY(box),hypre_BoxIMinZ(box),
              hypre_BoxIMaxX(box),hypre_BoxIMaxY(box),hypre_BoxIMaxZ(box),
              hood_procs[i], hood_ids[i], hypre_BoxVolume(box));
   }
   fprintf(file, "first_local  = %d\n", first_local);
   fprintf(file, "num_local    = %d\n", num_local);
   fprintf(file, "num_periodic = %d\n", num_periodic);
#endif

   /*-----------------------------------------
    * Coarsen bounding box
    *-----------------------------------------*/

   hypre_StructCoarsenBox(bounding_box, index, stride);

   /*-----------------------------------------
    * Coarsen neighborhood boxes & determine
    * send / recv procs
    *
    * NOTE: Currently, this always communicates
    * with all neighboring processes.
    *-----------------------------------------*/

   local_cbox = hypre_BoxCreate();
   neighbor_cbox = hypre_BoxCreate();

   num_recvs = 0;
   num_sends = 0;
   recv_procs = NULL;
   send_procs = NULL;
   for (i = 0; i < num_hood; i++)
   {
      if (hood_procs[i] != my_rank)
      {
         for (j = 0; j < num_local; j++)
         {
            ilocal = first_local + j;

            local_box    = hypre_BoxArrayBox(hood_boxes, ilocal);
            neighbor_box = hypre_BoxArrayBox(hood_boxes, i);

            /* coarsen boxes being considered */
            hypre_CopyBox(local_box, local_cbox);
            hypre_StructCoarsenBox(local_cbox, index, stride);
            hypre_CopyBox(neighbor_box, neighbor_cbox);
            hypre_StructCoarsenBox(neighbor_cbox, index, stride);

            /*-----------------------
             * Receive info?
             *-----------------------*/

/* always communicate */
#if 0
            perimeter_count = 0;
            cperimeter_count = 0;
            for (d = 0; d < 3; d++)
            {
               distance = max_distance;
               diff = hypre_BoxIMaxD(neighbor_box, d) -
                  hypre_BoxIMaxD(local_box, d);
               if (diff > 0)
               {
                  distance = hypre_min(distance, diff);
               }
               diff = hypre_BoxIMinD(local_box, d) -
                  hypre_BoxIMinD(neighbor_box, d);
               if (diff > 0)
               {
                  distance = hypre_min(distance, diff);
               }
               if (distance < max_distance)
               {
                  perimeter_count++;
               }

               distance = max_distance;
               diff = hypre_BoxIMaxD(neighbor_cbox, d) -
                  hypre_BoxIMaxD(local_cbox, d);
               if (diff > 0)
               {
                  distance = hypre_min(distance, diff);
               }
               diff = hypre_BoxIMinD(local_cbox, d) -
                  hypre_BoxIMinD(neighbor_cbox, d);
               if (diff > 0)
               {
                  distance = hypre_min(distance, diff);
               }
               if (distance < max_distance)
               {
                  cperimeter_count++;
               }
            }
#else
            perimeter_count = 0;
            cperimeter_count = 1;
#endif
            if (cperimeter_count > perimeter_count)
            {
               if (num_recvs == 0)
               {
                  recv_procs = hypre_TAlloc(int, num_hood);
                  recv_procs[num_recvs] = hood_procs[i];
                  num_recvs++;
               }
               else if (hood_procs[i] != recv_procs[num_recvs-1])
               {
                  recv_procs[num_recvs] = hood_procs[i];
                  num_recvs++;
               }
            }

            /*-----------------------
             * Send info?
             *-----------------------*/

/* always communicate */
#if 0
            perimeter_count = 0;
            cperimeter_count = 0;
            for (d = 0; d < 3; d++)
            {
               distance = max_distance;
               diff = hypre_BoxIMaxD(local_box, d) -
                  hypre_BoxIMaxD(neighbor_box, d);
               if (diff > 0)
               {
                  distance = hypre_min(distance, diff);
               }
               diff = hypre_BoxIMinD(neighbor_box, d) -
                  hypre_BoxIMinD(local_box, d);
               if (diff > 0)
               {
                  distance = hypre_min(distance, diff);
               }
               if (distance < max_distance)
               {
                  perimeter_count++;
               }

               distance = max_distance;
               diff = hypre_BoxIMaxD(local_cbox, d) -
                  hypre_BoxIMaxD(neighbor_cbox, d);
               if (diff > 0)
               {
                  distance = hypre_min(distance, diff);
               }
               diff = hypre_BoxIMinD(neighbor_cbox, d) -
                  hypre_BoxIMinD(local_cbox, d);
               if (diff > 0)
               {
                  distance = hypre_min(distance, diff);
               }
               if (distance < max_distance)
               {
                  cperimeter_count++;
               }
            }
#else
            perimeter_count = 0;
            cperimeter_count = 1;
#endif
            if (cperimeter_count > perimeter_count)
            {
               if (num_sends == 0)
               {
                  send_procs = hypre_TAlloc(int, num_hood);
                  send_procs[num_sends] = hood_procs[i];
                  num_sends++;
               }
               else if (hood_procs[i] != send_procs[num_sends-1])
               {
                  send_procs[num_sends] = hood_procs[i];
                  num_sends++;
               }
            }