Пример #1
0
void hypre_sort_and_create_inverse_map(
  HYPRE_Int *in, HYPRE_Int len, HYPRE_Int **out, hypre_UnorderedIntMap *inverse_map)
{
   if (len == 0)
   {
      return;
   }

#ifdef HYPRE_PROFILE
   hypre_profile_times[HYPRE_TIMER_ID_MERGE] -= hypre_MPI_Wtime();
#endif

   HYPRE_Int *temp = hypre_TAlloc(HYPRE_Int, len);
   hypre_merge_sort(in, temp, len, out);
   hypre_UnorderedIntMapCreate(inverse_map, 2*len, 16*hypre_NumThreads());
   HYPRE_Int i;
#pragma omp parallel for HYPRE_SMP_SCHEDULE
   for (i = 0; i < len; i++)
   {
      HYPRE_Int old = hypre_UnorderedIntMapPutIfAbsent(inverse_map, (*out)[i], i);
      assert(old == HYPRE_HOPSCOTCH_HASH_EMPTY);
#ifdef DBG_MERGE_SORT
      if (hypre_UnorderedIntMapGet(inverse_map, (*out)[i]) != i)
      {
         fprintf(stderr, "%d %d\n", i, (*out)[i]);
         assert(false);
      }
#endif
   }

#ifdef DBG_MERGE_SORT
  std::unordered_map<HYPRE_Int, HYPRE_Int> inverse_map2(len);
  for (HYPRE_Int i = 0; i < len; ++i) {
    inverse_map2[(*out)[i]] = i;
    if (hypre_UnorderedIntMapGet(inverse_map, (*out)[i]) != i)
    {
      fprintf(stderr, "%d %d\n", i, (*out)[i]);
      assert(false);
    }
  }
  assert(hypre_UnorderedIntMapSize(inverse_map) == len);
#endif

   if (*out == in)
   {
      hypre_TFree(temp);
   }
   else
   {
      hypre_TFree(in);
   }

#ifdef HYPRE_PROFILE
   hypre_profile_times[HYPRE_TIMER_ID_MERGE] += hypre_MPI_Wtime();
#endif
}
Пример #2
0
HYPRE_Int *hypre_UnorderedIntSetCopyToArray( hypre_UnorderedIntSet *s, HYPRE_Int *len )
{
  HYPRE_Int prefix_sum_workspace[hypre_NumThreads() + 1];
  HYPRE_Int *ret_array = NULL;

#ifdef HYPRE_CONCURRENT_HOPSCOTCH
#pragma omp parallel
#endif
  {
    HYPRE_Int n = s->bucketMask + HYPRE_HOPSCOTCH_HASH_INSERT_RANGE;
    HYPRE_Int i_begin, i_end;
    hypre_GetSimpleThreadPartition(&i_begin, &i_end, n);

    HYPRE_Int cnt = 0;
    HYPRE_Int i;
    for (i = i_begin; i < i_end; i++)
    {
      if (HYPRE_HOPSCOTCH_HASH_EMPTY != s->hash[i]) cnt++;
    }

    hypre_prefix_sum(&cnt, len, prefix_sum_workspace);

#ifdef HYPRE_CONCURRENT_HOPSCOTCH
#pragma omp barrier
#pragma omp master
#endif
    {
      ret_array = hypre_TAlloc(HYPRE_Int, *len);
    }
#ifdef HYPRE_CONCURRENT_HOPSCOTCH
#pragma omp barrier
#endif

    for (i = i_begin; i < i_end; i++)
    {
      if (HYPRE_HOPSCOTCH_HASH_EMPTY != s->hash[i]) ret_array[cnt++] = s->key[i];
    }
  }

  return ret_array;
}
Пример #3
0
int
hypre_BoomerAMGSetupStats( void               *amg_vdata,
                        hypre_ParCSRMatrix *A         )
{
   MPI_Comm 	      comm = hypre_ParCSRMatrixComm(A);   

   hypre_ParAMGData *amg_data = (hypre_ParAMGData*)amg_vdata;

   /*hypre_SeqAMGData *seq_data = hypre_ParAMGDataSeqData(amg_data);*/

   /* Data Structure variables */

   hypre_ParCSRMatrix **A_array;
   hypre_ParCSRMatrix **P_array;

   hypre_CSRMatrix *A_diag;
   double          *A_diag_data;
   int             *A_diag_i;

   hypre_CSRMatrix *A_offd;   
   double          *A_offd_data;
   int             *A_offd_i;

   hypre_CSRMatrix *P_diag;
   double          *P_diag_data;
   int             *P_diag_i;

   hypre_CSRMatrix *P_offd;   
   double          *P_offd_data;
   int             *P_offd_i;


   int	    numrows;

   HYPRE_BigInt	    *row_starts;

 
   int      num_levels; 
   int      coarsen_type;
   int      interp_type;
   int      measure_type;
   double   global_nonzeros;

   double  *send_buff;
   double  *gather_buff;
 
   /* Local variables */

   int       level;
   int       j;
   HYPRE_BigInt fine_size;
 
   int       min_entries;
   int       max_entries;

   int       num_procs,my_id, num_threads;


   double    min_rowsum;
   double    max_rowsum;
   double    sparse;


   int       i;
   

   HYPRE_BigInt coarse_size;
   int       entries;

   double    avg_entries;
   double    rowsum;

   double    min_weight;
   double    max_weight;

   int       global_min_e;
   int       global_max_e;
   double    global_min_rsum;
   double    global_max_rsum;
   double    global_min_wt;
   double    global_max_wt;

   double  *num_coeffs;
   double  *num_variables;
   double   total_variables; 
   double   operat_cmplxty;
   double   grid_cmplxty;

   /* amg solve params */
   int      max_iter;
   int      cycle_type;    
   int     *num_grid_sweeps;  
   int     *grid_relax_type;   
   int      relax_order;
   int    **grid_relax_points; 
   double  *relax_weight;
   double  *omega;
   double   tol;


   int one = 1;
   int minus_one = -1;
   int zero = 0;
   int smooth_type;
   int smooth_num_levels;
   int agg_num_levels;
   /*int seq_cg = 0;*/
   
   /*if (seq_data)
      seq_cg = 1;*/


   MPI_Comm_size(comm, &num_procs);   
   MPI_Comm_rank(comm,&my_id);
   num_threads = hypre_NumThreads();

   if (my_id == 0)
      printf("\nNumber of MPI processes: %d , Number of OpenMP threads: %d\n", num_procs, num_threads);
   A_array = hypre_ParAMGDataAArray(amg_data);
   P_array = hypre_ParAMGDataPArray(amg_data);
   num_levels = hypre_ParAMGDataNumLevels(amg_data);
   coarsen_type = hypre_ParAMGDataCoarsenType(amg_data);
   interp_type = hypre_ParAMGDataInterpType(amg_data);
   measure_type = hypre_ParAMGDataMeasureType(amg_data);
   smooth_type = hypre_ParAMGDataSmoothType(amg_data);
   smooth_num_levels = hypre_ParAMGDataSmoothNumLevels(amg_data);
   agg_num_levels = hypre_ParAMGDataAggNumLevels(amg_data);


   /*----------------------------------------------------------
    * Get the amg_data data
    *----------------------------------------------------------*/

   num_levels = hypre_ParAMGDataNumLevels(amg_data);
   max_iter   = hypre_ParAMGDataMaxIter(amg_data);
   cycle_type = hypre_ParAMGDataCycleType(amg_data);    
   num_grid_sweeps = hypre_ParAMGDataNumGridSweeps(amg_data);  
   grid_relax_type = hypre_ParAMGDataGridRelaxType(amg_data);
   grid_relax_points = hypre_ParAMGDataGridRelaxPoints(amg_data);
   relax_weight = hypre_ParAMGDataRelaxWeight(amg_data); 
   relax_order = hypre_ParAMGDataRelaxOrder(amg_data); 
   omega = hypre_ParAMGDataOmega(amg_data); 
   tol = hypre_ParAMGDataTol(amg_data);

   /*block_mode = hypre_ParAMGDataBlockMode(amg_data);*/

   send_buff     = hypre_CTAlloc(double, 6);
#ifdef HYPRE_NO_GLOBAL_PARTITION
   gather_buff = hypre_CTAlloc(double,6);    
#else
   gather_buff = hypre_CTAlloc(double,6*num_procs);    
#endif

   if (my_id==0)
   {
      printf("\nBoomerAMG SETUP PARAMETERS:\n\n");
      printf(" Max levels = %d\n",hypre_ParAMGDataMaxLevels(amg_data));
      printf(" Num levels = %d\n\n",num_levels);
      printf(" Strength Threshold = %f\n", 
                         hypre_ParAMGDataStrongThreshold(amg_data));
      printf(" Interpolation Truncation Factor = %f\n", 
                         hypre_ParAMGDataTruncFactor(amg_data));
      printf(" Maximum Row Sum Threshold for Dependency Weakening = %f\n\n", 
                         hypre_ParAMGDataMaxRowSum(amg_data));

      if (coarsen_type == 0)
      {
	printf(" Coarsening Type = Cleary-Luby-Jones-Plassman\n");
      }
      else if (abs(coarsen_type) == 1) 
      {
	printf(" Coarsening Type = Ruge\n");
      }
      else if (abs(coarsen_type) == 2) 
      {
	printf(" Coarsening Type = Ruge2B\n");
      }
      else if (abs(coarsen_type) == 3) 
      {
	printf(" Coarsening Type = Ruge3\n");
      }
      else if (abs(coarsen_type) == 4) 
      {
	printf(" Coarsening Type = Ruge 3c \n");
      }
      else if (abs(coarsen_type) == 5) 
      {
	printf(" Coarsening Type = Ruge relax special points \n");
      }
      else if (abs(coarsen_type) == 6) 
      {
	printf(" Coarsening Type = Falgout-CLJP \n");
      }
      else if (abs(coarsen_type) == 8) 
      {
	printf(" Coarsening Type = PMIS \n");
      }
      else if (abs(coarsen_type) == 10) 
      {
	printf(" Coarsening Type = HMIS \n");
      }
      else if (abs(coarsen_type) == 11) 
      {
	printf(" Coarsening Type = Ruge 1st pass only \n");
      }
      else if (abs(coarsen_type) == 9) 
      {
	printf(" Coarsening Type = PMIS fixed random \n");
      }
      else if (abs(coarsen_type) == 7) 
      {
	printf(" Coarsening Type = CLJP, fixed random \n");
      }
      if (coarsen_type > 0) 
      {
	printf(" Hybrid Coarsening (switch to CLJP when coarsening slows)\n");
      }
      

      if (coarsen_type)
      	printf(" measures are determined %s\n\n", 
                  (measure_type ? "globally" : "locally"));

      if (agg_num_levels)
	printf(" no. of levels of aggressive coarsening: %d\n\n", agg_num_levels);

#ifdef HYPRE_NO_GLOBAL_PARTITION
      printf( "\n No global partition option chosen.\n\n");
#endif

      if (interp_type == 0)
      {
	printf(" Interpolation = modified classical interpolation\n");
      }
      else if (interp_type == 1) 
      {
	printf(" Interpolation = LS interpolation \n");
      }
      else if (interp_type == 2) 
      {
	printf(" Interpolation = modified classical interpolation for hyperbolic PDEs\n");
      }
      else if (interp_type == 3) 
      {
	printf(" Interpolation = direct interpolation with separation of weights\n");
      }
      else if (interp_type == 4) 
      {
	printf(" Interpolation = multipass interpolation\n");
      }
      else if (interp_type == 5) 
      {
	printf(" Interpolation = multipass interpolation with separation of weights\n");
      }
      else if (interp_type == 6) 
      {
	printf(" Interpolation = extended+i interpolation\n");
      }
      else if (interp_type == 7) 
      {
	printf(" Interpolation = extended+i interpolation (only when needed)\n");
      }
      else if (interp_type == 8) 
      {
	printf(" Interpolation = standard interpolation\n");
      }
      else if (interp_type == 9) 
      {
	printf(" Interpolation = standard interpolation with separation of weights\n");
      }
      else if (interp_type == 12) 
      {
	printf(" FF interpolation \n");
      }
      else if (interp_type == 13) 
      {
	printf(" FF1 interpolation \n");
      }

      {
         printf( "\nOperator Matrix Information:\n\n");
      }
#if HYPRE_LONG_LONG
      printf("                  nonzero         entries p");
      printf("er row        row sums\n");
      printf("lev        rows   entries  sparse  min  max   ");
      printf("avg       min         max\n");
      printf("=======================================");
      printf("==================================\n");
#else      
      printf("            nonzero         entries p");
      printf("er row        row sums\n");
      printf("lev   rows  entries  sparse  min  max   ");
      printf("avg       min         max\n");
      printf("=======================================");
      printf("============================\n");
#endif
   }
  
   /*-----------------------------------------------------
    *  Enter Statistics Loop
    *-----------------------------------------------------*/

   num_coeffs = hypre_CTAlloc(double,num_levels);

   num_variables = hypre_CTAlloc(double,num_levels);

   for (level = 0; level < num_levels; level++)
   { 

      {
         A_diag = hypre_ParCSRMatrixDiag(A_array[level]);
         A_diag_data = hypre_CSRMatrixData(A_diag);
         A_diag_i = hypre_CSRMatrixI(A_diag);
         
         A_offd = hypre_ParCSRMatrixOffd(A_array[level]);   
         A_offd_data = hypre_CSRMatrixData(A_offd);
         A_offd_i = hypre_CSRMatrixI(A_offd);
         
         row_starts = hypre_ParCSRMatrixRowStarts(A_array[level]);
         
         fine_size = hypre_ParCSRMatrixGlobalNumRows(A_array[level]);
         global_nonzeros = hypre_ParCSRMatrixDNumNonzeros(A_array[level]);
         num_coeffs[level] = global_nonzeros;
         num_variables[level] = (double) fine_size;
         
         sparse = global_nonzeros /((double) fine_size * (double) fine_size);

         min_entries = 0;
         max_entries = 0;
         min_rowsum = 0.0;
         max_rowsum = 0.0;
         
         if (hypre_CSRMatrixNumRows(A_diag))
         {
            min_entries = (A_diag_i[1]-A_diag_i[0])+(A_offd_i[1]-A_offd_i[0]);
            for (j = A_diag_i[0]; j < A_diag_i[1]; j++)
               min_rowsum += A_diag_data[j];
            for (j = A_offd_i[0]; j < A_offd_i[1]; j++)
               min_rowsum += A_offd_data[j];
            
            max_rowsum = min_rowsum;
            
            for (j = 0; j < hypre_CSRMatrixNumRows(A_diag); j++)
            {
               entries = (A_diag_i[j+1]-A_diag_i[j])+(A_offd_i[j+1]-A_offd_i[j]);
               min_entries = hypre_min(entries, min_entries);
               max_entries = hypre_max(entries, max_entries);
               
               rowsum = 0.0;
               for (i = A_diag_i[j]; i < A_diag_i[j+1]; i++)
                  rowsum += A_diag_data[i];
               
               for (i = A_offd_i[j]; i < A_offd_i[j+1]; i++)
                  rowsum += A_offd_data[i];
               
               min_rowsum = hypre_min(rowsum, min_rowsum);
               max_rowsum = hypre_max(rowsum, max_rowsum);
            }
         }
         avg_entries = global_nonzeros / ((double) fine_size);
      }
      
#ifdef HYPRE_NO_GLOBAL_PARTITION       

       numrows = (int)(row_starts[1]-row_starts[0]);
       if (!numrows) /* if we don't have any rows, then don't have this count toward
                         min row sum or min num entries */
       {
          min_entries = 1000000;
          min_rowsum =  1.0e7;
       }
       
       send_buff[0] = - (double) min_entries;
       send_buff[1] = (double) max_entries;
       send_buff[2] = - min_rowsum;
       send_buff[3] = max_rowsum;

       MPI_Reduce(send_buff, gather_buff, 4, MPI_DOUBLE, MPI_MAX, 0, comm);
       
       if (my_id ==0)
       {
          global_min_e = - gather_buff[0];
          global_max_e = gather_buff[1];
          global_min_rsum = - gather_buff[2];
          global_max_rsum = gather_buff[3];
#ifdef HYPRE_LONG_LONG
          printf( "%2d %12lld %8.0f  %0.3f  %4d %4d",
                  level, fine_size, global_nonzeros, sparse, global_min_e, 
                  global_max_e);
#else          
          printf( "%2d %7d %8.0f  %0.3f  %4d %4d",
                  level, fine_size, global_nonzeros, sparse, global_min_e, 
                  global_max_e);
#endif          
          printf("  %4.1f  %10.3e  %10.3e\n", avg_entries,
                 global_min_rsum, global_max_rsum);
       }
       
#else

       send_buff[0] = (double) min_entries;
       send_buff[1] = (double) max_entries;
       send_buff[2] = min_rowsum;
       send_buff[3] = max_rowsum;
       
       MPI_Gather(send_buff,4,MPI_DOUBLE,gather_buff,4,MPI_DOUBLE,0,comm);

       if (my_id == 0)
       {
          global_min_e = 1000000;
          global_max_e = 0;
          global_min_rsum = 1.0e7;
          global_max_rsum = 0.0;
          for (j = 0; j < num_procs; j++)
          {
             numrows = row_starts[j+1]-row_starts[j];
             if (numrows)
             {
                global_min_e = hypre_min(global_min_e, (int) gather_buff[j*4]);
                global_min_rsum = hypre_min(global_min_rsum, gather_buff[j*4 +2]);
             }
             global_max_e = hypre_max(global_max_e, (int) gather_buff[j*4 +1]);
             global_max_rsum = hypre_max(global_max_rsum, gather_buff[j*4 +3]);
          }

#ifdef HYPRE_LONG_LONG
          printf( "%2d %12lld %8.0f  %0.3f  %4d %4d",
                  level, fine_size, global_nonzeros, sparse, global_min_e, 
                  global_max_e);
#else          
          printf( "%2d %7d %8.0f  %0.3f  %4d %4d",
                  level, fine_size, global_nonzeros, sparse, global_min_e, 
                  global_max_e);
#endif          
          printf("  %4.1f  %10.3e  %10.3e\n", avg_entries,
                 global_min_rsum, global_max_rsum);
       }

#endif

        
   }

       
   if (my_id == 0)
   {
      {
         printf( "\n\nInterpolation Matrix Information:\n\n");
      }
#if HYPRE_LONG_LONG
      printf("                             entries/row    min     max");
      printf("         row sums\n");
      printf("lev        rows x cols          min max  ");
      printf("   weight   weight     min       max \n");
      printf("=======================================");
      printf("======================================\n");
#else      
      printf("                 entries/row    min     max");
      printf("         row sums\n");
      printf("lev  rows cols    min max  ");
      printf("   weight   weight     min       max \n");
      printf("=======================================");
      printf("==========================\n");
#endif
   }
  
   /*-----------------------------------------------------
    *  Enter Statistics Loop
    *-----------------------------------------------------*/


   for (level = 0; level < num_levels-1; level++)
   {
    
      {
         P_diag = hypre_ParCSRMatrixDiag(P_array[level]);
         P_diag_data = hypre_CSRMatrixData(P_diag);
         P_diag_i = hypre_CSRMatrixI(P_diag);
         
         P_offd = hypre_ParCSRMatrixOffd(P_array[level]);   
         P_offd_data = hypre_CSRMatrixData(P_offd);
         P_offd_i = hypre_CSRMatrixI(P_offd);
         
         row_starts = hypre_ParCSRMatrixRowStarts(P_array[level]);
         
         fine_size = hypre_ParCSRMatrixGlobalNumRows(P_array[level]);
         coarse_size = hypre_ParCSRMatrixGlobalNumCols(P_array[level]);
         global_nonzeros = hypre_ParCSRMatrixNumNonzeros(P_array[level]);
         
         min_weight = 1.0;
         max_weight = 0.0;
         max_rowsum = 0.0;
         min_rowsum = 0.0;
         min_entries = 0;
         max_entries = 0;
         
         if (hypre_CSRMatrixNumRows(P_diag))
         {
            if (hypre_CSRMatrixNumCols(P_diag)) min_weight = P_diag_data[0];
            for (j = P_diag_i[0]; j < P_diag_i[1]; j++)
            {
               min_weight = hypre_min(min_weight, P_diag_data[j]);
               if (P_diag_data[j] != 1.0)
                  max_weight = hypre_max(max_weight, P_diag_data[j]);
               min_rowsum += P_diag_data[j];
            }
            for (j = P_offd_i[0]; j < P_offd_i[1]; j++)
            {        
               min_weight = hypre_min(min_weight, P_offd_data[j]); 
               if (P_offd_data[j] != 1.0)
                  max_weight = hypre_max(max_weight, P_offd_data[j]);     
               min_rowsum += P_offd_data[j];
            }
            
            max_rowsum = min_rowsum;
            
            min_entries = (P_diag_i[1]-P_diag_i[0])+(P_offd_i[1]-P_offd_i[0]); 
            max_entries = 0;
            
            for (j = 0; j < hypre_CSRMatrixNumRows(P_diag); j++)
            {
               entries = (P_diag_i[j+1]-P_diag_i[j])+(P_offd_i[j+1]-P_offd_i[j]);
               min_entries = hypre_min(entries, min_entries);
               max_entries = hypre_max(entries, max_entries);
               
               rowsum = 0.0;
               for (i = P_diag_i[j]; i < P_diag_i[j+1]; i++)
               {
                  min_weight = hypre_min(min_weight, P_diag_data[i]);
                  if (P_diag_data[i] != 1.0)
                     max_weight = hypre_max(max_weight, P_diag_data[i]);
                  rowsum += P_diag_data[i];
               }
               
               for (i = P_offd_i[j]; i < P_offd_i[j+1]; i++)
               {
                  min_weight = hypre_min(min_weight, P_offd_data[i]);
                  if (P_offd_data[i] != 1.0) 
                     max_weight = hypre_max(max_weight, P_offd_data[i]);
                  rowsum += P_offd_data[i];
               }
               
               min_rowsum = hypre_min(rowsum, min_rowsum);
               max_rowsum = hypre_max(rowsum, max_rowsum);
            }
         
         }
         avg_entries = ((double) global_nonzeros) / ((double) fine_size);
      }

#ifdef HYPRE_NO_GLOBAL_PARTITION

      numrows = (int)(row_starts[1]-row_starts[0]);
      if (!numrows) /* if we don't have any rows, then don't have this count toward
                       min row sum or min num entries */
      {
         min_entries = 1000000;
         min_rowsum =  1.0e7;
         min_weight = 1.0e7;
       }
       
      send_buff[0] = - (double) min_entries;
      send_buff[1] = (double) max_entries;
      send_buff[2] = - min_rowsum;
      send_buff[3] = max_rowsum;
      send_buff[4] = - min_weight;
      send_buff[5] = max_weight;

      MPI_Reduce(send_buff, gather_buff, 6, MPI_DOUBLE, MPI_MAX, 0, comm);

      if (my_id == 0)
      {
         global_min_e = - gather_buff[0];
         global_max_e = gather_buff[1];
         global_min_rsum = -gather_buff[2];
         global_max_rsum = gather_buff[3];
         global_min_wt = -gather_buff[4];
         global_max_wt = gather_buff[5];

#ifdef HYPRE_LONG_LONG
          printf( "%2d %12lld x %-12lld %3d %3d",
                 level, fine_size, coarse_size,  global_min_e, global_max_e);
#else          
          printf( "%2d %5d x %-5d %3d %3d",
                 level, fine_size, coarse_size,  global_min_e, global_max_e);
#endif          
         printf("  %10.3e %9.3e %9.3e %9.3e\n",
                global_min_wt, global_max_wt, 
                global_min_rsum, global_max_rsum);
      }


#else
      
      send_buff[0] = (double) min_entries;
      send_buff[1] = (double) max_entries;
      send_buff[2] = min_rowsum;
      send_buff[3] = max_rowsum;
      send_buff[4] = min_weight;
      send_buff[5] = max_weight;
      
      MPI_Gather(send_buff,6,MPI_DOUBLE,gather_buff,6,MPI_DOUBLE,0,comm);
      
      if (my_id == 0)
      {
         global_min_e = 1000000;
         global_max_e = 0;
         global_min_rsum = 1.0e7;
         global_max_rsum = 0.0;
         global_min_wt = 1.0e7;
         global_max_wt = 0.0;
         
         for (j = 0; j < num_procs; j++)
         {
            numrows = row_starts[j+1] - row_starts[j];
            if (numrows)
            {
               global_min_e = hypre_min(global_min_e, (int) gather_buff[j*6]);
               global_min_rsum = hypre_min(global_min_rsum, gather_buff[j*6+2]);
               global_min_wt = hypre_min(global_min_wt, gather_buff[j*6+4]);
            }
            global_max_e = hypre_max(global_max_e, (int) gather_buff[j*6+1]);
            global_max_rsum = hypre_max(global_max_rsum, gather_buff[j*6+3]);
            global_max_wt = hypre_max(global_max_wt, gather_buff[j*6+5]);
         }
         
#ifdef HYPRE_LONG_LONG
         printf( "%2d %12lld x %-12lld %3d %3d",
                 level, fine_size, coarse_size,  global_min_e, global_max_e);
#else          
         printf( "%2d %5d x %-5d %3d %3d",
                 level, fine_size, coarse_size,  global_min_e, global_max_e);
#endif          
         printf("  %10.3e %9.3e %9.3e %9.3e\n",
                global_min_wt, global_max_wt, 
                global_min_rsum, global_max_rsum);
      }

#endif

   }


   total_variables = 0;
   operat_cmplxty = 0;
   for (j=0;j<hypre_ParAMGDataNumLevels(amg_data);j++)
   {
      operat_cmplxty +=  num_coeffs[j] / num_coeffs[0];
      total_variables += num_variables[j];
   }
   if (num_variables[0] != 0)
      grid_cmplxty = total_variables / num_variables[0];
 
   if (my_id == 0 )
   {
      printf("\n\n     Complexity:    grid = %f\n",grid_cmplxty);
      printf("                operator = %f\n",operat_cmplxty);
   }

   if (my_id == 0) printf("\n\n");

   if (my_id == 0)
   { 
      printf("\n\nBoomerAMG SOLVER PARAMETERS:\n\n");
      printf( "  Maximum number of cycles:         %d \n",max_iter);
      printf( "  Stopping Tolerance:               %e \n",tol); 
      printf( "  Cycle type (1 = V, 2 = W, etc.):  %d\n\n", cycle_type);
      printf( "  Relaxation Parameters:\n");
      printf( "   Visiting Grid:                     down   up  coarse\n");
      printf( "            Number of partial sweeps: %4d   %2d  %4d \n",
              num_grid_sweeps[1],
              num_grid_sweeps[2],num_grid_sweeps[3]);
      printf( "   Type 0=Jac, 3=hGS, 6=hSGS, 9=GE:   %4d   %2d  %4d \n",
              grid_relax_type[1],
              grid_relax_type[2],grid_relax_type[3]);
#if 1 /* TO DO: may not want this to print if CG in the coarse grid */
      printf( "   Point types, partial sweeps (1=C, -1=F):\n");
      if (grid_relax_points)
      {
         printf( "                  Pre-CG relaxation (down):");
         for (j = 0; j < num_grid_sweeps[1]; j++)
              printf("  %2d", grid_relax_points[1][j]);
         printf( "\n");
         printf( "                   Post-CG relaxation (up):");
         for (j = 0; j < num_grid_sweeps[2]; j++)
              printf("  %2d", grid_relax_points[2][j]);
         printf( "\n");
         printf( "                             Coarsest grid:");
         for (j = 0; j < num_grid_sweeps[3]; j++)
              printf("  %2d", grid_relax_points[3][j]);
         printf( "\n\n");
      }
      else if (relax_order == 1)
      {
         printf( "                  Pre-CG relaxation (down):");
         for (j = 0; j < num_grid_sweeps[1]; j++)
              printf("  %2d  %2d", one, minus_one);
         printf( "\n");
         printf( "                   Post-CG relaxation (up):");
         for (j = 0; j < num_grid_sweeps[2]; j++)
              printf("  %2d  %2d", minus_one, one);
         printf( "\n");
         printf( "                             Coarsest grid:");
         for (j = 0; j < num_grid_sweeps[3]; j++)
              printf("  %2d", zero);
         printf( "\n\n");
      }
      else 
      {
         printf( "                  Pre-CG relaxation (down):");
         for (j = 0; j < num_grid_sweeps[1]; j++)
              printf("  %2d", zero);
         printf( "\n");
         printf( "                   Post-CG relaxation (up):");
         for (j = 0; j < num_grid_sweeps[2]; j++)
              printf("  %2d", zero);
         printf( "\n");
         printf( "                             Coarsest grid:");
         for (j = 0; j < num_grid_sweeps[3]; j++)
              printf("  %2d", zero);
         printf( "\n\n");
      }
#endif
      if (smooth_type == 6)
         for (j=0; j < smooth_num_levels; j++)
            printf( " Schwarz Relaxation Weight %f level %d\n",
			hypre_ParAMGDataSchwarzRlxWeight(amg_data),j);
      for (j=0; j < num_levels; j++)
         if (relax_weight[j] != 1)
	       printf( " Relaxation Weight %f level %d\n",relax_weight[j],j);
      for (j=0; j < num_levels; j++)
         if (omega[j] != 1)
               printf( " Outer relaxation weight %f level %d\n",omega[j],j);
   }


   /*if (seq_cg) 
   {
      hypre_seqAMGSetupStats(amg_data,num_coeffs[0],num_variables[0], 
                             operat_cmplxty, grid_cmplxty );
   }*/
   




   hypre_TFree(num_coeffs);
   hypre_TFree(num_variables);
   hypre_TFree(send_buff);
   hypre_TFree(gather_buff);
   
   return(0);
}  
Пример #4
0
int
hypre_CSRMatrixMatvecT( double           alpha,
               hypre_CSRMatrix *A,
               hypre_Vector    *x,
               double           beta,
               hypre_Vector    *y     )
{
   double     *A_data    = hypre_CSRMatrixData(A);
   int        *A_i       = hypre_CSRMatrixI(A);
   int        *A_j       = hypre_CSRMatrixJ(A);
   int         num_rows  = hypre_CSRMatrixNumRows(A);
   int         num_cols  = hypre_CSRMatrixNumCols(A);

   double     *x_data = hypre_VectorData(x);
   double     *y_data = hypre_VectorData(y);
   int         x_size = hypre_VectorSize(x);
   int         y_size = hypre_VectorSize(y);
   int         num_vectors = hypre_VectorNumVectors(x);
   int         idxstride_y = hypre_VectorIndexStride(y);
   int         vecstride_y = hypre_VectorVectorStride(y);
   int         idxstride_x = hypre_VectorIndexStride(x);
   int         vecstride_x = hypre_VectorVectorStride(x);

   double      temp;

   int         i, i1, j, jv, jj, ns, ne, size, rest;
   int         num_threads;

   int         ierr  = 0;

   /*---------------------------------------------------------------------
    *  Check for size compatibility.  MatvecT returns ierr = 1 if
    *  length of X doesn't equal the number of rows of A,
    *  ierr = 2 if the length of Y doesn't equal the number of 
    *  columns of A, and ierr = 3 if both are true.
    *
    *  Because temporary vectors are often used in MatvecT, none of 
    *  these conditions terminates processing, and the ierr flag
    *  is informational only.
    *--------------------------------------------------------------------*/

    hypre_assert( num_vectors == hypre_VectorNumVectors(y) );
 
    if (num_rows != x_size)
              ierr = 1;

    if (num_cols != y_size)
              ierr = 2;

    if (num_rows != x_size && num_cols != y_size)
              ierr = 3;
   /*-----------------------------------------------------------------------
    * Do (alpha == 0.0) computation - RDF: USE MACHINE EPS
    *-----------------------------------------------------------------------*/

   if (alpha == 0.0)
   {

      for (i = 0; i < num_cols*num_vectors; i++)
	 y_data[i] *= beta;

      return ierr;
   }

   /*-----------------------------------------------------------------------
    * y = (beta/alpha)*y
    *-----------------------------------------------------------------------*/

   temp = beta / alpha;
   
   if (temp != 1.0)
   {
      if (temp == 0.0)
      {

	 for (i = 0; i < num_cols*num_vectors; i++)
	    y_data[i] = 0.0;
      }
      else
      {

	 for (i = 0; i < num_cols*num_vectors; i++)
	    y_data[i] *= temp;
      }
   }

   /*-----------------------------------------------------------------
    * y += A^T*x
    *-----------------------------------------------------------------*/
   num_threads = hypre_NumThreads();
   if (num_threads > 1)
   {


      for (i1 = 0; i1 < num_threads; i1++)
      {
         size = num_cols/num_threads;
         rest = num_cols - size*num_threads;
         if (i1 < rest)
         {
            ns = i1*size+i1-1;
            ne = (i1+1)*size+i1+1;
         }
         else
         {
            ns = i1*size+rest-1;
            ne = (i1+1)*size+rest;
         }
         if ( num_vectors==1 )
         {
            for (i = 0; i < num_rows; i++)
            {
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  j = A_j[jj];
                  if (j > ns && j < ne)
                     y_data[j] += A_data[jj] * x_data[i];
               }
            }
         }
         else
         {
            for (i = 0; i < num_rows; i++)
            {
               for ( jv=0; jv<num_vectors; ++jv )
               {
                  for (jj = A_i[i]; jj < A_i[i+1]; jj++)
                  {
                     j = A_j[jj];
                     if (j > ns && j < ne)
                        y_data[ j*idxstride_y + jv*vecstride_y ] +=
                           A_data[jj] * x_data[ i*idxstride_x + jv*vecstride_x];
                  }
               }
            }
         }

      }
   }
   else 
   {
      for (i = 0; i < num_rows; i++)
      {
         if ( num_vectors==1 )
         {
            for (jj = A_i[i]; jj < A_i[i+1]; jj++)
            {
               j = A_j[jj];
               y_data[j] += A_data[jj] * x_data[i];
            }
         }
         else
         {
            for ( jv=0; jv<num_vectors; ++jv )
            {
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  j = A_j[jj];
                  y_data[ j*idxstride_y + jv*vecstride_y ] +=
                     A_data[jj] * x_data[ i*idxstride_x + jv*vecstride_x ];
               }
            }
         }
      }
   }
   /*-----------------------------------------------------------------
    * y = alpha*y
    *-----------------------------------------------------------------*/

   if (alpha != 1.0)
   {

      for (i = 0; i < num_cols*num_vectors; i++)
	 y_data[i] *= alpha;
   }

   return ierr;
}
Пример #5
0
HYPRE_Int 
hypre_MaxwellSolve2( void                * maxwell_vdata,
                     hypre_SStructMatrix * A_in,
                     hypre_SStructVector * f,
                     hypre_SStructVector * u )
{
   hypre_MaxwellData     *maxwell_data = maxwell_vdata;

   hypre_ParVector       *f_edge;
   hypre_ParVector       *u_edge;

   HYPRE_Int              max_iter     = maxwell_data-> max_iter;
   double                 tol          = maxwell_data-> tol;
   HYPRE_Int              rel_change   = maxwell_data-> rel_change;
   HYPRE_Int              zero_guess   = maxwell_data-> zero_guess;
   HYPRE_Int              npre_relax   = maxwell_data-> num_pre_relax;
   HYPRE_Int              npost_relax  = maxwell_data-> num_post_relax;

   hypre_ParCSRMatrix   **Ann_l        = maxwell_data-> Ann_l;
   hypre_ParCSRMatrix   **Pn_l         = maxwell_data-> Pn_l;
   hypre_ParCSRMatrix   **RnT_l        = maxwell_data-> RnT_l;
   hypre_ParVector      **bn_l         = maxwell_data-> bn_l;
   hypre_ParVector      **xn_l         = maxwell_data-> xn_l;
   hypre_ParVector      **resn_l       = maxwell_data-> resn_l;
   hypre_ParVector      **en_l         = maxwell_data-> en_l;
   hypre_ParVector      **nVtemp2_l    = maxwell_data-> nVtemp2_l;
   HYPRE_Int            **nCF_marker_l = maxwell_data-> nCF_marker_l;
   double                *nrelax_weight= maxwell_data-> nrelax_weight;
   double                *nomega       = maxwell_data-> nomega;
   HYPRE_Int              nrelax_type  = maxwell_data-> nrelax_type;
   HYPRE_Int              node_numlevs = maxwell_data-> node_numlevels;

   hypre_ParCSRMatrix    *Tgrad        = maxwell_data-> Tgrad;
   hypre_ParCSRMatrix    *T_transpose  = maxwell_data-> T_transpose;

   hypre_ParCSRMatrix   **Aee_l        = maxwell_data-> Aee_l;
   hypre_IJMatrix       **Pe_l         = maxwell_data-> Pe_l;
   hypre_IJMatrix       **ReT_l        = maxwell_data-> ReT_l;
   hypre_ParVector      **be_l         = maxwell_data-> be_l;
   hypre_ParVector      **xe_l         = maxwell_data-> xe_l;
   hypre_ParVector      **rese_l       = maxwell_data-> rese_l;
   hypre_ParVector      **ee_l         = maxwell_data-> ee_l;
   hypre_ParVector      **eVtemp2_l    = maxwell_data-> eVtemp2_l;
   HYPRE_Int            **eCF_marker_l = maxwell_data-> eCF_marker_l;
   double                *erelax_weight= maxwell_data-> erelax_weight;
   double                *eomega       = maxwell_data-> eomega;
   HYPRE_Int              erelax_type  = maxwell_data-> erelax_type;
   HYPRE_Int              edge_numlevs = maxwell_data-> edge_numlevels;

   HYPRE_Int            **BdryRanks_l  = maxwell_data-> BdryRanks_l;
   HYPRE_Int             *BdryRanksCnts_l= maxwell_data-> BdryRanksCnts_l;

   HYPRE_Int              logging      = maxwell_data-> logging;
   double                *norms        = maxwell_data-> norms;
   double                *rel_norms    = maxwell_data-> rel_norms;

   HYPRE_Int              Solve_err_flag;
   HYPRE_Int              relax_local, cycle_param;
                                                                                                            
   double                 b_dot_b = 0, r_dot_r, eps = 0;
   double                 e_dot_e, x_dot_x;

   HYPRE_Int              i, j;
   HYPRE_Int              level;

   HYPRE_Int              ierr= 0;

   
   /* added for the relaxation routines */
   hypre_ParVector *ze = NULL;

   if (hypre_NumThreads() > 1)
   {
      /* Aee is always bigger than Ann */

      ze = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(Aee_l[0]),
                                hypre_ParCSRMatrixGlobalNumRows(Aee_l[0]),
                                hypre_ParCSRMatrixRowStarts(Aee_l[0]));
      hypre_ParVectorInitialize(ze);
      hypre_ParVectorSetPartitioningOwner(ze,0);

   }

   hypre_BeginTiming(maxwell_data-> time_index);

   hypre_SStructVectorConvert(f, &f_edge);
   hypre_SStructVectorConvert(u, &u_edge);
   hypre_ParVectorZeroBCValues(f_edge, BdryRanks_l[0], BdryRanksCnts_l[0]);
   hypre_ParVectorZeroBCValues(u_edge, BdryRanks_l[0], BdryRanksCnts_l[0]);
   be_l[0]= f_edge;
   xe_l[0]= u_edge;

  /* the nodal fine vectors: xn= 0. bn= T'*(be- Aee*xe) is updated in the cycle. */
   hypre_ParVectorSetConstantValues(xn_l[0], 0.0);

   relax_local= 0;
   cycle_param= 0;

  (maxwell_data-> num_iterations) = 0;
  /* if max_iter is zero, return */
   if (max_iter == 0)
   {
      /* if using a zero initial guess, return zero */
      if (zero_guess)
      {
         hypre_ParVectorSetConstantValues(xe_l[0], 0.0);
      }
                                                                                                            
      hypre_EndTiming(maxwell_data -> time_index);
      return ierr;
   }
                                                                                                            
   /* part of convergence check */
   if (tol > 0.0)
   {
      /* eps = (tol^2) */
      b_dot_b= hypre_ParVectorInnerProd(be_l[0], be_l[0]);
      eps = tol*tol;
                                                                                                            
      /* if rhs is zero, return a zero solution */
      if (b_dot_b == 0.0)
      {
         hypre_ParVectorSetConstantValues(xe_l[0], 0.0);
         if (logging > 0)
         {
            norms[0]     = 0.0;
            rel_norms[0] = 0.0;
         }
                                                                                                            
         hypre_EndTiming(maxwell_data -> time_index);
         return ierr;
      }
   }

   /*-----------------------------------------------------
    * Do V-cycles:
    * For each index l, "fine" = l, "coarse" = (l-1)
    *   
    *   solution update:
    *      edge_sol= edge_sol + T*node_sol
    *-----------------------------------------------------*/
   for (i = 0; i < max_iter; i++)
   {
     /* compute fine grid residual & nodal rhs. */
      hypre_ParVectorCopy(be_l[0], rese_l[0]);
      hypre_ParCSRMatrixMatvec(-1.0, Aee_l[0], xe_l[0], 1.0, rese_l[0]);
      hypre_ParVectorZeroBCValues(rese_l[0], BdryRanks_l[0], BdryRanksCnts_l[0]);
      hypre_ParCSRMatrixMatvec(1.0, T_transpose, rese_l[0], 0.0, bn_l[0]);

      /* convergence check */
      if (tol > 0.0)
      {
         r_dot_r= hypre_ParVectorInnerProd(rese_l[0], rese_l[0]);

         if (logging > 0)
         {
            norms[i] = sqrt(r_dot_r);
            if (b_dot_b > 0)
               rel_norms[i] = sqrt(r_dot_r/b_dot_b);
            else
               rel_norms[i] = 0.0;
         }
                                                                                                            
         /* always do at least 1 V-cycle */
         if ((r_dot_r/b_dot_b < eps) && (i > 0))
         {
            if (rel_change)
            {
               if ((e_dot_e/x_dot_x) < eps)
                  break;
            }
            else
            {
               break;
            }
         }
      }

      hypre_ParVectorCopy(bn_l[0], resn_l[0]);
      hypre_ParCSRMatrixMatvec(-1.0, Ann_l[0], xn_l[0], 1.0, resn_l[0]);
      r_dot_r= hypre_ParVectorInnerProd(resn_l[0], resn_l[0]);

      for (level= 0; level<= node_numlevs-2; level++)
      {
         /*-----------------------------------------------
          * Down cycle
          *-----------------------------------------------*/
          for (j= 0; j< npre_relax; j++)
          {
             Solve_err_flag = hypre_BoomerAMGRelaxIF(Ann_l[level],
                                                     bn_l[level],
                                                     nCF_marker_l[level],
                                                     nrelax_type,
                                                     relax_local,
                                                     cycle_param,
                                                     nrelax_weight[level],
                                                     nomega[level],
                                                     NULL,
                                                     xn_l[level],
                                                     nVtemp2_l[level],
                                                     ze);
          }  /*for (j= 0; j< npre_relax; j++) */

         /* compute residuals */
          hypre_ParVectorCopy(bn_l[level], resn_l[level]);
          hypre_ParCSRMatrixMatvec(-1.0, Ann_l[level], xn_l[level], 
                                    1.0, resn_l[level]);

         /* restrict residuals */
          hypre_ParCSRMatrixMatvecT(1.0, RnT_l[level], resn_l[level],
                                    0.0, bn_l[level+1]);

         /* zero off initial guess for the next level */
          hypre_ParVectorSetConstantValues(xn_l[level+1], 0.0);

      }  /* for (level= 0; level<= node_numlevs-2; level++) */
 
      /* coarsest node solve */
      level= node_numlevs-1;
      Solve_err_flag = hypre_BoomerAMGRelaxIF(Ann_l[level],
                                              bn_l[level],
                                              nCF_marker_l[level],
                                              nrelax_type,
                                              relax_local,
                                              cycle_param,
                                              nrelax_weight[level],
                                              nomega[level],
                                              NULL,
                                              xn_l[level],
                                              nVtemp2_l[level],
                                              ze);

     /*---------------------------------------------------------------------
      *  Cycle up the levels.
      *---------------------------------------------------------------------*/
      for (level= (node_numlevs - 2); level>= 1; level--)
      {
          hypre_ParCSRMatrixMatvec(1.0, Pn_l[level], xn_l[level+1], 0.0,
                                   en_l[level]);
          hypre_ParVectorAxpy(1.0, en_l[level], xn_l[level]);

         /* post smooth */
          for (j= 0; j< npost_relax; j++)
          {
             Solve_err_flag = hypre_BoomerAMGRelaxIF(Ann_l[level],
                                                     bn_l[level],
                                                     nCF_marker_l[level],
                                                     nrelax_type,
                                                     relax_local,
                                                     cycle_param,
                                                     nrelax_weight[level],
                                                     nomega[level],
                                                     NULL,
                                                     xn_l[level],
                                                     nVtemp2_l[level],
                                                     ze);
          }
      }   /* for (level= (en_numlevs - 2); level>= 1; level--) */

      /* interpolate error and correct on finest grids */
      hypre_ParCSRMatrixMatvec(1.0, Pn_l[0], xn_l[1], 0.0, en_l[0]);
      hypre_ParVectorAxpy(1.0, en_l[0], xn_l[0]);
                                                                                                              
      for (j= 0; j< npost_relax; j++)
      {
         Solve_err_flag = hypre_BoomerAMGRelaxIF(Ann_l[0],
                                                 bn_l[0],
                                                 nCF_marker_l[0],
                                                 nrelax_type,
                                                 relax_local,
                                                 cycle_param,
                                                 nrelax_weight[0],
                                                 nomega[0],
                                                 NULL,
                                                 xn_l[0],
                                                 nVtemp2_l[0],
                                                 ze);
      }  /* for (j= 0; j< npost_relax; j++) */
      hypre_ParVectorCopy(bn_l[0], resn_l[0]);
      hypre_ParCSRMatrixMatvec(-1.0, Ann_l[0], xn_l[0], 1.0, resn_l[0]);

      /* add the gradient solution component to xe_l[0] */
      hypre_ParCSRMatrixMatvec(1.0, Tgrad, xn_l[0], 1.0, xe_l[0]);

      hypre_ParVectorCopy(be_l[0], rese_l[0]);
      hypre_ParCSRMatrixMatvec(-1.0, Aee_l[0], xe_l[0], 1.0, rese_l[0]);
      r_dot_r= hypre_ParVectorInnerProd(rese_l[0], rese_l[0]);

      for (level= 0; level<= edge_numlevs-2; level++)
      {
         /*-----------------------------------------------
          * Down cycle
          *-----------------------------------------------*/
          for (j= 0; j< npre_relax; j++)
          {
             Solve_err_flag = hypre_BoomerAMGRelaxIF(Aee_l[level],
                                                     be_l[level],
                                                     eCF_marker_l[level],
                                                     erelax_type,
                                                     relax_local,
                                                     cycle_param,
                                                     erelax_weight[level],
                                                     eomega[level],
                                                     NULL,
                                                     xe_l[level],
                                                     eVtemp2_l[level], 
                                                     ze);
          }  /*for (j= 0; j< npre_relax; j++) */
                                                                                                              
         /* compute residuals */
          hypre_ParVectorCopy(be_l[level], rese_l[level]);
          hypre_ParCSRMatrixMatvec(-1.0, Aee_l[level], xe_l[level],
                                    1.0, rese_l[level]);

         /* restrict residuals */
          hypre_ParCSRMatrixMatvecT(1.0,
             (hypre_ParCSRMatrix *) hypre_IJMatrixObject(ReT_l[level]),
                                    rese_l[level], 0.0, be_l[level+1]);
          hypre_ParVectorZeroBCValues(be_l[level+1], BdryRanks_l[level+1],
                                      BdryRanksCnts_l[level+1]);

         /* zero off initial guess for the next level */
          hypre_ParVectorSetConstantValues(xe_l[level+1], 0.0);
                                                                                                              
      }  /* for (level= 1; level<= edge_numlevels-2; level++) */
                                                                                                              
      /* coarsest edge solve */
      level= edge_numlevs-1;
      for (j= 0; j< npre_relax; j++)
      {
         Solve_err_flag = hypre_BoomerAMGRelaxIF(Aee_l[level],
                                                 be_l[level],
                                                 eCF_marker_l[level],
                                                 erelax_type,
                                                 relax_local,
                                                 cycle_param,
                                                 erelax_weight[level],
                                                 eomega[level],
                                                 NULL,
                                                 xe_l[level],
                                                 eVtemp2_l[level], 
                                                 ze);
      }

     /*---------------------------------------------------------------------
      *  Up cycle. 
      *---------------------------------------------------------------------*/
      for (level= (edge_numlevs - 2); level>= 1; level--)
      {
         hypre_ParCSRMatrixMatvec(1.0, 
           (hypre_ParCSRMatrix *) hypre_IJMatrixObject(Pe_l[level]), 
                                  xe_l[level+1], 0.0, ee_l[level]);
         hypre_ParVectorZeroBCValues(ee_l[level], BdryRanks_l[level],
                                     BdryRanksCnts_l[level]);
         hypre_ParVectorAxpy(1.0, ee_l[level], xe_l[level]);

         /* post smooth */
         for (j= 0; j< npost_relax; j++)
         {
            Solve_err_flag = hypre_BoomerAMGRelaxIF(Aee_l[level],
                                                    be_l[level],
                                                    eCF_marker_l[level],
                                                    erelax_type,
                                                    relax_local,
                                                    cycle_param,
                                                    erelax_weight[level],
                                                    eomega[level],
                                                    NULL,
                                                    xe_l[level],
                                                    eVtemp2_l[level], 
                                                    ze);
         }

      }  /* for (level= (edge_numlevs - 2); level>= 1; level--) */

      /* interpolate error and correct on finest grids */
      hypre_ParCSRMatrixMatvec(1.0, 
        (hypre_ParCSRMatrix *) hypre_IJMatrixObject(Pe_l[0]), 
                               xe_l[1], 0.0, ee_l[0]);
      hypre_ParVectorZeroBCValues(ee_l[0], BdryRanks_l[0],
                                  BdryRanksCnts_l[0]);
      hypre_ParVectorAxpy(1.0, ee_l[0], xe_l[0]);

      for (j= 0; j< npost_relax; j++)
      {
         Solve_err_flag = hypre_BoomerAMGRelaxIF(Aee_l[0],
                                                 be_l[0],
                                                 eCF_marker_l[0],
                                                 erelax_type,
                                                 relax_local,
                                                 cycle_param,
                                                 erelax_weight[0],
                                                 eomega[0],
                                                 NULL,
                                                 xe_l[0],
                                                 eVtemp2_l[0],
                                                 ze);
      }  /* for (j= 0; j< npost_relax; j++) */

      e_dot_e= hypre_ParVectorInnerProd(ee_l[0], ee_l[0]);
      x_dot_x= hypre_ParVectorInnerProd(xe_l[0], xe_l[0]);

      hypre_ParVectorCopy(be_l[0], rese_l[0]);
      hypre_ParCSRMatrixMatvec(-1.0, Aee_l[0], xe_l[0], 1.0, rese_l[0]);

      (maxwell_data -> num_iterations) = (i + 1);
   }

   hypre_EndTiming(maxwell_data -> time_index);


   if (ze)
      hypre_ParVectorDestroy(ze);

   return ierr;
}
Пример #6
0
int
hypre_CSRMatrixMatvecT( double           alpha,
               hypre_CSRMatrix *A,
               hypre_Vector    *x,
               double           beta,
               hypre_Vector    *y     )
{
   double     *A_data    = hypre_CSRMatrixData(A);
   int        *A_i       = hypre_CSRMatrixI(A);
   int        *A_j       = hypre_CSRMatrixJ(A);
   int         num_rows  = hypre_CSRMatrixNumRows(A);
   int         num_cols  = hypre_CSRMatrixNumCols(A);

   double     *x_data = hypre_VectorData(x);
   double     *y_data = hypre_VectorData(y);
   int         x_size = hypre_VectorSize(x);
   int         y_size = hypre_VectorSize(y);
   int         num_vectors = hypre_VectorNumVectors(x);
   int         idxstride_y = hypre_VectorIndexStride(y);
   int         vecstride_y = hypre_VectorVectorStride(y);
   int         idxstride_x = hypre_VectorIndexStride(x);
   int         vecstride_x = hypre_VectorVectorStride(x);

   double      temp;

   double      *y_data_expand = NULL;
   int         offset = 0;
#ifdef HYPRE_USING_OPENMP
   int         my_thread_num = 0;
#endif
   
   int         i, j, jv, jj;
   int         num_threads;

   int         ierr  = 0;

   /*---------------------------------------------------------------------
    *  Check for size compatibility.  MatvecT returns ierr = 1 if
    *  length of X doesn't equal the number of rows of A,
    *  ierr = 2 if the length of Y doesn't equal the number of 
    *  columns of A, and ierr = 3 if both are true.
    *
    *  Because temporary vectors are often used in MatvecT, none of 
    *  these conditions terminates processing, and the ierr flag
    *  is informational only.
    *--------------------------------------------------------------------*/

    hypre_assert( num_vectors == hypre_VectorNumVectors(y) );
 
    if (num_rows != x_size)
              ierr = 1;

    if (num_cols != y_size)
              ierr = 2;

    if (num_rows != x_size && num_cols != y_size)
              ierr = 3;
   /*-----------------------------------------------------------------------
    * Do (alpha == 0.0) computation - RDF: USE MACHINE EPS
    *-----------------------------------------------------------------------*/

   if (alpha == 0.0)
   {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) schedule(static)
#endif
      for (i = 0; i < num_cols*num_vectors; i++)
	 y_data[i] *= beta;

      return ierr;
   }

   /*-----------------------------------------------------------------------
    * y = (beta/alpha)*y
    *-----------------------------------------------------------------------*/

   temp = beta / alpha;
   
   if (temp != 1.0)
   {
      if (temp == 0.0)
      {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) schedule(static)
#endif
	 for (i = 0; i < num_cols*num_vectors; i++)
	    y_data[i] = 0.0;
      }
      else
      {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) schedule(static)
#endif
	 for (i = 0; i < num_cols*num_vectors; i++)
	    y_data[i] *= temp;
      }
   }

   /*-----------------------------------------------------------------
    * y += A^T*x
    *-----------------------------------------------------------------*/
   num_threads = hypre_NumThreads();
   if (num_threads > 1)
   {
      y_data_expand = hypre_CTAlloc(double, num_threads*y_size);
      
      if ( num_vectors==1 )
      {

#ifdef HYPRE_USING_OPENMP
#pragma omp parallel private(i,jj,j, my_thread_num, offset)    
         {                                      
            my_thread_num = omp_get_thread_num();
            offset =  y_size*my_thread_num;
#pragma omp for schedule(static)
#endif
            for (i = 0; i < num_rows; i++)
            {
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  j = A_j[jj];
                  y_data_expand[offset + j] += A_data[jj] * x_data[i];
               }
            }
#ifdef HYPRE_USING_OPENMP
            /* implied barrier */           
#pragma omp for schedule(static)
#endif
            for (i = 0; i < y_size; i++)
            {
               for (j = 0; j < num_threads; j++)
               {
                  y_data[i] += y_data_expand[j*y_size + i];
                  /*y_data_expand[j*y_size + i] = 0; //zero out for next time */
               }
            }
#ifdef HYPRE_USING_OPENMP
         } /* end parallel region */
#endif         
         hypre_TFree(y_data_expand);
      }
      else
      {
         /* MULTIPLE VECTORS NOT THREADED YET */
         for (i = 0; i < num_rows; i++)
         {
            for ( jv=0; jv<num_vectors; ++jv )
            {
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  j = A_j[jj];
                  y_data[ j*idxstride_y + jv*vecstride_y ] +=
                     A_data[jj] * x_data[ i*idxstride_x + jv*vecstride_x];
               }
            }
         }
      }

      hypre_TFree(y_data_expand);
   }
Пример #7
0
HYPRE_Int
hypre_CSRMatrixMatvecT( HYPRE_Complex    alpha,
                        hypre_CSRMatrix *A,
                        hypre_Vector    *x,
                        HYPRE_Complex    beta,
                        hypre_Vector    *y     )
{
   HYPRE_Complex    *A_data    = hypre_CSRMatrixData(A);
   HYPRE_Int        *A_i       = hypre_CSRMatrixI(A);
   HYPRE_Int        *A_j       = hypre_CSRMatrixJ(A);
   HYPRE_Int         num_rows  = hypre_CSRMatrixNumRows(A);
   HYPRE_Int         num_cols  = hypre_CSRMatrixNumCols(A);

   HYPRE_Complex    *x_data = hypre_VectorData(x);
   HYPRE_Complex    *y_data = hypre_VectorData(y);
   HYPRE_Int         x_size = hypre_VectorSize(x);
   HYPRE_Int         y_size = hypre_VectorSize(y);
   HYPRE_Int         num_vectors = hypre_VectorNumVectors(x);
   HYPRE_Int         idxstride_y = hypre_VectorIndexStride(y);
   HYPRE_Int         vecstride_y = hypre_VectorVectorStride(y);
   HYPRE_Int         idxstride_x = hypre_VectorIndexStride(x);
   HYPRE_Int         vecstride_x = hypre_VectorVectorStride(x);

   HYPRE_Complex     temp;

   HYPRE_Complex    *y_data_expand;
   HYPRE_Int         my_thread_num = 0, offset = 0;
   
   HYPRE_Int         i, j, jv, jj;
   HYPRE_Int         num_threads;

   HYPRE_Int         ierr  = 0;

   hypre_Vector     *x_tmp = NULL;

   /*---------------------------------------------------------------------
    *  Check for size compatibility.  MatvecT returns ierr = 1 if
    *  length of X doesn't equal the number of rows of A,
    *  ierr = 2 if the length of Y doesn't equal the number of 
    *  columns of A, and ierr = 3 if both are true.
    *
    *  Because temporary vectors are often used in MatvecT, none of 
    *  these conditions terminates processing, and the ierr flag
    *  is informational only.
    *--------------------------------------------------------------------*/

   hypre_assert( num_vectors == hypre_VectorNumVectors(y) );
 
   if (num_rows != x_size)
      ierr = 1;

   if (num_cols != y_size)
      ierr = 2;

   if (num_rows != x_size && num_cols != y_size)
      ierr = 3;
   /*-----------------------------------------------------------------------
    * Do (alpha == 0.0) computation - RDF: USE MACHINE EPS
    *-----------------------------------------------------------------------*/

   if (alpha == 0.0)
   {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
      for (i = 0; i < num_cols*num_vectors; i++)
         y_data[i] *= beta;

      return ierr;
   }

   if (x == y)
   {
      x_tmp = hypre_SeqVectorCloneDeep(x);
      x_data = hypre_VectorData(x_tmp);
   }

   /*-----------------------------------------------------------------------
    * y = (beta/alpha)*y
    *-----------------------------------------------------------------------*/

   temp = beta / alpha;
   
   if (temp != 1.0)
   {
      if (temp == 0.0)
      {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
         for (i = 0; i < num_cols*num_vectors; i++)
            y_data[i] = 0.0;
      }
      else
      {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
         for (i = 0; i < num_cols*num_vectors; i++)
            y_data[i] *= temp;
      }
   }

   /*-----------------------------------------------------------------
    * y += A^T*x
    *-----------------------------------------------------------------*/
   num_threads = hypre_NumThreads();
   if (num_threads > 1)
   {
      y_data_expand = hypre_CTAlloc(HYPRE_Complex, num_threads*y_size);

      if ( num_vectors==1 )
      {

#ifdef HYPRE_USING_OPENMP
#pragma omp parallel private(i,jj,j,my_thread_num,offset)
#endif
         {                                      
            my_thread_num = hypre_GetThreadNum();
            offset =  y_size*my_thread_num;
#ifdef HYPRE_USING_OPENMP
#pragma omp for HYPRE_SMP_SCHEDULE
#endif
            for (i = 0; i < num_rows; i++)
            {
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  j = A_j[jj];
                  y_data_expand[offset + j] += A_data[jj] * x_data[i];
               }
            }

            /* implied barrier (for threads)*/           
#ifdef HYPRE_USING_OPENMP
#pragma omp for HYPRE_SMP_SCHEDULE
#endif
            for (i = 0; i < y_size; i++)
            {
               for (j = 0; j < num_threads; j++)
               {
                  y_data[i] += y_data_expand[j*y_size + i];
                  
               }
            }

         } /* end parallel threaded region */
      }
      else
      {
         /* multiple vector case is not threaded */
         for (i = 0; i < num_rows; i++)
         {
            for ( jv=0; jv<num_vectors; ++jv )
            {
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  j = A_j[jj];
                  y_data[ j*idxstride_y + jv*vecstride_y ] +=
                     A_data[jj] * x_data[ i*idxstride_x + jv*vecstride_x];
               }
            }
         }
      }

      hypre_TFree(y_data_expand);

   }
   else 
   {
      for (i = 0; i < num_rows; i++)
      {
         if ( num_vectors==1 )
         {
            for (jj = A_i[i]; jj < A_i[i+1]; jj++)
            {
               j = A_j[jj];
               y_data[j] += A_data[jj] * x_data[i];
            }
         }
         else
         {
            for ( jv=0; jv<num_vectors; ++jv )
            {
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  j = A_j[jj];
                  y_data[ j*idxstride_y + jv*vecstride_y ] +=
                     A_data[jj] * x_data[ i*idxstride_x + jv*vecstride_x ];
               }
            }
         }
      }
   }
   /*-----------------------------------------------------------------
    * y = alpha*y
    *-----------------------------------------------------------------*/

   if (alpha != 1.0)
   {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
      for (i = 0; i < num_cols*num_vectors; i++)
         y_data[i] *= alpha;
   }

   if (x == y) hypre_SeqVectorDestroy(x_tmp);

   return ierr;
}
Пример #8
0
HYPRE_Int
hypre_BoomerAMGCycle( void              *amg_vdata, 
                   hypre_ParVector  **F_array,
                   hypre_ParVector  **U_array   )
{
   hypre_ParAMGData *amg_data = amg_vdata;

   HYPRE_Solver *smoother;
   /* Data Structure variables */

   hypre_ParCSRMatrix    **A_array;
   hypre_ParCSRMatrix    **P_array;
   hypre_ParCSRMatrix    **R_array;
   hypre_ParVector    *Utemp;
   hypre_ParVector    *Vtemp;
   hypre_ParVector    *Rtemp;
   hypre_ParVector    *Ptemp;
   hypre_ParVector    *Ztemp;
   hypre_ParVector    *Aux_U;
   hypre_ParVector    *Aux_F;

   hypre_ParCSRBlockMatrix    **A_block_array;
   hypre_ParCSRBlockMatrix    **P_block_array;
   hypre_ParCSRBlockMatrix    **R_block_array;

   HYPRE_Real   *Ztemp_data;
   HYPRE_Real   *Ptemp_data;
   HYPRE_Int     **CF_marker_array;
   /* HYPRE_Int     **unknown_map_array;
   HYPRE_Int     **point_map_array;
   HYPRE_Int     **v_at_point_array; */

   HYPRE_Real    cycle_op_count;   
   HYPRE_Int       cycle_type;
   HYPRE_Int       num_levels;
   HYPRE_Int       max_levels;

   HYPRE_Real   *num_coeffs;
   HYPRE_Int      *num_grid_sweeps;   
   HYPRE_Int      *grid_relax_type;   
   HYPRE_Int     **grid_relax_points;  

   HYPRE_Int     block_mode;
   
   HYPRE_Real  *max_eig_est;
   HYPRE_Real  *min_eig_est;
   HYPRE_Int      cheby_order;
   HYPRE_Real   cheby_fraction;

 /* Local variables  */ 
   HYPRE_Int      *lev_counter;
   HYPRE_Int       Solve_err_flag;
   HYPRE_Int       k;
   HYPRE_Int       i, j, jj;
   HYPRE_Int       level;
   HYPRE_Int       cycle_param;
   HYPRE_Int       coarse_grid;
   HYPRE_Int       fine_grid;
   HYPRE_Int       Not_Finished;
   HYPRE_Int       num_sweep;
   HYPRE_Int       cg_num_sweep = 1;
   HYPRE_Int       relax_type;
   HYPRE_Int       relax_points;
   HYPRE_Int       relax_order;
   HYPRE_Int       relax_local;
   HYPRE_Int       old_version = 0;
   HYPRE_Real   *relax_weight;
   HYPRE_Real   *omega;
   HYPRE_Real    alfa, beta, gammaold;
   HYPRE_Real    gamma = 1.0;
   HYPRE_Int       local_size;
/*   HYPRE_Int      *smooth_option; */
   HYPRE_Int       smooth_type;
   HYPRE_Int       smooth_num_levels;
   HYPRE_Int       num_threads, my_id;

   HYPRE_Real    alpha;
   HYPRE_Real  **l1_norms = NULL;
   HYPRE_Real   *l1_norms_level;

   HYPRE_Int seq_cg = 0;

   MPI_Comm comm;

#if 0
   HYPRE_Real   *D_mat;
   HYPRE_Real   *S_vec;
#endif
   
   /* Acquire data and allocate storage */

   num_threads = hypre_NumThreads();

   A_array           = hypre_ParAMGDataAArray(amg_data);
   P_array           = hypre_ParAMGDataPArray(amg_data);
   R_array           = hypre_ParAMGDataRArray(amg_data);
   CF_marker_array   = hypre_ParAMGDataCFMarkerArray(amg_data);
   Vtemp             = hypre_ParAMGDataVtemp(amg_data);
   Rtemp             = hypre_ParAMGDataRtemp(amg_data);
   Ptemp             = hypre_ParAMGDataPtemp(amg_data);
   Ztemp             = hypre_ParAMGDataZtemp(amg_data);
   num_levels        = hypre_ParAMGDataNumLevels(amg_data);
   max_levels        = hypre_ParAMGDataMaxLevels(amg_data);
   cycle_type        = hypre_ParAMGDataCycleType(amg_data);

   A_block_array     = hypre_ParAMGDataABlockArray(amg_data);
   P_block_array     = hypre_ParAMGDataPBlockArray(amg_data);
   R_block_array     = hypre_ParAMGDataRBlockArray(amg_data);
   block_mode        = hypre_ParAMGDataBlockMode(amg_data);

   num_grid_sweeps     = hypre_ParAMGDataNumGridSweeps(amg_data);
   grid_relax_type     = hypre_ParAMGDataGridRelaxType(amg_data);
   grid_relax_points   = hypre_ParAMGDataGridRelaxPoints(amg_data);
   relax_order         = hypre_ParAMGDataRelaxOrder(amg_data);
   relax_weight        = hypre_ParAMGDataRelaxWeight(amg_data); 
   omega               = hypre_ParAMGDataOmega(amg_data); 
   smooth_type         = hypre_ParAMGDataSmoothType(amg_data); 
   smooth_num_levels   = hypre_ParAMGDataSmoothNumLevels(amg_data); 
   l1_norms            = hypre_ParAMGDataL1Norms(amg_data); 
   /* smooth_option       = hypre_ParAMGDataSmoothOption(amg_data); */

   max_eig_est = hypre_ParAMGDataMaxEigEst(amg_data);
   min_eig_est = hypre_ParAMGDataMinEigEst(amg_data);
   cheby_order = hypre_ParAMGDataChebyOrder(amg_data);
   cheby_fraction = hypre_ParAMGDataChebyFraction(amg_data);

   cycle_op_count = hypre_ParAMGDataCycleOpCount(amg_data);

   lev_counter = hypre_CTAlloc(HYPRE_Int, num_levels);

   if (hypre_ParAMGDataParticipate(amg_data)) seq_cg = 1;

   /* Initialize */

   Solve_err_flag = 0;

   if (grid_relax_points) old_version = 1;

   num_coeffs = hypre_CTAlloc(HYPRE_Real, num_levels);
   num_coeffs[0]    = hypre_ParCSRMatrixDNumNonzeros(A_array[0]);
   comm = hypre_ParCSRMatrixComm(A_array[0]);
   hypre_MPI_Comm_rank(comm,&my_id);

   if (block_mode)
   {
      for (j = 1; j < num_levels; j++)
         num_coeffs[j] = hypre_ParCSRBlockMatrixNumNonzeros(A_block_array[j]);
      
   }
   else 
   {
       for (j = 1; j < num_levels; j++)
         num_coeffs[j] = hypre_ParCSRMatrixDNumNonzeros(A_array[j]);
   }
   
   /*---------------------------------------------------------------------
    *    Initialize cycling control counter
    *
    *     Cycling is controlled using a level counter: lev_counter[k]
    *     
    *     Each time relaxation is performed on level k, the
    *     counter is decremented by 1. If the counter is then
    *     negative, we go to the next finer level. If non-
    *     negative, we go to the next coarser level. The
    *     following actions control cycling:
    *     
    *     a. lev_counter[0] is initialized to 1.
    *     b. lev_counter[k] is initialized to cycle_type for k>0.
    *     
    *     c. During cycling, when going down to level k, lev_counter[k]
    *        is set to the max of (lev_counter[k],cycle_type)
    *---------------------------------------------------------------------*/

   Not_Finished = 1;

   lev_counter[0] = 1;
   for (k = 1; k < num_levels; ++k) 
   {
      lev_counter[k] = cycle_type;
   }

   level = 0;
   cycle_param = 1;

   smoother = hypre_ParAMGDataSmoother(amg_data);

   if (smooth_num_levels > 0)
   {
      if (smooth_type == 7 || smooth_type == 8
          || smooth_type == 17 || smooth_type == 18
          || smooth_type == 9 || smooth_type == 19)
      {
         HYPRE_Int actual_local_size = hypre_ParVectorActualLocalSize(Vtemp);
         Utemp = hypre_ParVectorCreate(comm,hypre_ParVectorGlobalSize(Vtemp),
                        hypre_ParVectorPartitioning(Vtemp));
         hypre_ParVectorOwnsPartitioning(Utemp) = 0;
         local_size 
            = hypre_VectorSize(hypre_ParVectorLocalVector(Vtemp));
         if (local_size < actual_local_size)
         {
            hypre_VectorData(hypre_ParVectorLocalVector(Utemp)) =
	 	hypre_CTAlloc(HYPRE_Complex, actual_local_size);
            hypre_ParVectorActualLocalSize(Utemp) = actual_local_size;
         }
         else
	     hypre_ParVectorInitialize(Utemp);
      }
   }
   
  
   /*---------------------------------------------------------------------
    * Main loop of cycling
    *--------------------------------------------------------------------*/
  
   while (Not_Finished)
   {
      if (num_levels > 1) 
      {
        local_size 
            = hypre_VectorSize(hypre_ParVectorLocalVector(F_array[level]));
        hypre_VectorSize(hypre_ParVectorLocalVector(Vtemp)) = local_size;
        if (smooth_num_levels <= level)
	{
           cg_num_sweep = 1;
           num_sweep = num_grid_sweeps[cycle_param];
           Aux_U = U_array[level];
           Aux_F = F_array[level];
	}
	else if (smooth_type > 9)
	{
           hypre_VectorSize(hypre_ParVectorLocalVector(Ztemp)) = local_size;
           hypre_VectorSize(hypre_ParVectorLocalVector(Rtemp)) = local_size;
           hypre_VectorSize(hypre_ParVectorLocalVector(Ptemp)) = local_size;
           Ztemp_data = hypre_VectorData(hypre_ParVectorLocalVector(Ztemp));
           Ptemp_data = hypre_VectorData(hypre_ParVectorLocalVector(Ptemp));
           hypre_ParVectorSetConstantValues(Ztemp,0);
           alpha = -1.0;
           beta = 1.0;
           hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A_array[level], 
                                U_array[level], beta, F_array[level], Rtemp);
	   cg_num_sweep = hypre_ParAMGDataSmoothNumSweeps(amg_data);
           num_sweep = num_grid_sweeps[cycle_param];
           Aux_U = Ztemp;
           Aux_F = Rtemp;
	}
	else 
	{
           cg_num_sweep = 1;
	   num_sweep = hypre_ParAMGDataSmoothNumSweeps(amg_data);
           Aux_U = U_array[level];
           Aux_F = F_array[level];
	}
        relax_type = grid_relax_type[cycle_param];
      }
      else /* AB: 4/08: removed the max_levels > 1 check - should do this when max-levels = 1 also */
      {
        /* If no coarsening occurred, apply a simple smoother once */
        Aux_U = U_array[level];
        Aux_F = F_array[level];
        num_sweep = 1;
        /* TK: Use the user relax type (instead of 0) to allow for setting a
           convergent smoother (e.g. in the solution of singular problems). */
        relax_type = hypre_ParAMGDataUserRelaxType(amg_data);
      }

      if (l1_norms != NULL)
         l1_norms_level = l1_norms[level];
      else
         l1_norms_level = NULL;

      if (cycle_param == 3 && seq_cg)
      {
         hypre_seqAMGCycle(amg_data, level, F_array, U_array);
      }
      else
      {
         
        /*------------------------------------------------------------------
         * Do the relaxation num_sweep times
         *-----------------------------------------------------------------*/
         for (jj = 0; jj < cg_num_sweep; jj++)
         {
	   if (smooth_num_levels > level && smooth_type > 9)
              hypre_ParVectorSetConstantValues(Aux_U,0);

           for (j = 0; j < num_sweep; j++)
           {
              if (num_levels == 1 && max_levels > 1)
              {
                 relax_points = 0;
                 relax_local = 0;
              }
              else
              {
                 if (old_version)
		    relax_points = grid_relax_points[cycle_param][j];
                 relax_local = relax_order;
              }

              /*-----------------------------------------------
               * VERY sloppy approximation to cycle complexity
               *-----------------------------------------------*/
              if (old_version && level < num_levels -1)
              {
                 switch (relax_points)
                 {
                    case 1:
                    cycle_op_count += num_coeffs[level+1];
                    break;
  
                    case -1: 
                    cycle_op_count += (num_coeffs[level]-num_coeffs[level+1]); 
                    break;
                 }
              }
	      else
              {
                 cycle_op_count += num_coeffs[level]; 
              }
              /*-----------------------------------------------
                Choose Smoother
                -----------------------------------------------*/

              if (smooth_num_levels > level && 
			(smooth_type == 7 || smooth_type == 8 ||
			smooth_type == 9 || smooth_type == 19 ||
			smooth_type == 17 || smooth_type == 18))
              {
                 hypre_VectorSize(hypre_ParVectorLocalVector(Utemp)) = local_size;
                 alpha = -1.0;
                 beta = 1.0;
                 hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A_array[level], 
                                U_array[level], beta, Aux_F, Vtemp);
                 if (smooth_type == 8 || smooth_type == 18)
                    HYPRE_ParCSRParaSailsSolve(smoother[level],
                                 (HYPRE_ParCSRMatrix) A_array[level],
                                 (HYPRE_ParVector) Vtemp,
                                 (HYPRE_ParVector) Utemp);
                 else if (smooth_type == 7 || smooth_type == 17)
                    HYPRE_ParCSRPilutSolve(smoother[level],
                                 (HYPRE_ParCSRMatrix) A_array[level],
                                 (HYPRE_ParVector) Vtemp,
                                 (HYPRE_ParVector) Utemp);
                 else if (smooth_type == 9 || smooth_type == 19)
                    HYPRE_EuclidSolve(smoother[level],
                                 (HYPRE_ParCSRMatrix) A_array[level],
                                 (HYPRE_ParVector) Vtemp,
                                 (HYPRE_ParVector) Utemp);
                 hypre_ParVectorAxpy(relax_weight[level],Utemp,Aux_U);
	      }
              else if (smooth_num_levels > level &&
			(smooth_type == 6 || smooth_type == 16))
              {
                 HYPRE_SchwarzSolve(smoother[level],
                                 (HYPRE_ParCSRMatrix) A_array[level],
                                 (HYPRE_ParVector) Aux_F,
                                  (HYPRE_ParVector) Aux_U);
              }
              /*else if (relax_type == 99)*/
              else if (relax_type == 9 || relax_type == 99)
              { /* Gaussian elimination */
                 hypre_GaussElimSolve(amg_data, level, relax_type);
              }
              else if (relax_type == 18)
              {   /* L1 - Jacobi*/
                 if (relax_order == 1 && cycle_param < 3)
                 {
                    /* need to do CF - so can't use the AMS one */
                    HYPRE_Int i;
                    HYPRE_Int loc_relax_points[2];
                    if (cycle_type < 2)
                    {
                       loc_relax_points[0] = 1;
                       loc_relax_points[1] = -1;
                    }
                    else
                    {
                       loc_relax_points[0] = -1;
                       loc_relax_points[1] = 1;
                    }
                    for (i=0; i < 2; i++)
                       hypre_ParCSRRelax_L1_Jacobi(A_array[level],
                                                 Aux_F,
                                                 CF_marker_array[level],
                                                 loc_relax_points[i],
                                                 relax_weight[level],
                                                 l1_norms[level],
                                                 Aux_U,
                                                 Vtemp);
                 }
                 else /* not CF - so use through AMS */
                 {
                    if (num_threads == 1)
                       hypre_ParCSRRelax(A_array[level], 
                                       Aux_F,
                                       1,
                                       1,
                                       l1_norms_level,
                                       relax_weight[level],
                                       omega[level],0,0,0,0,
                                       Aux_U,
                                       Vtemp, 
                                       Ztemp);

                    else
                       hypre_ParCSRRelaxThreads(A_array[level], 
                                              Aux_F,
                                              1,
                                              1,
                                              l1_norms_level,
                                              relax_weight[level],
                                              omega[level],
                                              Aux_U,
                                              Vtemp,
                                              Ztemp);
                 }
              }
              else if (relax_type == 15)
              {  /* CG */
                 if (j ==0) /* do num sweep iterations of CG */
                    hypre_ParCSRRelax_CG( smoother[level],
                                        A_array[level], 
                                        Aux_F,      
                                        Aux_U,
                                        num_sweep);
              }
              else if (relax_type == 16)
              { /* scaled Chebyshev */
                 HYPRE_Int scale = 1;
                 HYPRE_Int variant = 0;
                 hypre_ParCSRRelax_Cheby(A_array[level], 
                                       Aux_F,
                                       max_eig_est[level],     
                                       min_eig_est[level],     
                                       cheby_fraction, cheby_order, scale,
                                       variant, Aux_U, Vtemp, Ztemp );
              }
              else if (relax_type ==17)
              {
                 hypre_BoomerAMGRelax_FCFJacobi(A_array[level], 
                                              Aux_F,
                                              CF_marker_array[level],
                                              relax_weight[level],
                                              Aux_U,
                                              Vtemp);
              }
	      else if (old_version)
	      {
                 Solve_err_flag = hypre_BoomerAMGRelax(A_array[level], 
                                                     Aux_F,
                                                     CF_marker_array[level],
                                                     relax_type, relax_points,
                                                     relax_weight[level],
                                                     omega[level],
                                                     l1_norms_level,
                                                     Aux_U,
                                                     Vtemp, 
                                                     Ztemp);
	      }
	      else 
	      {
                 /* smoother than can have CF ordering */
                 if (block_mode)
                 {
                     Solve_err_flag = hypre_BoomerAMGBlockRelaxIF(A_block_array[level], 
                                                                  Aux_F,
                                                                  CF_marker_array[level],
                                                                  relax_type,
                                                                  relax_local,
                                                                  cycle_param,
                                                                  relax_weight[level],
                                                                  omega[level],
                                                                  Aux_U,
                                                                  Vtemp);
                 }
                 else
                 {
                    Solve_err_flag = hypre_BoomerAMGRelaxIF(A_array[level], 
                                                          Aux_F,
                                                          CF_marker_array[level],
                                                          relax_type,
                                                          relax_local,
                                                          cycle_param,
                                                          relax_weight[level],
                                                          omega[level],
                                                          l1_norms_level,
                                                          Aux_U,
                                                          Vtemp, 
                                                          Ztemp);
                 }
	      }
 
              if (Solve_err_flag != 0)
                 return(Solve_err_flag);
           }
           if  (smooth_num_levels > level && smooth_type > 9)
           {
              gammaold = gamma;
              gamma = hypre_ParVectorInnerProd(Rtemp,Ztemp);
              if (jj == 0)
                 hypre_ParVectorCopy(Ztemp,Ptemp);
              else
              {
                 beta = gamma/gammaold;
                 for (i=0; i < local_size; i++)
		    Ptemp_data[i] = Ztemp_data[i] + beta*Ptemp_data[i];
              }
              hypre_ParCSRMatrixMatvec(1.0,A_array[level],Ptemp,0.0,Vtemp);
              alfa = gamma /hypre_ParVectorInnerProd(Ptemp,Vtemp);
              hypre_ParVectorAxpy(alfa,Ptemp,U_array[level]);
              hypre_ParVectorAxpy(-alfa,Vtemp,Rtemp);
           }
        }
      }

      /*------------------------------------------------------------------
       * Decrement the control counter and determine which grid to visit next
       *-----------------------------------------------------------------*/

      --lev_counter[level];
       
      if (lev_counter[level] >= 0 && level != num_levels-1)
      {
                               
         /*---------------------------------------------------------------
          * Visit coarser level next.  
 	  * Compute residual using hypre_ParCSRMatrixMatvec.
          * Perform restriction using hypre_ParCSRMatrixMatvecT.
          * Reset counters and cycling parameters for coarse level
          *--------------------------------------------------------------*/

         fine_grid = level;
         coarse_grid = level + 1;

         hypre_ParVectorSetConstantValues(U_array[coarse_grid], 0.0); 
          
         alpha = -1.0;
         beta = 1.0;

         if (block_mode)
         {
            hypre_ParVectorCopy(F_array[fine_grid],Vtemp);
            hypre_ParCSRBlockMatrixMatvec(alpha, A_block_array[fine_grid], U_array[fine_grid],
                                          beta, Vtemp);
         }
         else 
         {
            // JSP: avoid unnecessary copy using out-of-place version of SpMV
            hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A_array[fine_grid], U_array[fine_grid],
                                               beta, F_array[fine_grid], Vtemp);
         }

         alpha = 1.0;
         beta = 0.0;

         if (block_mode)
         {
            hypre_ParCSRBlockMatrixMatvecT(alpha,R_block_array[fine_grid],Vtemp,
                                      beta,F_array[coarse_grid]);
         }
         else
         {
            hypre_ParCSRMatrixMatvecT(alpha,R_array[fine_grid],Vtemp,
                                      beta,F_array[coarse_grid]);
         }

         ++level;
         lev_counter[level] = hypre_max(lev_counter[level],cycle_type);
         cycle_param = 1;
         if (level == num_levels-1) cycle_param = 3;
      }

      else if (level != 0)
      {
         /*---------------------------------------------------------------
          * Visit finer level next.
          * Interpolate and add correction using hypre_ParCSRMatrixMatvec.
          * Reset counters and cycling parameters for finer level.
          *--------------------------------------------------------------*/

         fine_grid = level - 1;
         coarse_grid = level;
         alpha = 1.0;
         beta = 1.0;
         if (block_mode)
         {
            hypre_ParCSRBlockMatrixMatvec(alpha, P_block_array[fine_grid], 
                                     U_array[coarse_grid],
                                     beta, U_array[fine_grid]);   
         }
         else 
         {
            hypre_ParCSRMatrixMatvec(alpha, P_array[fine_grid], 
                                     U_array[coarse_grid],
                                     beta, U_array[fine_grid]);            
         }
         
         --level;
         cycle_param = 2;
      }
      else
      {
         Not_Finished = 0;
      }
   }

   hypre_ParAMGDataCycleOpCount(amg_data) = cycle_op_count;

   hypre_TFree(lev_counter);
   hypre_TFree(num_coeffs);
   if (smooth_num_levels > 0)
   {
     if (smooth_type == 7 || smooth_type == 8 || smooth_type == 9 || 
	smooth_type == 17 || smooth_type == 18 || smooth_type == 19 )
        hypre_ParVectorDestroy(Utemp);
   }
   return(Solve_err_flag);
}
Пример #9
0
HYPRE_Int hypre_CreateDinv(void *amg_vdata)
{
   hypre_ParAMGData *amg_data = amg_vdata;

   /* Data Structure variables */
   hypre_ParCSRMatrix **A_array;
   hypre_ParVector    **F_array;
   hypre_ParVector    **U_array;

   hypre_ParCSRMatrix *A_tmp;
   hypre_CSRMatrix *A_tmp_diag;
   hypre_ParVector *Xtilde;
   hypre_ParVector *Rtilde;
   hypre_Vector *Xtilde_local;
   hypre_Vector *Rtilde_local;
   HYPRE_Real    *x_data;
   HYPRE_Real    *r_data;
   HYPRE_Real    *tmp_data;
   HYPRE_Real    *D_inv = NULL;
   HYPRE_Real    *relax_weight = NULL;
   HYPRE_Real     relax_type;

   HYPRE_Int       addlvl;
   HYPRE_Int       num_levels;
   HYPRE_Int       num_add_lvls;
   HYPRE_Int       num_rows_L;
   HYPRE_Int       num_rows_A;
   HYPRE_Int       num_rows_tmp;
   HYPRE_Int       level, i;

 /* Local variables  */ 
   HYPRE_Int       Solve_err_flag = 0;
   HYPRE_Int       num_threads;

   HYPRE_Real  **l1_norms_ptr = NULL;
   HYPRE_Real  *l1_norms;
   HYPRE_Int l1_start;

   /* Acquire data and allocate storage */

   num_threads = hypre_NumThreads();

   A_array           = hypre_ParAMGDataAArray(amg_data);
   F_array           = hypre_ParAMGDataFArray(amg_data);
   U_array           = hypre_ParAMGDataUArray(amg_data);
   addlvl            = hypre_ParAMGDataSimple(amg_data);
   num_levels        = hypre_ParAMGDataNumLevels(amg_data);
   relax_weight      = hypre_ParAMGDataRelaxWeight(amg_data);
   relax_type        = hypre_ParAMGDataGridRelaxType(amg_data)[1];
   num_rows_A        = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A_array[0]));

   l1_norms_ptr      = hypre_ParAMGDataL1Norms(amg_data); 
   /* smooth_option       = hypre_ParAMGDataSmoothOption(amg_data); */

   num_add_lvls = num_levels+1-addlvl;
  
   num_rows_L  = 0;
   for (i=addlvl; i < num_levels; i++)
   {
      A_tmp = A_array[i];
      A_tmp_diag = hypre_ParCSRMatrixDiag(A_tmp);
      num_rows_tmp = hypre_CSRMatrixNumRows(A_tmp_diag);
      num_rows_L += num_rows_tmp;
   }

   Rtilde = hypre_CTAlloc(hypre_ParVector, 1);
   Rtilde_local = hypre_SeqVectorCreate(num_rows_L);   
   hypre_SeqVectorInitialize(Rtilde_local);
   hypre_ParVectorLocalVector(Rtilde) = Rtilde_local;   
   hypre_ParVectorOwnsData(Rtilde) = 1;

   Xtilde = hypre_CTAlloc(hypre_ParVector, 1);
   Xtilde_local = hypre_SeqVectorCreate(num_rows_L);   
   hypre_SeqVectorInitialize(Xtilde_local);
   hypre_ParVectorLocalVector(Xtilde) = Xtilde_local;   
   hypre_ParVectorOwnsData(Xtilde) = 1;
      
   x_data = hypre_VectorData(hypre_ParVectorLocalVector(Xtilde));
   r_data = hypre_VectorData(hypre_ParVectorLocalVector(Rtilde));
   D_inv = hypre_CTAlloc(HYPRE_Real, num_rows_L);

   l1_start = 0;
   for (level=addlvl; level < num_levels; level++)
   {
      if (level != 0)
      {
         tmp_data = hypre_VectorData(hypre_ParVectorLocalVector(F_array[level]));
         if (tmp_data) hypre_TFree(tmp_data);
         hypre_VectorData(hypre_ParVectorLocalVector(F_array[level])) = &r_data[l1_start];
         hypre_VectorOwnsData(hypre_ParVectorLocalVector(F_array[level])) = 0;
         tmp_data = hypre_VectorData(hypre_ParVectorLocalVector(U_array[level]));
         if (tmp_data) hypre_TFree(tmp_data);
         hypre_VectorData(hypre_ParVectorLocalVector(U_array[level])) = &x_data[l1_start];
         hypre_VectorOwnsData(hypre_ParVectorLocalVector(U_array[level])) = 0;
      }

      A_tmp = A_array[level];
      A_tmp_diag = hypre_ParCSRMatrixDiag(A_tmp);
      num_rows_tmp = hypre_CSRMatrixNumRows(A_tmp_diag);

      if (relax_type == 0)
      {
         HYPRE_Real rlx_wt = relax_weight[level];
         HYPRE_Int *A_tmp_diag_i = hypre_CSRMatrixI(A_tmp_diag);
         HYPRE_Real *A_tmp_diag_data = hypre_CSRMatrixData(A_tmp_diag);
#ifdef HYPRE_USING_OPENMP
#pragma omp for private(i) HYPRE_SMP_SCHEDULE
#endif
         for (i=0; i < num_rows_tmp; i++)
            D_inv[l1_start+i] = rlx_wt/A_tmp_diag_data[A_tmp_diag_i[i]];
      }
      else
      {
         l1_norms = l1_norms_ptr[level];
#ifdef HYPRE_USING_OPENMP
#pragma omp for private(i) HYPRE_SMP_SCHEDULE
#endif
         for (i=0; i < num_rows_tmp; i++)
            D_inv[l1_start+i] = 1.0/l1_norms[i];
      }
      l1_start += num_rows_tmp;
   }

   hypre_ParAMGDataDinv(amg_data) = D_inv;
   hypre_ParAMGDataRtilde(amg_data) = Rtilde;
   hypre_ParAMGDataXtilde(amg_data) = Xtilde;

   return Solve_err_flag;
}
Пример #10
0
HYPRE_Int
hypre_BoomerAMGAdditiveCycle( void              *amg_vdata)
{
   hypre_ParAMGData *amg_data = amg_vdata;

   /* Data Structure variables */

   hypre_ParCSRMatrix    **A_array;
   hypre_ParCSRMatrix    **P_array;
   hypre_ParCSRMatrix    **R_array;
   hypre_ParCSRMatrix    *Lambda;
   hypre_ParVector    **F_array;
   hypre_ParVector    **U_array;
   hypre_ParVector    *Vtemp;
   hypre_ParVector    *Ztemp;
   hypre_ParVector    *Xtilde, *Rtilde;
   HYPRE_Int      **CF_marker_array;

   HYPRE_Int       num_levels;
   HYPRE_Int       addlvl;
   HYPRE_Int       additive;
   HYPRE_Int       mult_additive;
   HYPRE_Int       simple;
   HYPRE_Int       i, num_rows;
   HYPRE_Int       n_global;
   HYPRE_Int       rlx_order;

 /* Local variables  */ 
   HYPRE_Int       Solve_err_flag = 0;
   HYPRE_Int       level;
   HYPRE_Int       coarse_grid;
   HYPRE_Int       fine_grid;
   HYPRE_Int       relax_type;
   HYPRE_Int       rlx_down;
   HYPRE_Int       rlx_up;
   HYPRE_Int      *grid_relax_type;
   HYPRE_Real      **l1_norms;
   HYPRE_Real    alpha, beta;
   HYPRE_Int       num_threads;
   HYPRE_Real *u_data;
   HYPRE_Real *f_data;
   HYPRE_Real *v_data;
   HYPRE_Real *l1_norms_lvl;
   HYPRE_Real *D_inv;
   HYPRE_Real *x_global;
   HYPRE_Real *r_global;
   HYPRE_Real *relax_weight;
   HYPRE_Real *omega;

#if 0
   HYPRE_Real   *D_mat;
   HYPRE_Real   *S_vec;
#endif
   
   /* Acquire data and allocate storage */

   num_threads = hypre_NumThreads();

   A_array           = hypre_ParAMGDataAArray(amg_data);
   F_array           = hypre_ParAMGDataFArray(amg_data);
   U_array           = hypre_ParAMGDataUArray(amg_data);
   P_array           = hypre_ParAMGDataPArray(amg_data);
   R_array           = hypre_ParAMGDataRArray(amg_data);
   CF_marker_array   = hypre_ParAMGDataCFMarkerArray(amg_data);
   Vtemp             = hypre_ParAMGDataVtemp(amg_data);
   Ztemp             = hypre_ParAMGDataZtemp(amg_data);
   num_levels        = hypre_ParAMGDataNumLevels(amg_data);
   additive          = hypre_ParAMGDataAdditive(amg_data);
   mult_additive     = hypre_ParAMGDataMultAdditive(amg_data);
   simple            = hypre_ParAMGDataSimple(amg_data);
   grid_relax_type   = hypre_ParAMGDataGridRelaxType(amg_data);
   Lambda            = hypre_ParAMGDataLambda(amg_data);
   Xtilde            = hypre_ParAMGDataXtilde(amg_data);
   Rtilde            = hypre_ParAMGDataRtilde(amg_data);
   l1_norms          = hypre_ParAMGDataL1Norms(amg_data);
   D_inv             = hypre_ParAMGDataDinv(amg_data);
   grid_relax_type   = hypre_ParAMGDataGridRelaxType(amg_data);
   relax_weight      = hypre_ParAMGDataRelaxWeight(amg_data);
   omega             = hypre_ParAMGDataOmega(amg_data);
   rlx_order         = hypre_ParAMGDataRelaxOrder(amg_data);

   /* Initialize */

   addlvl = hypre_max(additive, mult_additive);
   addlvl = hypre_max(addlvl, simple);
   Solve_err_flag = 0;

   /*---------------------------------------------------------------------
    * Main loop of cycling --- multiplicative version --- V-cycle
    *--------------------------------------------------------------------*/

   /* down cycle */
   relax_type = grid_relax_type[1];
   rlx_down = grid_relax_type[1];
   rlx_up = grid_relax_type[2];
   for (level = 0; level < num_levels-1; level++)
   {
      fine_grid = level;
      coarse_grid = level + 1;

      u_data = hypre_VectorData(hypre_ParVectorLocalVector(U_array[fine_grid]));
      f_data = hypre_VectorData(hypre_ParVectorLocalVector(F_array[fine_grid]));
      v_data = hypre_VectorData(hypre_ParVectorLocalVector(Vtemp));
      l1_norms_lvl = l1_norms[level];

      hypre_ParVectorSetConstantValues(U_array[coarse_grid], 0.0); 

      if (level < addlvl) /* multiplicative version */
      {
         /* smoothing step */

         if (rlx_down == 0)
         {
            HYPRE_Real *A_data = hypre_CSRMatrixData(hypre_ParCSRMatrixDiag(A_array[fine_grid]));
            HYPRE_Int *A_i = hypre_CSRMatrixI(hypre_ParCSRMatrixDiag(A_array[fine_grid]));
            hypre_ParVectorCopy(F_array[fine_grid],Vtemp);
            num_rows = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A_array[fine_grid]));
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
            for (i = 0; i < num_rows; i++)
               u_data[i] = relax_weight[level]*v_data[i] / A_data[A_i[i]];
         }

         else if (rlx_down != 18)
         {
            /*hypre_BoomerAMGRelax(A_array[fine_grid],F_array[fine_grid],NULL,rlx_down,0,*/
            hypre_BoomerAMGRelaxIF(A_array[fine_grid],F_array[fine_grid],
	     CF_marker_array[fine_grid], rlx_down,rlx_order,1,
             relax_weight[fine_grid], omega[fine_grid],
             l1_norms_lvl, U_array[fine_grid], Vtemp, Ztemp);
            hypre_ParVectorCopy(F_array[fine_grid],Vtemp);
         }
         else
         {
            hypre_ParVectorCopy(F_array[fine_grid],Vtemp);
            num_rows = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A_array[fine_grid]));
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
	    for (i = 0; i < num_rows; i++)
               u_data[i] += v_data[i] / l1_norms_lvl[i];
         }
     
         alpha = -1.0;
         beta = 1.0;
         hypre_ParCSRMatrixMatvec(alpha, A_array[fine_grid], U_array[fine_grid],
                                     beta, Vtemp);

         alpha = 1.0;
         beta = 0.0;
         hypre_ParCSRMatrixMatvecT(alpha,R_array[fine_grid],Vtemp,
                                      beta,F_array[coarse_grid]);
      }
      else /* additive version */
      {
         hypre_ParVectorCopy(F_array[fine_grid],Vtemp);
         if (level == 0) /* compute residual */
         {
            hypre_ParVectorCopy(Vtemp, Rtilde);
            hypre_ParVectorCopy(U_array[fine_grid],Xtilde);
         }
         alpha = 1.0;
         beta = 0.0;
         hypre_ParCSRMatrixMatvecT(alpha,R_array[fine_grid],Vtemp,
                                      beta,F_array[coarse_grid]);
      }
   }

   /* solve coarse grid */ 
   if (addlvl < num_levels)
   {
      if (simple > -1)
      {
         x_global = hypre_VectorData(hypre_ParVectorLocalVector(Xtilde));
         r_global = hypre_VectorData(hypre_ParVectorLocalVector(Rtilde));
         n_global = hypre_VectorSize(hypre_ParVectorLocalVector(Xtilde));
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE
#endif
	 for (i=0; i < n_global; i++)
	    x_global[i] += D_inv[i]*r_global[i];
      }
      else
	 hypre_ParCSRMatrixMatvec(1.0, Lambda, Rtilde, 1.0, Xtilde);
      if (addlvl == 0) hypre_ParVectorCopy(Xtilde, U_array[0]);
   }
   else
   {
      fine_grid = num_levels -1;
      hypre_ParCSRRelax(A_array[fine_grid], F_array[fine_grid],
                              1, 1, l1_norms[fine_grid],
                              1.0, 1.0 ,0,0,0,0,
                              U_array[fine_grid], Vtemp, Ztemp);
   }

   /* up cycle */
   relax_type = grid_relax_type[2];
   for (level = num_levels-1; level > 0; level--)
   {
      fine_grid = level - 1;
      coarse_grid = level;

      if (level <= addlvl) /* multiplicative version */
      {
         alpha = 1.0;
         beta = 1.0;
         hypre_ParCSRMatrixMatvec(alpha, P_array[fine_grid], 
                                     U_array[coarse_grid],
                                     beta, U_array[fine_grid]);            
         if (rlx_up != 18)
            /*hypre_BoomerAMGRelax(A_array[fine_grid],F_array[fine_grid],NULL,rlx_up,0,*/
            hypre_BoomerAMGRelaxIF(A_array[fine_grid],F_array[fine_grid],
		CF_marker_array[fine_grid],
		rlx_up,rlx_order,2,
                relax_weight[fine_grid], omega[fine_grid],
                l1_norms[fine_grid], U_array[fine_grid], Vtemp, Ztemp);
         else if (rlx_order)
         {
            HYPRE_Int loc_relax_points[2];
            loc_relax_points[0] = -1;
            loc_relax_points[1] = 1;
            for (i=0; i < 2; i++)
                hypre_ParCSRRelax_L1_Jacobi(A_array[fine_grid],F_array[fine_grid],
                                            CF_marker_array[fine_grid],
                                            loc_relax_points[i],
                                            1.0, l1_norms[fine_grid],
                                            U_array[fine_grid], Vtemp);
         }
         else 
            hypre_ParCSRRelax(A_array[fine_grid], F_array[fine_grid],
                                 1, 1, l1_norms[fine_grid],
                                 1.0, 1.0 ,0,0,0,0,
                                 U_array[fine_grid], Vtemp, Ztemp);
      }
      else /* additive version */
      {
         alpha = 1.0;
         beta = 1.0;
         hypre_ParCSRMatrixMatvec(alpha, P_array[fine_grid], 
                                     U_array[coarse_grid],
                                     beta, U_array[fine_grid]);            
      }
   }

   return(Solve_err_flag);
}
Пример #11
0
HYPRE_Int hypre_CreateLambda(void *amg_vdata)
{
   hypre_ParAMGData *amg_data = amg_vdata;

   /* Data Structure variables */
   MPI_Comm comm;
   hypre_ParCSRMatrix **A_array;
   hypre_ParVector    **F_array;
   hypre_ParVector    **U_array;

   hypre_ParCSRMatrix *A_tmp;
   hypre_ParCSRMatrix *Lambda;
   hypre_CSRMatrix *L_diag;
   hypre_CSRMatrix *L_offd;
   hypre_CSRMatrix *A_tmp_diag;
   hypre_CSRMatrix *A_tmp_offd;
   hypre_ParVector *Xtilde;
   hypre_ParVector *Rtilde;
   hypre_Vector *Xtilde_local;
   hypre_Vector *Rtilde_local;
   hypre_ParCSRCommPkg *comm_pkg;
   hypre_ParCSRCommPkg *L_comm_pkg = NULL;
   hypre_ParCSRCommHandle *comm_handle;
   HYPRE_Real    *L_diag_data;
   HYPRE_Real    *L_offd_data;
   HYPRE_Real    *buf_data = NULL;
   HYPRE_Real    *tmp_data;
   HYPRE_Real    *x_data;
   HYPRE_Real    *r_data;
   HYPRE_Real    *l1_norms;
   HYPRE_Real    *A_tmp_diag_data;
   HYPRE_Real    *A_tmp_offd_data;
   HYPRE_Real    *D_data = NULL;
   HYPRE_Real    *D_data_offd = NULL;
   HYPRE_Int *L_diag_i;
   HYPRE_Int *L_diag_j;
   HYPRE_Int *L_offd_i;
   HYPRE_Int *L_offd_j;
   HYPRE_Int *A_tmp_diag_i;
   HYPRE_Int *A_tmp_offd_i;
   HYPRE_Int *A_tmp_diag_j;
   HYPRE_Int *A_tmp_offd_j;
   HYPRE_Int *L_recv_ptr = NULL;
   HYPRE_Int *L_send_ptr = NULL;
   HYPRE_Int *L_recv_procs = NULL;
   HYPRE_Int *L_send_procs = NULL;
   HYPRE_Int *L_send_map_elmts = NULL;
   HYPRE_Int *recv_procs;
   HYPRE_Int *send_procs;
   HYPRE_Int *send_map_elmts;
   HYPRE_Int *send_map_starts;
   HYPRE_Int *recv_vec_starts;
   HYPRE_Int *all_send_procs = NULL;
   HYPRE_Int *all_recv_procs = NULL;
   HYPRE_Int *remap = NULL;
   HYPRE_Int *level_start;

   HYPRE_Int       addlvl;
   HYPRE_Int       additive;
   HYPRE_Int       mult_additive;
   HYPRE_Int       num_levels;
   HYPRE_Int       num_add_lvls;
   HYPRE_Int       num_procs;
   HYPRE_Int       num_sends, num_recvs;
   HYPRE_Int       num_sends_L = 0;
   HYPRE_Int       num_recvs_L = 0;
   HYPRE_Int       send_data_L = 0;
   HYPRE_Int       num_rows_L = 0;
   HYPRE_Int       num_rows_tmp = 0;
   HYPRE_Int       num_cols_offd_L = 0;
   HYPRE_Int       num_cols_offd = 0;
   HYPRE_Int       level, i, j, k;
   HYPRE_Int       this_proc, cnt, cnt_diag, cnt_offd;
   HYPRE_Int       cnt_recv, cnt_send, cnt_row, row_start;
   HYPRE_Int       start_diag, start_offd, indx, cnt_map;
   HYPRE_Int       start, j_indx, index, cnt_level;
   HYPRE_Int       max_sends, max_recvs;

 /* Local variables  */ 
   HYPRE_Int       Solve_err_flag = 0;
   HYPRE_Int       num_threads;
   HYPRE_Int       num_nonzeros_diag;
   HYPRE_Int       num_nonzeros_offd;

   HYPRE_Real  **l1_norms_ptr = NULL;
   HYPRE_Real   *relax_weight = NULL;
   HYPRE_Real    relax_type;

   /* Acquire data and allocate storage */

   num_threads = hypre_NumThreads();

   A_array           = hypre_ParAMGDataAArray(amg_data);
   F_array           = hypre_ParAMGDataFArray(amg_data);
   U_array           = hypre_ParAMGDataUArray(amg_data);
   additive          = hypre_ParAMGDataAdditive(amg_data);
   mult_additive     = hypre_ParAMGDataMultAdditive(amg_data);
   num_levels        = hypre_ParAMGDataNumLevels(amg_data);
   relax_weight      = hypre_ParAMGDataRelaxWeight(amg_data);
   relax_type        = hypre_ParAMGDataGridRelaxType(amg_data)[1];
   comm              = hypre_ParCSRMatrixComm(A_array[0]);

   hypre_MPI_Comm_size(comm,&num_procs);

   l1_norms_ptr      = hypre_ParAMGDataL1Norms(amg_data); 

   addlvl = hypre_max(additive, mult_additive);
   num_add_lvls = num_levels+1-addlvl;

   level_start = hypre_CTAlloc(HYPRE_Int, num_add_lvls+1);
   send_data_L = 0;
   num_rows_L  = 0;
   num_cols_offd_L = 0;
   num_nonzeros_diag = 0;
   num_nonzeros_offd = 0;
   level_start[0] = 0; 
   cnt = 1;
   max_sends = 0;
   max_recvs = 0;
   for (i=addlvl; i < num_levels; i++)
   {
      A_tmp = A_array[i];
      A_tmp_diag = hypre_ParCSRMatrixDiag(A_tmp);
      A_tmp_offd = hypre_ParCSRMatrixOffd(A_tmp);
      A_tmp_diag_i = hypre_CSRMatrixI(A_tmp_diag);
      A_tmp_offd_i = hypre_CSRMatrixI(A_tmp_offd);
      num_rows_tmp = hypre_CSRMatrixNumRows(A_tmp_diag);
      num_cols_offd = hypre_CSRMatrixNumCols(A_tmp_offd);
      num_rows_L += num_rows_tmp;
      level_start[cnt] = level_start[cnt-1] + num_rows_tmp;
      cnt++;
      num_cols_offd_L += num_cols_offd;
      num_nonzeros_diag += A_tmp_diag_i[num_rows_tmp];
      num_nonzeros_offd += A_tmp_offd_i[num_rows_tmp];
      comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp);
      if (comm_pkg)
      {
         num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
         max_sends += num_sends;
         if (num_sends) 
	    send_data_L += hypre_ParCSRCommPkgSendMapStart(comm_pkg,num_sends);
         max_recvs += hypre_ParCSRCommPkgNumRecvs(comm_pkg);
      }
   }
   if (max_sends >= num_procs ||max_recvs >= num_procs)
   {
         max_sends = num_procs;
         max_recvs = num_procs;
   }
   if (max_sends) all_send_procs = hypre_CTAlloc(HYPRE_Int, max_sends);
   if (max_recvs) all_recv_procs = hypre_CTAlloc(HYPRE_Int, max_recvs);

   cnt_send = 0;
   cnt_recv = 0;
   if (max_sends || max_recvs)
   {
      if (max_sends < num_procs && max_recvs < num_procs)
      {
         for (i=addlvl; i < num_levels; i++)
         {
            A_tmp = A_array[i];
            comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp);
            if (comm_pkg)
            {
               num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
               num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
               send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg);
               recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
               for (j = 0; j < num_sends; j++)
	          all_send_procs[cnt_send++] = send_procs[j];
               for (j = 0; j < num_recvs; j++)
	          all_recv_procs[cnt_recv++] = recv_procs[j];
            }
         }
         if (max_sends)
         {
            qsort0(all_send_procs, 0, max_sends-1);
            num_sends_L = 1;
            this_proc = all_send_procs[0];
            for (i=1; i < max_sends; i++)
            {
               if (all_send_procs[i] > this_proc)
               {
                  this_proc = all_send_procs[i];
                  all_send_procs[num_sends_L++] = this_proc;
               }
            }
            L_send_procs = hypre_CTAlloc(HYPRE_Int, num_sends_L);
            for (j=0; j < num_sends_L; j++)
	       L_send_procs[j] = all_send_procs[j];
	    hypre_TFree(all_send_procs);
         }
         if (max_recvs)
         {
            qsort0(all_recv_procs, 0, max_recvs-1);
            num_recvs_L = 1;
            this_proc = all_recv_procs[0];
            for (i=1; i < max_recvs; i++)
            {
               if (all_recv_procs[i] > this_proc)
               {
                  this_proc = all_recv_procs[i];
                  all_recv_procs[num_recvs_L++] = this_proc;
               }
            }
            L_recv_procs = hypre_CTAlloc(HYPRE_Int, num_recvs_L);
            for (j=0; j < num_recvs_L; j++)
	       L_recv_procs[j] = all_recv_procs[j];
	    hypre_TFree(all_recv_procs);
         }

         L_recv_ptr = hypre_CTAlloc(HYPRE_Int, num_recvs_L+1);
         L_send_ptr = hypre_CTAlloc(HYPRE_Int, num_sends_L+1);

         for (i=addlvl; i < num_levels; i++)
         {
            A_tmp = A_array[i];
            comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp);
            if (comm_pkg)
            {
               num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
               num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
               send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg);
               recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
               send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
               recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
            }
            else
            {
               num_sends = 0;
               num_recvs = 0;
            }
            for (k = 0; k < num_sends; k++)
            {
               this_proc = hypre_BinarySearch(L_send_procs,send_procs[k],num_sends_L);
               L_send_ptr[this_proc+1] += send_map_starts[k+1]-send_map_starts[k];
            }
            for (k = 0; k < num_recvs; k++)
            {
               this_proc = hypre_BinarySearch(L_recv_procs,recv_procs[k],num_recvs_L);
               L_recv_ptr[this_proc+1] += recv_vec_starts[k+1]-recv_vec_starts[k];
            }
         }

         L_recv_ptr[0] = 0;
         for (i=1; i < num_recvs_L; i++)
            L_recv_ptr[i+1] += L_recv_ptr[i];

         L_send_ptr[0] = 0;
         for (i=1; i < num_sends_L; i++)
            L_send_ptr[i+1] += L_send_ptr[i];
      }
      else
      {
         num_recvs_L = 0;
         num_sends_L = 0;
         for (i=addlvl; i < num_levels; i++)
         {
            A_tmp = A_array[i];
            comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp);
            if (comm_pkg)
            {
               num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
               num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
               send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg);
               recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
               send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
               recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
               for (j = 0; j < num_sends; j++)
               {
                  this_proc = send_procs[j];
	          if (all_send_procs[this_proc] == 0)
		      num_sends_L++;
                  all_send_procs[this_proc] += send_map_starts[j+1]-send_map_starts[j];
               }
               for (j = 0; j < num_recvs; j++)
               {
                  this_proc = recv_procs[j];
	          if (all_recv_procs[this_proc] == 0)
		      num_recvs_L++;
                  all_recv_procs[this_proc] += recv_vec_starts[j+1]-recv_vec_starts[j];
               }
            }
         }
         if (max_sends)
         {
            L_send_procs = hypre_CTAlloc(HYPRE_Int, num_sends_L);
            L_send_ptr = hypre_CTAlloc(HYPRE_Int, num_sends_L+1);
            num_sends_L = 0;
            for (j=0; j < num_procs; j++)
            {
	       this_proc = all_send_procs[j];
	       if (this_proc)
	       {
	           L_send_procs[num_sends_L++] = j;
	           L_send_ptr[num_sends_L] = this_proc + L_send_ptr[num_sends_L-1];
	       }
            }
         }
         if (max_recvs)
         {
            L_recv_procs = hypre_CTAlloc(HYPRE_Int, num_recvs_L);
            L_recv_ptr = hypre_CTAlloc(HYPRE_Int, num_recvs_L+1);
            num_recvs_L = 0;
            for (j=0; j < num_procs; j++)
            {
	       this_proc = all_recv_procs[j];
	       if (this_proc)
	       {
	           L_recv_procs[num_recvs_L++] = j;
	           L_recv_ptr[num_recvs_L] = this_proc + L_recv_ptr[num_recvs_L-1];
	       }
            }
         }
      } 
   }
   if (max_sends) hypre_TFree(all_send_procs);
   if (max_recvs) hypre_TFree(all_recv_procs);

   L_diag = hypre_CSRMatrixCreate(num_rows_L, num_rows_L, num_nonzeros_diag);
   L_offd = hypre_CSRMatrixCreate(num_rows_L, num_cols_offd_L, num_nonzeros_offd);
   hypre_CSRMatrixInitialize(L_diag);
   hypre_CSRMatrixInitialize(L_offd);
   if (num_nonzeros_diag)
   {
      L_diag_data = hypre_CSRMatrixData(L_diag);
      L_diag_j = hypre_CSRMatrixJ(L_diag);
   }
   L_diag_i = hypre_CSRMatrixI(L_diag);
   if (num_nonzeros_offd)
   {
      L_offd_data = hypre_CSRMatrixData(L_offd);
      L_offd_j = hypre_CSRMatrixJ(L_offd);
   }
   L_offd_i = hypre_CSRMatrixI(L_offd);

   if (num_rows_L) D_data = hypre_CTAlloc(HYPRE_Real,num_rows_L);
   if (send_data_L)
   {
      L_send_map_elmts = hypre_CTAlloc(HYPRE_Int, send_data_L);
      buf_data = hypre_CTAlloc(HYPRE_Real,send_data_L);
   }
   if (num_cols_offd_L)
   {
      D_data_offd = hypre_CTAlloc(HYPRE_Real,num_cols_offd_L);
      /*L_col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd_L);*/
      remap = hypre_CTAlloc(HYPRE_Int, num_cols_offd_L);
   }

   Rtilde = hypre_CTAlloc(hypre_ParVector, 1);
   Rtilde_local = hypre_SeqVectorCreate(num_rows_L);   
   hypre_SeqVectorInitialize(Rtilde_local);
   hypre_ParVectorLocalVector(Rtilde) = Rtilde_local;   
   hypre_ParVectorOwnsData(Rtilde) = 1;

   Xtilde = hypre_CTAlloc(hypre_ParVector, 1);
   Xtilde_local = hypre_SeqVectorCreate(num_rows_L);   
   hypre_SeqVectorInitialize(Xtilde_local);
   hypre_ParVectorLocalVector(Xtilde) = Xtilde_local;   
   hypre_ParVectorOwnsData(Xtilde) = 1;
      
   x_data = hypre_VectorData(hypre_ParVectorLocalVector(Xtilde));
   r_data = hypre_VectorData(hypre_ParVectorLocalVector(Rtilde));

   cnt = 0;
   cnt_level = 0;
   cnt_diag = 0; 
   cnt_offd = 0; 
   cnt_row = 1; 
   L_diag_i[0] = 0;
   L_offd_i[0] = 0;
   for (level=addlvl; level < num_levels; level++)
   {
      row_start = level_start[cnt_level];
      if (level != 0)
      {
         tmp_data = hypre_VectorData(hypre_ParVectorLocalVector(F_array[level]));
         if (tmp_data) hypre_TFree(tmp_data);
         hypre_VectorData(hypre_ParVectorLocalVector(F_array[level])) = &r_data[row_start];
         hypre_VectorOwnsData(hypre_ParVectorLocalVector(F_array[level])) = 0;
         tmp_data = hypre_VectorData(hypre_ParVectorLocalVector(U_array[level]));
         if (tmp_data) hypre_TFree(tmp_data);
         hypre_VectorData(hypre_ParVectorLocalVector(U_array[level])) = &x_data[row_start];
         hypre_VectorOwnsData(hypre_ParVectorLocalVector(U_array[level])) = 0;
      }
      cnt_level++;

      start_diag = L_diag_i[cnt_row-1];
      start_offd = L_offd_i[cnt_row-1];
      A_tmp = A_array[level];
      A_tmp_diag = hypre_ParCSRMatrixDiag(A_tmp);
      A_tmp_offd = hypre_ParCSRMatrixOffd(A_tmp);
      comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp);
      A_tmp_diag_i = hypre_CSRMatrixI(A_tmp_diag);
      A_tmp_offd_i = hypre_CSRMatrixI(A_tmp_offd);
      A_tmp_diag_j = hypre_CSRMatrixJ(A_tmp_diag);
      A_tmp_offd_j = hypre_CSRMatrixJ(A_tmp_offd);
      A_tmp_diag_data = hypre_CSRMatrixData(A_tmp_diag);
      A_tmp_offd_data = hypre_CSRMatrixData(A_tmp_offd);
      num_rows_tmp = hypre_CSRMatrixNumRows(A_tmp_diag);
      if (comm_pkg)
      {
         num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
         num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
         send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg);
         recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
         send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
         send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
         recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
      }
      else
      {
         num_sends = 0;
         num_recvs = 0;
      }
   
      /* Compute new combined communication package */
      for (i=0; i < num_sends; i++)
      {
         this_proc = hypre_BinarySearch(L_send_procs,send_procs[i],num_sends_L);
         indx = L_send_ptr[this_proc];
         for (j=send_map_starts[i]; j < send_map_starts[i+1]; j++)
         {
	    L_send_map_elmts[indx++] = row_start + send_map_elmts[j];
         }
         L_send_ptr[this_proc] = indx;
      }
            
      cnt_map = 0;
      for (i = 0; i < num_recvs; i++)
      {
         this_proc = hypre_BinarySearch(L_recv_procs,recv_procs[i],num_recvs_L);
         indx = L_recv_ptr[this_proc];
         for (j=recv_vec_starts[i]; j < recv_vec_starts[i+1]; j++)
         {
	    remap[cnt_map++] = indx++;
         }
         L_recv_ptr[this_proc] = indx;
      }
   
      /* Compute Lambda */ 
      if (relax_type == 0)
      {
        HYPRE_Real rlx_wt = relax_weight[level];
#ifdef HYPRE_USING_OPENMP
#pragma omp for private(i) HYPRE_SMP_SCHEDULE
#endif
         for (i=0; i < num_rows_tmp; i++)
        {
           D_data[i] = rlx_wt/A_tmp_diag_data[A_tmp_diag_i[i]];
           L_diag_i[cnt_row+i] = start_diag + A_tmp_diag_i[i+1];
           L_offd_i[cnt_row+i] = start_offd + A_tmp_offd_i[i+1];
        }
      }
      else
      {
        l1_norms = l1_norms_ptr[level];
#ifdef HYPRE_USING_OPENMP
#pragma omp for private(i) HYPRE_SMP_SCHEDULE
#endif
        for (i=0; i < num_rows_tmp; i++)
        {
           D_data[i] = 1.0/l1_norms[i];
           L_diag_i[cnt_row+i] = start_diag + A_tmp_diag_i[i+1];
           L_offd_i[cnt_row+i] = start_offd + A_tmp_offd_i[i+1];
        }
      }
 
      if (num_procs > 1)
      {
         index = 0;
         for (i=0; i < num_sends; i++)
         {
            start = send_map_starts[i];
            for (j=start; j < send_map_starts[i+1]; j++)
              buf_data[index++] = D_data[send_map_elmts[j]];
         }

         comm_handle = hypre_ParCSRCommHandleCreate(1, comm_pkg,
                        buf_data, D_data_offd);
         hypre_ParCSRCommHandleDestroy(comm_handle);
      }

      for (i = 0; i < num_rows_tmp; i++)
      {
         j_indx = A_tmp_diag_i[i];
         L_diag_data[cnt_diag] = (2.0 - A_tmp_diag_data[j_indx]*D_data[i])*D_data[i];
         L_diag_j[cnt_diag++] = i+row_start;
         for (j=A_tmp_diag_i[i]+1; j < A_tmp_diag_i[i+1]; j++)
         {
             j_indx = A_tmp_diag_j[j];
             L_diag_data[cnt_diag] = (- A_tmp_diag_data[j]*D_data[j_indx])*D_data[i];
             L_diag_j[cnt_diag++] = j_indx+row_start;
         }
         for (j=A_tmp_offd_i[i]; j < A_tmp_offd_i[i+1]; j++)
         {
             j_indx = A_tmp_offd_j[j];
             L_offd_data[cnt_offd] = (- A_tmp_offd_data[j]*D_data_offd[j_indx])*D_data[i];
             L_offd_j[cnt_offd++] = remap[j_indx];
         }
      }
      cnt_row += num_rows_tmp;
   }

   if (L_send_ptr)
   {
      for (i=num_sends_L-1; i > 0; i--)
         L_send_ptr[i] = L_send_ptr[i-1];
      L_send_ptr[0] = 0;
   }
   else
      L_send_ptr = hypre_CTAlloc(HYPRE_Int,1);

   if (L_recv_ptr)
   {
      for (i=num_recvs_L-1; i > 0; i--)
         L_recv_ptr[i] = L_recv_ptr[i-1];
      L_recv_ptr[0] = 0;
   }
   else
      L_recv_ptr = hypre_CTAlloc(HYPRE_Int,1);

   L_comm_pkg = hypre_CTAlloc(hypre_ParCSRCommPkg,1);

   hypre_ParCSRCommPkgNumRecvs(L_comm_pkg) = num_recvs_L;
   hypre_ParCSRCommPkgNumSends(L_comm_pkg) = num_sends_L;
   hypre_ParCSRCommPkgRecvProcs(L_comm_pkg) = L_recv_procs;
   hypre_ParCSRCommPkgSendProcs(L_comm_pkg) = L_send_procs;
   hypre_ParCSRCommPkgRecvVecStarts(L_comm_pkg) = L_recv_ptr;
   hypre_ParCSRCommPkgSendMapStarts(L_comm_pkg) = L_send_ptr;
   hypre_ParCSRCommPkgSendMapElmts(L_comm_pkg) = L_send_map_elmts;
   hypre_ParCSRCommPkgComm(L_comm_pkg) = comm;


   Lambda = hypre_CTAlloc(hypre_ParCSRMatrix, 1);
   hypre_ParCSRMatrixDiag(Lambda) = L_diag;
   hypre_ParCSRMatrixOffd(Lambda) = L_offd;
   hypre_ParCSRMatrixCommPkg(Lambda) = L_comm_pkg;
   hypre_ParCSRMatrixComm(Lambda) = comm;
   hypre_ParCSRMatrixOwnsData(Lambda) = 1;

   hypre_ParAMGDataLambda(amg_data) = Lambda;
   hypre_ParAMGDataRtilde(amg_data) = Rtilde;
   hypre_ParAMGDataXtilde(amg_data) = Xtilde;

   hypre_TFree(D_data_offd);
   hypre_TFree(D_data);
   if (num_procs > 1) hypre_TFree(buf_data);
   hypre_TFree(remap);
   hypre_TFree(buf_data);
   hypre_TFree(level_start);

   return Solve_err_flag;
}