Exemplo n.º 1
0
/*--------------------------------------------------------------------------
 * hypre_DistributedMatrixRestoreRow
 *
 * Forwards a "restore row" request to the routine that matches the local
 * storage type of 'matrix' (PETSc, ISIS or ParCSR).  All arguments are
 * passed through unchanged.
 *
 * Returns whatever the selected routine returns, or -1 when the local
 * storage type is none of the three handled here.
 *
 * When HYPRE_TIMING is defined, the call is bracketed by the matrix's
 * GetRow_timer.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_DistributedMatrixRestoreRow( hypre_DistributedMatrix  *matrix,
                                   HYPRE_Int                 row,
                                   HYPRE_Int                *size,
                                   HYPRE_Int               **col_ind,
                                   double                  **values )
{
   /* stays -1 unless one of the storage-specific routines below runs */
   HYPRE_Int ierr = -1;

#ifdef HYPRE_TIMING
   hypre_BeginTiming( matrix->GetRow_timer );
#endif

   if ( hypre_DistributedMatrixLocalStorageType(matrix) == HYPRE_PETSC )
   {
      ierr = hypre_DistributedMatrixRestoreRowPETSc( matrix, row, size,
                                                     col_ind, values );
   }
   else if ( hypre_DistributedMatrixLocalStorageType(matrix) == HYPRE_ISIS )
   {
      ierr = hypre_RestoreDistributedMatrixRowISIS( matrix, row, size,
                                                    col_ind, values );
   }
   else if ( hypre_DistributedMatrixLocalStorageType(matrix) == HYPRE_PARCSR )
   {
      ierr = hypre_DistributedMatrixRestoreRowParCSR( matrix, row, size,
                                                      col_ind, values );
   }

#ifdef HYPRE_TIMING
   hypre_EndTiming( matrix->GetRow_timer );
#endif

   return ierr;
}
Exemplo n.º 2
0
/*--------------------------------------------------------------------------
 * hypre_StructCoarsen
 *
 * Builds a coarse grid from the fine grid 'fgrid'.
 *
 * The portion of the routine visible here:
 *   - reads the ids, box manager, communicator, dimension and max distance
 *     of the fine grid;
 *   - creates a new grid 'cgrid' on the same communicator and dimension;
 *   - duplicates the fine grid's boxes and coarsens each copy in place with
 *     hypre_StructCoarsenBox(box, index, stride), reusing the fine ids;
 *   - when 'prune' is nonzero, drops boxes whose volume is zero.
 *
 * NOTE(review): the definition is cut off in this excerpt right after the
 * pruning step, so how 'cgrid_ptr' is filled in and what is returned cannot
 * be seen here.  Most of the locals declared below are only used in the
 * part that is not shown.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_StructCoarsen( hypre_StructGrid  *fgrid,
                     hypre_Index        index,
                     hypre_Index        stride,
                     HYPRE_Int          prune,
                     hypre_StructGrid **cgrid_ptr )
{
   hypre_StructGrid *cgrid;

   MPI_Comm          comm;
   HYPRE_Int         ndim;

   hypre_BoxArray   *my_boxes;

   hypre_Index       periodic;
   hypre_Index       ilower, iupper;

   hypre_Box        *box;
   hypre_Box        *new_box;
   hypre_Box        *bounding_box;

   HYPRE_Int         i, j, myid, count;
   HYPRE_Int         info_size, max_nentries;
   HYPRE_Int         num_entries;
   HYPRE_Int        *fids, *cids;
   hypre_Index       new_dist;
   hypre_IndexRef    max_distance;
   HYPRE_Int         proc, id;
   HYPRE_Int         coarsen_factor, known;
   HYPRE_Int         num, last_proc;
#if 0
   hypre_StructAssumedPart *fap = NULL, *cap = NULL;
#endif
   hypre_BoxManager   *fboxman, *cboxman;

   hypre_BoxManEntry *entries;
   hypre_BoxManEntry  *entry;
     
   void               *entry_info = NULL;
 
   /* optional timing: one timer per call, titled "Coarsen.<n>" where n is
      the running counter s_coarsen_num (declared outside this excerpt) */
#if TIME_DEBUG  
   HYPRE_Int tindex;
   char new_title[80];
   hypre_sprintf(new_title,"Coarsen.%d",s_coarsen_num);
   tindex = hypre_InitializeTiming(new_title);
   s_coarsen_num++;

   hypre_BeginTiming(tindex);
#endif

   hypre_SetIndex(ilower, 0);
   hypre_SetIndex(iupper, 0);

   /* get relevant information from the fine grid */
   fids = hypre_StructGridIDs(fgrid);
   fboxman = hypre_StructGridBoxMan(fgrid);
   comm  = hypre_StructGridComm(fgrid);
   ndim  = hypre_StructGridNDim(fgrid);
   max_distance = hypre_StructGridMaxDistance(fgrid);
   
   /* initial */
   hypre_MPI_Comm_rank(comm, &myid );

   /* create new coarse grid */
   hypre_StructGridCreate(comm, ndim, &cgrid);

   /* coarsen my boxes and create the coarse grid ids (same as fgrid) */
   /* the fine grid's own box array is left untouched: we work on a copy */
   my_boxes = hypre_BoxArrayDuplicate(hypre_StructGridBoxes(fgrid));
   cids = hypre_TAlloc(HYPRE_Int,  hypre_BoxArraySize(my_boxes));
   for (i = 0; i < hypre_BoxArraySize(my_boxes); i++)
   {
      box = hypre_BoxArrayBox(my_boxes, i);
      hypre_StructCoarsenBox(box, index, stride);
      cids[i] = fids[i];
   }
   
   /* prune? */
   /* zero volume boxes are needed when forming P and P^T */ 
   if (prune)
   {
      /* compact in place: every box with nonzero volume, together with its
         id, is moved down to slot 'count'; the array is then shortened */
      count = 0;    
      hypre_ForBoxI(i, my_boxes)
      {
         box = hypre_BoxArrayBox(my_boxes, i);
         if (hypre_BoxVolume(box))
         {
            hypre_CopyBox(box, hypre_BoxArrayBox(my_boxes, count));
            cids[count] = cids[i];
            count++;
         }
      }
      hypre_BoxArraySetSize(my_boxes, count);
   }
/*--------------------------------------------------------------------------
 * hypre_SMGResidual
 *
 * Computes the residual r = b - A*x on the points described by the
 * residual data object (its base_points boxes, traversed with base_stride).
 *
 * The work is split into two passes over compute_i:
 *   pass 0: hypre_InitializeIndtComputations is called on x's data, b is
 *           copied into r on the base points, and A*x is subtracted on the
 *           boxes returned by hypre_ComputePkgIndtBoxes;
 *   pass 1: hypre_FinalizeIndtComputations is called on the handle from
 *           pass 0, and A*x is subtracted on the boxes returned by
 *           hypre_ComputePkgDeptBoxes.
 * NOTE(review): presumably the Initialize/Finalize pair brackets the
 * exchange of x's ghost values so that it overlaps with pass 0 - confirm
 * against the compute package implementation.
 *
 * residual_vdata : opaque pointer to a hypre_SMGResidualData
 * A, x, b        : inputs; r : output
 *
 * NOTE(review): the definition is cut off in this excerpt inside the
 * stencil loop; the closing of the loops, the end of timing and the return
 * statement are not visible.
 *--------------------------------------------------------------------------*/
int
hypre_SMGResidual( void               *residual_vdata,
                   hypre_StructMatrix *A,
                   hypre_StructVector *x,
                   hypre_StructVector *b,
                   hypre_StructVector *r              )
{
   int ierr = 0;

   hypre_SMGResidualData  *residual_data = residual_vdata;

   hypre_IndexRef          base_stride = (residual_data -> base_stride);
   hypre_BoxArray         *base_points = (residual_data -> base_points);
   hypre_ComputePkg       *compute_pkg = (residual_data -> compute_pkg);

   hypre_CommHandle       *comm_handle;
                       
   hypre_BoxArrayArray    *compute_box_aa;
   hypre_BoxArray         *compute_box_a;
   hypre_Box              *compute_box;
                       
   hypre_Box              *A_data_box;
   hypre_Box              *x_data_box;
   hypre_Box              *b_data_box;
   hypre_Box              *r_data_box;
                       
   int                     Ai;
   int                     xi;
   int                     bi;
   int                     ri;
                         
   double                 *Ap;
   double                 *xp;
   double                 *bp;
   double                 *rp;
                       
   hypre_Index             loop_size;
   hypre_IndexRef          start;
                       
   hypre_StructStencil    *stencil;
   hypre_Index            *stencil_shape;
   int                     stencil_size;

   int                     compute_i, i, j, si;
   int                     loopi, loopj, loopk;

   hypre_BeginTiming(residual_data -> time_index);

   /*-----------------------------------------------------------------------
    * Compute residual r = b - Ax
    *-----------------------------------------------------------------------*/

   stencil       = hypre_StructMatrixStencil(A);
   stencil_shape = hypre_StructStencilShape(stencil);
   stencil_size  = hypre_StructStencilSize(stencil);

   /* two passes: 0 = "Indt" boxes, 1 = "Dept" boxes of the compute package */
   for (compute_i = 0; compute_i < 2; compute_i++)
   {
      switch(compute_i)
      {
         case 0:
         {
            xp = hypre_StructVectorData(x);
            hypre_InitializeIndtComputations(compute_pkg, xp, &comm_handle);
            compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg);

            /*----------------------------------------
             * Copy b into r
             *----------------------------------------*/

            /* done once, in pass 0 only, over all base points */
            compute_box_a = base_points;
            hypre_ForBoxI(i, compute_box_a)
               {
                  compute_box = hypre_BoxArrayBox(compute_box_a, i);
                  start = hypre_BoxIMin(compute_box);

                  b_data_box =
                     hypre_BoxArrayBox(hypre_StructVectorDataSpace(b), i);
                  r_data_box =
                     hypre_BoxArrayBox(hypre_StructVectorDataSpace(r), i);

                  bp = hypre_StructVectorBoxData(b, i);
                  rp = hypre_StructVectorBoxData(r, i);

                  hypre_BoxGetStrideSize(compute_box, base_stride, loop_size);
                  hypre_BoxLoop2Begin(loop_size,
                                      b_data_box, start, base_stride, bi,
                                      r_data_box, start, base_stride, ri);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,bi,ri
#include "hypre_box_smp_forloop.h"
                  hypre_BoxLoop2For(loopi, loopj, loopk, bi, ri)
                     {
                        rp[ri] = bp[bi];
                     }
                  hypre_BoxLoop2End(bi, ri);
               }
         }
         break;

         case 1:
         {
            /* uses the handle that pass 0 stored in comm_handle */
            hypre_FinalizeIndtComputations(comm_handle);
            compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg);
         }
         break;
      }

      /*--------------------------------------------------------------------
       * Compute r -= A*x
       *--------------------------------------------------------------------*/

      hypre_ForBoxArrayI(i, compute_box_aa)
         {
            compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, i);

            A_data_box = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(A), i);
            x_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(x), i);
            r_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(r), i);

            rp = hypre_StructVectorBoxData(r, i);

            hypre_ForBoxI(j, compute_box_a)
               {
                  compute_box = hypre_BoxArrayBox(compute_box_a, j);

                  start  = hypre_BoxIMin(compute_box);

                  /* one sweep per stencil entry: xp is shifted by the
                     entry's offset so xp[xi] addresses the neighbour that
                     coefficient Ap[Ai] multiplies */
                  for (si = 0; si < stencil_size; si++)
                  {
                     Ap = hypre_StructMatrixBoxData(A, i, si);
                     xp = hypre_StructVectorBoxData(x, i) +
                        hypre_BoxOffsetDistance(x_data_box, stencil_shape[si]);

                     hypre_BoxGetStrideSize(compute_box, base_stride,
                                            loop_size);
                     hypre_BoxLoop3Begin(loop_size,
                                         A_data_box, start, base_stride, Ai,
                                         x_data_box, start, base_stride, xi,
                                         r_data_box, start, base_stride, ri);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,Ai,xi,ri
#include "hypre_box_smp_forloop.h"
                     hypre_BoxLoop3For(loopi, loopj, loopk, Ai, xi, ri)
                        {
                           rp[ri] -= Ap[Ai] * xp[xi];
                        }
                     hypre_BoxLoop3End(Ai, xi, ri);
                  }
Exemplo n.º 4
0
HYPRE_Int main( HYPRE_Int   argc, char *argv[] )
{
   HYPRE_Int                 arg_index;
   HYPRE_Int                 print_usage;
   HYPRE_Int                 build_matrix_arg_index;
   HYPRE_Int                 solver_id;
   HYPRE_Int                 ierr,i,j; 
   HYPRE_Int                 num_iterations; 

   HYPRE_ParCSRMatrix  parcsr_A;
   HYPRE_Int                 num_procs, myid;
   HYPRE_Int                 local_row;
   HYPRE_Int		       time_index;
   MPI_Comm            comm;
   HYPRE_Int                 M, N;
   HYPRE_Int                 first_local_row, last_local_row;
   HYPRE_Int                 first_local_col, last_local_col;
   HYPRE_Int                 size, *col_ind;
   HYPRE_Real          *values;

   /* parameters for BoomerAMG */
   HYPRE_Real          strong_threshold;
   HYPRE_Int                 num_grid_sweeps;  
   HYPRE_Real          relax_weight; 

   /* parameters for GMRES */
   HYPRE_Int	               k_dim;

   char *paramString = new char[100];

   /*-----------------------------------------------------------
    * Initialize some stuff
    *-----------------------------------------------------------*/

   hypre_MPI_Init(&argc, &argv);
   hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );

   /*-----------------------------------------------------------
    * Set defaults
    *-----------------------------------------------------------*/
 
   build_matrix_arg_index = argc;
   solver_id              = 0;
   strong_threshold       = 0.25;
   num_grid_sweeps        = 2;
   relax_weight           = 0.5;
   k_dim                  = 20;

   /*-----------------------------------------------------------
    * Parse command line
    *-----------------------------------------------------------*/
 
   print_usage = 0;
   arg_index = 1;

   while ( (arg_index < argc) && (!print_usage) )
   {
      if ( strcmp(argv[arg_index], "-solver") == 0 )
      {
         arg_index++;
         solver_id = atoi(argv[arg_index++]);
      }
      else if ( strcmp(argv[arg_index], "-dbg") == 0 )
      {
         arg_index++;
         atoi(argv[arg_index++]);
      }
      else if ( strcmp(argv[arg_index], "-help") == 0 )
      {
         print_usage = 1;
      }
      else
      {
         arg_index++;
      }
   }

   /*-----------------------------------------------------------
    * Print usage info
    *-----------------------------------------------------------*/
 
   if ( (print_usage) && (myid == 0) )
   {
      hypre_printf("\n");
      hypre_printf("Usage: %s [<options>]\n", argv[0]);
      hypre_printf("\n");
      hypre_printf("  -solver <ID>           : solver ID\n");
      hypre_printf("       0=DS-PCG      1=ParaSails-PCG \n");
      hypre_printf("       2=AMG-PCG     3=DS-GMRES     \n");
      hypre_printf("       4=PILUT-GMRES 5=AMG-GMRES    \n");     
      hypre_printf("\n");
      hypre_printf("  -rlx <val>             : relaxation type\n");
      hypre_printf("       0=Weighted Jacobi  \n");
      hypre_printf("       1=Gauss-Seidel (very slow!)  \n");
      hypre_printf("       3=Hybrid Jacobi/Gauss-Seidel  \n");
      hypre_printf("\n");  
      exit(1);
   }

   /*-----------------------------------------------------------
    * Print driver parameters
    *-----------------------------------------------------------*/
 
   if (myid == 0)
   {
      hypre_printf("Running with these driver parameters:\n");
      hypre_printf("  solver ID    = %d\n", solver_id);
   }

   /*-----------------------------------------------------------
    * Set up matrix
    *-----------------------------------------------------------*/

   strcpy(paramString, "LS Interface");
   time_index = hypre_InitializeTiming(paramString);
   hypre_BeginTiming(time_index);

   BuildParLaplacian27pt(argc, argv, build_matrix_arg_index, &parcsr_A);
    
   /*-----------------------------------------------------------
    * Copy the parcsr matrix into the LSI through interface calls
    *-----------------------------------------------------------*/

   ierr = HYPRE_ParCSRMatrixGetComm( parcsr_A, &comm );
   ierr += HYPRE_ParCSRMatrixGetDims( parcsr_A, &M, &N );
   ierr = HYPRE_ParCSRMatrixGetLocalRange( parcsr_A,
             &first_local_row, &last_local_row ,
             &first_local_col, &last_local_col );

   HYPRE_LinSysCore H(hypre_MPI_COMM_WORLD);
   HYPRE_Int numLocalEqns = last_local_row - first_local_row + 1;
   H.createMatricesAndVectors(M,first_local_row+1,numLocalEqns);

   HYPRE_Int index;
   HYPRE_Int *rowLengths = new HYPRE_Int[numLocalEqns];
   HYPRE_Int **colIndices = new HYPRE_Int*[numLocalEqns];

   local_row = 0;
   for (i=first_local_row; i<= last_local_row; i++)
   {
      ierr += HYPRE_ParCSRMatrixGetRow(parcsr_A,i,&size,&col_ind,&values );
      rowLengths[local_row] = size;
      colIndices[local_row] = new HYPRE_Int[size];
      for (j=0; j<size; j++) colIndices[local_row][j] = col_ind[j] + 1;
      local_row++;
      HYPRE_ParCSRMatrixRestoreRow(parcsr_A,i,&size,&col_ind,&values);
   }
   H.allocateMatrix(colIndices, rowLengths);
   delete [] rowLengths;
   for (i=0; i< numLocalEqns; i++) delete [] colIndices[i];
   delete [] colIndices;

   HYPRE_Int *newColInd;

   for (i=first_local_row; i<= last_local_row; i++)
   {
      ierr += HYPRE_ParCSRMatrixGetRow(parcsr_A,i,&size,&col_ind,&values );
      newColInd = new HYPRE_Int[size];
      for (j=0; j<size; j++) newColInd[j] = col_ind[j] + 1;
      H.sumIntoSystemMatrix(i+1,size,(const HYPRE_Real*)values,
                                     (const HYPRE_Int*)newColInd);
      delete [] newColInd;
      ierr += HYPRE_ParCSRMatrixRestoreRow(parcsr_A,i,&size,&col_ind,&values);
   }
   H.matrixLoadComplete();
   HYPRE_ParCSRMatrixDestroy(parcsr_A);

   /*-----------------------------------------------------------
    * Set up the RHS and initial guess
    *-----------------------------------------------------------*/

   HYPRE_Real ddata=1.0;
   HYPRE_Int  status;

   for (i=first_local_row; i<= last_local_row; i++)
   {
      index = i + 1;
      H.sumIntoRHSVector(1,(const HYPRE_Real*) &ddata, (const HYPRE_Int*) &index);
   }

   hypre_EndTiming(time_index);
   strcpy(paramString, "LS Interface");
   hypre_PrintTiming(paramString, hypre_MPI_COMM_WORLD);
   hypre_FinalizeTiming(time_index);
   hypre_ClearTiming();
 
   /*-----------------------------------------------------------
    * Solve the system using PCG 
    *-----------------------------------------------------------*/

   if ( solver_id == 0 ) 
   {
      strcpy(paramString, "solver cg");
      H.parameters(1, &paramString);
      if (myid == 0) hypre_printf("Solver: DS-PCG\n");

      strcpy(paramString, "preconditioner diagonal");
      H.parameters(1, &paramString);
   } 
   else if ( solver_id == 1 )
   {
      strcpy(paramString, "solver cg");
      H.parameters(1, &paramString);
      if (myid == 0) hypre_printf("Solver: ParaSails-PCG\n");

      strcpy(paramString, "preconditioner parasails");
      H.parameters(1, &paramString);
      strcpy(paramString, "parasailsNlevels 1");
      H.parameters(1, &paramString);
      strcpy(paramString, "parasailsThreshold 0.1");
      H.parameters(1, &paramString);
   }
   else if ( solver_id == 2 )
   {
      strcpy(paramString, "solver cg");
      H.parameters(1, &paramString);
      if (myid == 0) hypre_printf("Solver: AMG-PCG\n");

      strcpy(paramString, "preconditioner boomeramg");
      H.parameters(1, &paramString);
      strcpy(paramString, "amgCoarsenType falgout");
      H.parameters(1, &paramString);
      hypre_sprintf(paramString, "amgStrongThreshold %e", strong_threshold);
      H.parameters(1, &paramString);
      hypre_sprintf(paramString, "amgNumSweeps %d", num_grid_sweeps);
      H.parameters(1, &paramString);
      strcpy(paramString, "amgRelaxType jacobi");
      H.parameters(1, &paramString);
      hypre_sprintf(paramString, "amgRelaxWeight %e", relax_weight);
      H.parameters(1, &paramString);
   }
   else if ( solver_id == 3 )
   {
      strcpy(paramString, "solver cg");
      H.parameters(1, &paramString);
      if (myid == 0) hypre_printf("Solver: Poly-PCG\n");

      strcpy(paramString, "preconditioner poly");
      H.parameters(1, &paramString);
      strcpy(paramString, "polyOrder 9");
      H.parameters(1, &paramString);
   }
   else if ( solver_id == 4 )
   {
      strcpy(paramString, "solver gmres");
      H.parameters(1, &paramString);
      hypre_sprintf(paramString, "gmresDim %d", k_dim);
      H.parameters(1, &paramString);
      if (myid == 0) hypre_printf("Solver: DS-GMRES\n");

      strcpy(paramString, "preconditioner diagonal");
      H.parameters(1, &paramString);
   }
   else if ( solver_id == 5 ) 
   {
      strcpy(paramString, "solver gmres");
      H.parameters(1, &paramString);
      hypre_sprintf(paramString, "gmresDim %d", k_dim);
      H.parameters(1, &paramString);
      if (myid == 0) hypre_printf("Solver: PILUT-GMRES\n");

      strcpy(paramString, "preconditioner pilut");
      H.parameters(1, &paramString);
      strcpy(paramString, "pilutRowSize 0");
      H.parameters(1, &paramString);
      strcpy(paramString, "pilutDropTol 0.0");
      H.parameters(1, &paramString);
   }
   else if ( solver_id == 6 )
   {
      strcpy(paramString, "solver gmres");
      H.parameters(1, &paramString);
      hypre_sprintf(paramString, "gmresDim %d", k_dim);
      H.parameters(1, &paramString);
      if (myid == 0) hypre_printf("Solver: AMG-GMRES\n");

      strcpy(paramString, "preconditioner boomeramg");
      H.parameters(1, &paramString);
      strcpy(paramString, "amgCoarsenType falgout");
      H.parameters(1, &paramString);
      hypre_sprintf(paramString, "amgStrongThreshold %e", strong_threshold);
      H.parameters(1, &paramString);
      hypre_sprintf(paramString, "amgNumSweeps %d", num_grid_sweeps);
      H.parameters(1, &paramString);
      strcpy(paramString, "amgRelaxType jacobi");
      H.parameters(1, &paramString);
      hypre_sprintf(paramString, "amgRelaxWeight %e", relax_weight);
      H.parameters(1, &paramString);
   }
   else if ( solver_id == 7 )
   {
      strcpy(paramString, "solver gmres");
      H.parameters(1, &paramString);
      hypre_sprintf(paramString, "gmresDim %d", k_dim);
      H.parameters(1, &paramString);
      if (myid == 0) hypre_printf("Solver: DDILUT-GMRES\n");

      strcpy(paramString, "preconditioner ddilut");
      H.parameters(1, &paramString);
      strcpy(paramString, "ddilutFillin 5.0");
      H.parameters(1, &paramString);
      strcpy(paramString, "ddilutDropTol 0.0");
      H.parameters(1, &paramString);
   }
   else if ( solver_id == 8 )
   {
      strcpy(paramString, "solver gmres");
      H.parameters(1, &paramString);
      hypre_sprintf(paramString, "gmresDim %d", k_dim);
      H.parameters(1, &paramString);
      if (myid == 0) hypre_printf("Solver: POLY-GMRES\n");

      strcpy(paramString, "preconditioner poly");
      H.parameters(1, &paramString);
      strcpy(paramString, "polyOrder 5");
      H.parameters(1, &paramString);
   }
 
   strcpy(paramString, "Krylov Solve");
   time_index = hypre_InitializeTiming(paramString);
   hypre_BeginTiming(time_index);
 
   H.launchSolver(status, num_iterations);
 
   hypre_EndTiming(time_index);
   strcpy(paramString, "Solve phase times");
   hypre_PrintTiming(paramString, hypre_MPI_COMM_WORLD);
   hypre_FinalizeTiming(time_index);
   hypre_ClearTiming();
 
   if (myid == 0)
   {
      hypre_printf("\n Iterations = %d\n", num_iterations);
      hypre_printf("\n");
   }
 
   /*-----------------------------------------------------------
    * Finalize things
    *-----------------------------------------------------------*/

   delete [] paramString;
   hypre_MPI_Finalize();

   return (0);
}
Exemplo n.º 5
0
/*--------------------------------------------------------------------------
 * hypre_PointRelax
 *
 * Pointwise relaxation for the system A x = b, driven by the settings
 * stored in the relax data object (max_iter, zero_guess, weight, point
 * sets, ...).
 *
 * The portion of the routine visible here:
 *   - replaces the A, b, x references held by relax_data with references
 *     to the arguments and resets num_iterations to 0;
 *   - if max_iter is 0, optionally zeroes x (when zero_guess is set) and
 *     returns ierr (0);
 *   - if zero_guess is set, performs a first sweep on the first point set
 *     that sets x = b / A_diag pointwise, where A_diag is the stencil
 *     entry with rank diag_rank.
 *
 * relax_vdata : opaque pointer to a hypre_PointRelaxData
 * A, b        : inputs; x : updated in place
 *
 * NOTE(review): the definition is cut off in this excerpt after the
 * zero-guess sweep; the general iteration (which presumably uses 'weight',
 * 't' and the remaining point sets) and the final return are not visible.
 *--------------------------------------------------------------------------*/
int
hypre_PointRelax( void               *relax_vdata,
                  hypre_StructMatrix *A,
                  hypre_StructVector *b,
                  hypre_StructVector *x           )
{
   hypre_PointRelaxData *relax_data = (hypre_PointRelaxData *)relax_vdata;

   int                    max_iter         = (relax_data -> max_iter);
   int                    zero_guess       = (relax_data -> zero_guess);
   double                 weight           = (relax_data -> weight);
   int                    num_pointsets    = (relax_data -> num_pointsets);
   int                   *pointset_ranks   = (relax_data -> pointset_ranks);
   hypre_Index           *pointset_strides = (relax_data -> pointset_strides);
   hypre_StructVector    *t                = (relax_data -> t);
   int                    diag_rank        = (relax_data -> diag_rank);
   hypre_ComputePkg     **compute_pkgs     = (relax_data -> compute_pkgs);

   hypre_ComputePkg      *compute_pkg;
   hypre_CommHandle      *comm_handle;
                        
   hypre_BoxArrayArray   *compute_box_aa;
   hypre_BoxArray        *compute_box_a;
   hypre_Box             *compute_box;
                        
   hypre_Box             *A_data_box;
   hypre_Box             *b_data_box;
   hypre_Box             *x_data_box;
   hypre_Box             *t_data_box;
                        
   int                    Ai;
   int                    bi;
   int                    xi;
   int                    ti;
                        
   double                *Ap;
   double                *bp;
   double                *xp;
   double                *tp;
                        
   hypre_IndexRef         stride;
   hypre_IndexRef         start;
   hypre_Index            loop_size;
                        
   hypre_StructStencil   *stencil;
   hypre_Index           *stencil_shape;
   int                    stencil_size;
                        
   int                    iter, p, compute_i, i, j, si;
   int                    loopi, loopj, loopk;
   int                    pointset;

   int                    ierr = 0;

   /*----------------------------------------------------------
    * Initialize some things and deal with special cases
    *----------------------------------------------------------*/

   hypre_BeginTiming(relax_data -> time_index);

   /* swap the stored A/b/x for the ones passed in.
      NOTE(review): the old references are destroyed before the new ones
      are taken; if a caller passes the very object already stored here,
      this order may release it first - confirm against the reference
      counting done by the Destroy/Ref routines. */
   hypre_StructMatrixDestroy(relax_data -> A);
   hypre_StructVectorDestroy(relax_data -> b);
   hypre_StructVectorDestroy(relax_data -> x);
   (relax_data -> A) = hypre_StructMatrixRef(A);
   (relax_data -> x) = hypre_StructVectorRef(x);
   (relax_data -> b) = hypre_StructVectorRef(b);

   (relax_data -> num_iterations) = 0;

   /* if max_iter is zero, return */
   if (max_iter == 0)
   {
      /* if using a zero initial guess, return zero */
      if (zero_guess)
      {
         hypre_StructVectorSetConstantValues(x, 0.0);
      }

      hypre_EndTiming(relax_data -> time_index);
      return ierr;
   }

   stencil       = hypre_StructMatrixStencil(A);
   stencil_shape = hypre_StructStencilShape(stencil);
   stencil_size  = hypre_StructStencilSize(stencil);

   /*----------------------------------------------------------
    * Do zero_guess iteration
    *----------------------------------------------------------*/

   p    = 0;
   iter = 0;

   if (zero_guess)
   {
      /* first point set only (p == 0) */
      pointset = pointset_ranks[p];
      compute_pkg = compute_pkgs[pointset];
      stride = pointset_strides[pointset];

      /* both box groups are swept with the same kernel; no communication
         call is made in this sweep */
      for (compute_i = 0; compute_i < 2; compute_i++)
      {
         switch(compute_i)
         {
            case 0:
            {
               compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg);
            }
            break;

            case 1:
            {
               compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg);
            }
            break;
         }

         hypre_ForBoxArrayI(i, compute_box_aa)
            {
               compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, i);

               A_data_box =
                  hypre_BoxArrayBox(hypre_StructMatrixDataSpace(A), i);
               b_data_box =
                  hypre_BoxArrayBox(hypre_StructVectorDataSpace(b), i);
               x_data_box =
                  hypre_BoxArrayBox(hypre_StructVectorDataSpace(x), i);

               /* Ap addresses the stencil entry with rank diag_rank */
               Ap = hypre_StructMatrixBoxData(A, i, diag_rank);
               bp = hypre_StructVectorBoxData(b, i);
               xp = hypre_StructVectorBoxData(x, i);

               hypre_ForBoxI(j, compute_box_a)
                  {
                     compute_box = hypre_BoxArrayBox(compute_box_a, j);

                     start  = hypre_BoxIMin(compute_box);
                     hypre_BoxGetStrideSize(compute_box, stride, loop_size);

                     hypre_BoxLoop3Begin(loop_size,
                                         A_data_box, start, stride, Ai,
                                         b_data_box, start, stride, bi,
                                         x_data_box, start, stride, xi);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,Ai,bi,xi
#include "hypre_box_smp_forloop.h"
                     hypre_BoxLoop3For(loopi, loopj, loopk, Ai, bi, xi)
                        {
                           xp[xi] = bp[bi] / Ap[Ai];
                        }
                     hypre_BoxLoop3End(Ai, bi, xi);
                  }
            }
      }
Exemplo n.º 6
0
hypre_int
main( hypre_int argc,
      char *argv[] )
{
   HYPRE_Int           arg_index;
   HYPRE_Int           print_usage;
   HYPRE_Int           nx, ny, nz;
   HYPRE_Int           P, Q, R;
   HYPRE_Int           bx, by, bz;

   HYPRE_StructGrid    from_grid, to_grid;
   HYPRE_StructVector  from_vector, to_vector, check_vector;
   HYPRE_CommPkg       comm_pkg;

   HYPRE_Int           time_index;
   HYPRE_Int           num_procs, myid;

   HYPRE_Int           p, q, r;
   HYPRE_Int           dim;
   HYPRE_Int           nblocks ;
   HYPRE_Int         **ilower, **iupper, **iupper2;
   HYPRE_Int           istart[3];
   HYPRE_Int           i, ix, iy, iz, ib;
   HYPRE_Int           print_system = 0;

   HYPRE_Real          check;

   /*-----------------------------------------------------------
    * Initialize some stuff
    *-----------------------------------------------------------*/

   /* Initialize MPI */
   hypre_MPI_Init(&argc, &argv);

   hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );

   /*-----------------------------------------------------------
    * Set defaults
    *-----------------------------------------------------------*/
 
   dim = 3;

   nx = 2;
   ny = 2;
   nz = 2;

   P  = num_procs;
   Q  = 1;
   R  = 1;

   bx = 1;
   by = 1;
   bz = 1;

   istart[0] = 1;
   istart[1] = 1;
   istart[2] = 1;

   /*-----------------------------------------------------------
    * Parse command line
    *-----------------------------------------------------------*/
 
   print_usage = 0;
   arg_index = 1;
   while (arg_index < argc)
   {
      if ( strcmp(argv[arg_index], "-n") == 0 )
      {
         arg_index++;
         nx = atoi(argv[arg_index++]);
         ny = atoi(argv[arg_index++]);
         nz = atoi(argv[arg_index++]);
      }
      else if ( strcmp(argv[arg_index], "-istart") == 0 )
      {
         arg_index++;
         istart[0] = atoi(argv[arg_index++]);
         istart[1] = atoi(argv[arg_index++]);
         istart[2] = atoi(argv[arg_index++]);
      }
      else if ( strcmp(argv[arg_index], "-P") == 0 )
      {
         arg_index++;
         P  = atoi(argv[arg_index++]);
         Q  = atoi(argv[arg_index++]);
         R  = atoi(argv[arg_index++]);
      }
      else if ( strcmp(argv[arg_index], "-b") == 0 )
      {
         arg_index++;
         bx = atoi(argv[arg_index++]);
         by = atoi(argv[arg_index++]);
         bz = atoi(argv[arg_index++]);
      }
      else if ( strcmp(argv[arg_index], "-d") == 0 )
      {
         arg_index++;
         dim = atoi(argv[arg_index++]);
      }
      else if ( strcmp(argv[arg_index], "-print") == 0 )
      {
         arg_index++;
         print_system = 1;
      }
      else if ( strcmp(argv[arg_index], "-help") == 0 )
      {
         print_usage = 1;
         break;
      }
      else
      {
         arg_index++;
      }
   }

   /*-----------------------------------------------------------
    * Print usage info
    *-----------------------------------------------------------*/
 
   if ( (print_usage) && (myid == 0) )
   {
      hypre_printf("\n");
      hypre_printf("Usage: %s [<options>]\n", argv[0]);
      hypre_printf("\n");
      hypre_printf("  -n <nx> <ny> <nz>   : problem size per block\n");
      hypre_printf("  -istart <ix> <iy> <iz> : start of box\n");
      hypre_printf("  -P <Px> <Py> <Pz>   : processor topology\n");
      hypre_printf("  -b <bx> <by> <bz>   : blocking per processor\n");
      hypre_printf("  -d <dim>            : problem dimension (2 or 3)\n");
      hypre_printf("  -print              : print vectors\n");
      hypre_printf("\n");
   }

   if ( print_usage )
   {
      exit(1);
   }

   /*-----------------------------------------------------------
    * Check a few things
    *-----------------------------------------------------------*/

   if ((P*Q*R) > num_procs)
   {
      if (myid == 0)
      {
         hypre_printf("Error: PxQxR is more than the number of processors\n");
      }
      exit(1);
   }
   else if ((P*Q*R) < num_procs)
   {
      if (myid == 0)
      {
         hypre_printf("Warning: PxQxR is less than the number of processors\n");
      }
   }

   /*-----------------------------------------------------------
    * Print driver parameters
    *-----------------------------------------------------------*/
 
   if (myid == 0)
   {
      hypre_printf("Running with these driver parameters:\n");
      hypre_printf("  (nx, ny, nz)    = (%d, %d, %d)\n", nx, ny, nz);
      hypre_printf("  (ix, iy, iz)    = (%d, %d, %d)\n",
                   istart[0],istart[1],istart[2]);
      hypre_printf("  (Px, Py, Pz)    = (%d, %d, %d)\n", P,  Q,  R);
      hypre_printf("  (bx, by, bz)    = (%d, %d, %d)\n", bx, by, bz);
      hypre_printf("  dim             = %d\n", dim);
   }

   /*-----------------------------------------------------------
    * Set up the stencil structure (7 points) when matrix is NOT read from file
    * Set up the grid structure used when NO files are read
    *-----------------------------------------------------------*/

   switch (dim)
   {
      case 1:
         nblocks = bx;
         p = myid % P;
         break;

      case 2:
         nblocks = bx*by;
         p = myid % P;
         q = (( myid - p)/P) % Q;
         break;

      case 3:
         nblocks = bx*by*bz;
         p = myid % P;
         q = (( myid - p)/P) % Q;
         r = ( myid - p - P*q)/( P*Q );
         break;
   }

   if (myid >= (P*Q*R))
   {
      /* My processor has no data on it */
      nblocks = bx = by = bz = 0;
   }

   /*-----------------------------------------------------------
    * prepare space for the extents
    *-----------------------------------------------------------*/

   ilower = hypre_CTAlloc(HYPRE_Int*, nblocks);
   iupper = hypre_CTAlloc(HYPRE_Int*, nblocks);
   iupper2 = hypre_CTAlloc(HYPRE_Int*, nblocks);
   for (i = 0; i < nblocks; i++)
   {
      ilower[i] = hypre_CTAlloc(HYPRE_Int, dim);
      iupper[i] = hypre_CTAlloc(HYPRE_Int, dim);
      iupper2[i] = hypre_CTAlloc(HYPRE_Int, dim);
   }

   ib = 0;
   switch (dim)
   {
      case 1:
         for (ix = 0; ix < bx; ix++)
         {
            ilower[ib][0] = istart[0]+ nx*(bx*p+ix);
            iupper[ib][0] = istart[0]+ nx*(bx*p+ix+1) - 1;
            iupper2[ib][0] = iupper[ib][0];
            if ( (ix == (bx-1)) && (p < (P-1)) )
               iupper2[ib][0] = iupper[ib][0] + 1;
            ib++;
         }
         break;
      case 2:
         for (iy = 0; iy < by; iy++)
            for (ix = 0; ix < bx; ix++)
            {
               ilower[ib][0] = istart[0]+ nx*(bx*p+ix);
               iupper[ib][0] = istart[0]+ nx*(bx*p+ix+1) - 1;
               ilower[ib][1] = istart[1]+ ny*(by*q+iy);
               iupper[ib][1] = istart[1]+ ny*(by*q+iy+1) - 1;
               iupper2[ib][0] = iupper[ib][0];
               iupper2[ib][1] = iupper[ib][1];
               if ( (ix == (bx-1)) && (p < (P-1)) )
                  iupper2[ib][0] = iupper[ib][0] + 1;
               if ( (iy == (by-1)) && (q < (Q-1)) )
                  iupper2[ib][1] = iupper[ib][1] + 1;
               ib++;
            }
         break;
      case 3:
         for (iz = 0; iz < bz; iz++)
            for (iy = 0; iy < by; iy++)
               for (ix = 0; ix < bx; ix++)
               {
                  ilower[ib][0] = istart[0]+ nx*(bx*p+ix);
                  iupper[ib][0] = istart[0]+ nx*(bx*p+ix+1) - 1;
                  ilower[ib][1] = istart[1]+ ny*(by*q+iy);
                  iupper[ib][1] = istart[1]+ ny*(by*q+iy+1) - 1;
                  ilower[ib][2] = istart[2]+ nz*(bz*r+iz);
                  iupper[ib][2] = istart[2]+ nz*(bz*r+iz+1) - 1;
                  iupper2[ib][0] = iupper[ib][0];
                  iupper2[ib][1] = iupper[ib][1];
                  iupper2[ib][2] = iupper[ib][2];
                  if ( (ix == (bx-1)) && (p < (P-1)) )
                     iupper2[ib][0] = iupper[ib][0] + 1;
                  if ( (iy == (by-1)) && (q < (Q-1)) )
                     iupper2[ib][1] = iupper[ib][1] + 1;
                  if ( (iz == (bz-1)) && (r < (R-1)) )
                     iupper2[ib][2] = iupper[ib][2] + 1;
                  ib++;
               }
         break;
   }

   HYPRE_StructGridCreate(hypre_MPI_COMM_WORLD, dim, &from_grid);
   HYPRE_StructGridCreate(hypre_MPI_COMM_WORLD, dim, &to_grid);
   for (ib = 0; ib < nblocks; ib++)
   {
      HYPRE_StructGridSetExtents(from_grid, ilower[ib], iupper[ib]);
      HYPRE_StructGridSetExtents(to_grid, ilower[ib], iupper2[ib]);
   }
   HYPRE_StructGridAssemble(from_grid);
   HYPRE_StructGridAssemble(to_grid);

   /*-----------------------------------------------------------
    * Set up the vectors
    *-----------------------------------------------------------*/

   HYPRE_StructVectorCreate(hypre_MPI_COMM_WORLD, from_grid, &from_vector);
   HYPRE_StructVectorInitialize(from_vector);
   AddValuesVector(from_grid, from_vector, 1.0);
   HYPRE_StructVectorAssemble(from_vector);

   HYPRE_StructVectorCreate(hypre_MPI_COMM_WORLD, to_grid, &to_vector);
   HYPRE_StructVectorInitialize(to_vector);
   AddValuesVector(to_grid, to_vector, 0.0);
   HYPRE_StructVectorAssemble(to_vector);

   /* Vector used to check the migration */
   HYPRE_StructVectorCreate(hypre_MPI_COMM_WORLD, to_grid, &check_vector);
   HYPRE_StructVectorInitialize(check_vector);
   AddValuesVector(to_grid, check_vector, 1.0);
   HYPRE_StructVectorAssemble(check_vector);

   /*-----------------------------------------------------------
    * Migrate
    *-----------------------------------------------------------*/

   time_index = hypre_InitializeTiming("Struct Migrate");
   hypre_BeginTiming(time_index);

   HYPRE_StructVectorGetMigrateCommPkg(from_vector, to_vector, &comm_pkg);
   HYPRE_StructVectorMigrate(comm_pkg, from_vector, to_vector);
   HYPRE_CommPkgDestroy(comm_pkg);

   hypre_EndTiming(time_index);
   hypre_PrintTiming("Struct Migrate", hypre_MPI_COMM_WORLD);
   hypre_FinalizeTiming(time_index);

   /*-----------------------------------------------------------
    * Check the migration and print the result
    *-----------------------------------------------------------*/

   hypre_StructAxpy(-1.0, to_vector, check_vector);
   check = hypre_StructInnerProd (check_vector, check_vector);

   if (myid == 0)
   {
      printf("\nCheck = %1.0f (success = 0)\n\n", check);
   }

   /*-----------------------------------------------------------
    * Print out the vectors
    *-----------------------------------------------------------*/

   if (print_system)
   {
      HYPRE_StructVectorPrint("struct_migrate.out.xfr", from_vector, 0);
      HYPRE_StructVectorPrint("struct_migrate.out.xto", to_vector, 0);
   }

   /*-----------------------------------------------------------
    * Finalize things
    *-----------------------------------------------------------*/

   HYPRE_StructGridDestroy(from_grid);
   HYPRE_StructGridDestroy(to_grid);
   
   for (i = 0; i < nblocks; i++)
   {
      hypre_TFree(ilower[i]);
      hypre_TFree(iupper[i]);
      hypre_TFree(iupper2[i]);
   }
   hypre_TFree(ilower);
   hypre_TFree(iupper);
   hypre_TFree(iupper2);

   HYPRE_StructVectorDestroy(from_vector);
   HYPRE_StructVectorDestroy(to_vector);
   HYPRE_StructVectorDestroy(check_vector);

   /* Finalize MPI */
   hypre_MPI_Finalize();

   return (0);
}
Exemplo n.º 7
0
/*--------------------------------------------------------------------------
 * hypre_SemiRestrict
 *
 * Restricts the vector r onto the vector rc using two coefficients of the
 * matrix R.  For every point visited by the box loop below it evaluates
 *
 *    rcp[rci] = rp[ri] + (Rp0[Ri] * rp0[ri] + Rp1[Ri] * rp1[ri])
 *
 * where rp0/rp1 are rp shifted by the offsets of stencil entries 0 and 1.
 *
 * Parameters:
 *    restrict_vdata - opaque pointer, cast to hypre_SemiRestrictData; supplies
 *                     time_index, R_stored_as_transpose, compute_pkg, cindex
 *                     and stride
 *    R              - matrix whose stencil entries 0 and 1 provide the weights
 *    r              - input vector (indexed with 'stride' from the box start)
 *    rc             - output vector (indexed with unit stride from the mapped
 *                     start 'startc')
 *
 * NOTE(review): this listing ends inside the compute_i loop; the rest of the
 * function (loop close, timing end, return) is not visible here.
 *--------------------------------------------------------------------------*/
int
hypre_SemiRestrict( void               *restrict_vdata,
                    hypre_StructMatrix *R,
                    hypre_StructVector *r,
                    hypre_StructVector *rc             )
{
   int ierr = 0;

   hypre_SemiRestrictData *restrict_data = (hypre_SemiRestrictData *)restrict_vdata;

   int                     R_stored_as_transpose;
   hypre_ComputePkg       *compute_pkg;
   hypre_IndexRef          cindex;
   hypre_IndexRef          stride;

   hypre_StructGrid       *fgrid;
   int                    *fgrid_ids;
   hypre_StructGrid       *cgrid;
   hypre_BoxArray         *cgrid_boxes;
   int                    *cgrid_ids;

   hypre_CommHandle       *comm_handle;
                       
   hypre_BoxArrayArray    *compute_box_aa;
   hypre_BoxArray         *compute_box_a;
   hypre_Box              *compute_box;
                       
   hypre_Box              *R_dbox;
   hypre_Box              *r_dbox;
   hypre_Box              *rc_dbox;
                       
   int                     Ri;
   int                     ri;
   int                     rci;
                         
   double                 *Rp0, *Rp1;
   double                 *rp, *rp0, *rp1;
   double                 *rcp;
                       
   hypre_Index             loop_size;
   hypre_IndexRef          start;
   hypre_Index             startc;
   hypre_Index             stridec;
                       
   hypre_StructStencil    *stencil;
   hypre_Index            *stencil_shape;

   int                     compute_i, fi, ci, j;
   int                     loopi, loopj, loopk;

   /*-----------------------------------------------------------------------
    * Initialize some things.
    *-----------------------------------------------------------------------*/

   hypre_BeginTiming(restrict_data -> time_index);

   /* Unpack the settings stored in the restriction data object */
   R_stored_as_transpose = (restrict_data -> R_stored_as_transpose);
   compute_pkg   = (restrict_data -> compute_pkg);
   cindex        = (restrict_data -> cindex);
   stride        = (restrict_data -> stride);

   stencil       = hypre_StructMatrixStencil(R);
   stencil_shape = hypre_StructStencilShape(stencil);

   /* R and rc are traversed with unit stride; r is traversed with 'stride' */
   hypre_SetIndex(stridec, 1, 1, 1);

   /*--------------------------------------------------------------------
    * Restrict the residual.
    *--------------------------------------------------------------------*/

   fgrid = hypre_StructVectorGrid(r);
   fgrid_ids = hypre_StructGridIDs(fgrid);
   cgrid = hypre_StructVectorGrid(rc);
   cgrid_boxes = hypre_StructGridBoxes(cgrid);
   cgrid_ids = hypre_StructGridIDs(cgrid);

   /* Two passes: pass 0 starts the communication on r and works on the
    * "Indt" boxes, pass 1 finalizes the communication and works on the
    * "Dept" boxes (presumably independent / dependent of the exchanged
    * data -- confirm against the compute package documentation). */
   for (compute_i = 0; compute_i < 2; compute_i++)
   {
      switch(compute_i)
      {
         case 0:
         {
            rp = hypre_StructVectorData(r);
            hypre_InitializeIndtComputations(compute_pkg, rp, &comm_handle);
            compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg);
         }
         break;

         case 1:
         {
            hypre_FinalizeIndtComputations(comm_handle);
            compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg);
         }
         break;
      }

      /* fi indexes boxes of r's grid, ci indexes boxes of rc's grid; fi is
       * only ever advanced, never reset, within one pass. */
      fi = 0;
      hypre_ForBoxArrayI(ci, cgrid_boxes)
         {
            /* Advance fi to the box of r's grid whose id equals the id of box
             * ci.  NOTE(review): there is no bounds check, so this relies on
             * every id in cgrid_ids appearing in fgrid_ids at or after fi. */
            while (fgrid_ids[fi] != cgrid_ids[ci])
            {
               fi++;
            }

            compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, fi);

            R_dbox  = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(R),  fi);
            r_dbox  = hypre_BoxArrayBox(hypre_StructVectorDataSpace(r),  fi);
            rc_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(rc), ci);

            /* Select the two coefficient arrays.  In the transpose case the
             * roles of entries 0 and 1 are swapped and the entry-1 pointer is
             * shifted back by the offset of stencil_shape[1]. */
            if (R_stored_as_transpose)
            {
               Rp0 = hypre_StructMatrixBoxData(R, fi, 1) -
                  hypre_BoxOffsetDistance(R_dbox, stencil_shape[1]);
               Rp1 = hypre_StructMatrixBoxData(R, fi, 0);
            }
            else
            {
               Rp0 = hypre_StructMatrixBoxData(R, fi, 0);
               Rp1 = hypre_StructMatrixBoxData(R, fi, 1);
            }
            /* rp0 / rp1 view r displaced by stencil offsets 0 and 1 */
            rp  = hypre_StructVectorBoxData(r, fi);
            rp0 = rp + hypre_BoxOffsetDistance(r_dbox, stencil_shape[0]);
            rp1 = rp + hypre_BoxOffsetDistance(r_dbox, stencil_shape[1]);
            rcp = hypre_StructVectorBoxData(rc, ci);

            hypre_ForBoxI(j, compute_box_a)
               {
                  compute_box = hypre_BoxArrayBox(compute_box_a, j);

                  /* startc is the image of the box's lower corner under
                   * hypre_StructMapFineToCoarse with (cindex, stride) */
                  start  = hypre_BoxIMin(compute_box);
                  hypre_StructMapFineToCoarse(start, cindex, stride, startc);

                  /* Ri and rci step from startc with stridec; ri steps from
                   * start with stride */
                  hypre_BoxGetStrideSize(compute_box, stride, loop_size);
                  hypre_BoxLoop3Begin(loop_size,
                                      R_dbox,  startc, stridec, Ri,
                                      r_dbox,  start,  stride,  ri,
                                      rc_dbox, startc, stridec, rci);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,Ri,ri,rci
#include "hypre_box_smp_forloop.h"
                  hypre_BoxLoop3For(loopi, loopj, loopk, Ri, ri, rci)
                     {
                        /* value at the point plus the two weighted
                         * stencil-offset values */
                        rcp[rci] = rp[ri] + (Rp0[Ri] * rp0[ri] +
                                             Rp1[Ri] * rp1[ri]);
                     }
                  hypre_BoxLoop3End(Ri, ri, rci);
               }
         }
Exemplo n.º 8
0
/*--------------------------------------------------------------------------
 * hypre_SysPFMGSolve
 *
 * Runs up to max_iter multigrid V-cycles on the level hierarchy stored in
 * the SysPFMG data object, using the PMatrix / PVectors of part 0 of the
 * inputs as the finest level.
 *
 * Parameters:
 *    sys_pfmg_vdata - opaque pointer to a hypre_SysPFMGData object holding
 *                     the solver settings and per-level data
 *    A_in           - system matrix; only hypre_SStructMatrixPMatrix(A_in, 0)
 *                     is used
 *    b_in           - right-hand side; only part 0 is used
 *    x_in           - initial guess on entry, approximate solution on exit;
 *                     only part 0 is used
 *
 * Side effects:
 *    - A_l[0], b_l[0], x_l[0] in the data object are destroyed and replaced
 *      by new references to the input part-0 objects.
 *    - num_iterations is updated; norms[] / rel_norms[] are filled when
 *      logging > 0 and tol > 0.
 *
 * Returns ierr, which this routine never sets to anything but 0 (return
 * codes of the called routines are not inspected).
 *
 * All exits go through the 'finish' label so that the temporary references
 * taken on A, b and x are always released and the timer is always stopped.
 * Previously the two early returns (max_iter == 0 and zero right-hand side)
 * skipped the Destroy calls and leaked those references.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_SysPFMGSolve( void                 *sys_pfmg_vdata,
                    hypre_SStructMatrix  *A_in,
                    hypre_SStructVector  *b_in,
                    hypre_SStructVector  *x_in         )
{
   hypre_SysPFMGData       *sys_pfmg_data = sys_pfmg_vdata;

   hypre_SStructPMatrix *A;
   hypre_SStructPVector *b;
   hypre_SStructPVector *x;

   double                tol             = (sys_pfmg_data -> tol);
   HYPRE_Int             max_iter        = (sys_pfmg_data -> max_iter);
   HYPRE_Int             rel_change      = (sys_pfmg_data -> rel_change);
   HYPRE_Int             zero_guess      = (sys_pfmg_data -> zero_guess);
   HYPRE_Int             num_pre_relax   = (sys_pfmg_data -> num_pre_relax);
   HYPRE_Int             num_post_relax  = (sys_pfmg_data -> num_post_relax);
   HYPRE_Int             num_levels      = (sys_pfmg_data -> num_levels);
   hypre_SStructPMatrix  **A_l           = (sys_pfmg_data -> A_l);
   hypre_SStructPMatrix  **P_l           = (sys_pfmg_data -> P_l);
   hypre_SStructPMatrix  **RT_l          = (sys_pfmg_data -> RT_l);
   hypre_SStructPVector  **b_l           = (sys_pfmg_data -> b_l);
   hypre_SStructPVector  **x_l           = (sys_pfmg_data -> x_l);
   hypre_SStructPVector  **r_l           = (sys_pfmg_data -> r_l);
   hypre_SStructPVector  **e_l           = (sys_pfmg_data -> e_l);
   void                **relax_data_l    = (sys_pfmg_data -> relax_data_l);
   void                **matvec_data_l   = (sys_pfmg_data -> matvec_data_l);
   void                **restrict_data_l = (sys_pfmg_data -> restrict_data_l);
   void                **interp_data_l   = (sys_pfmg_data -> interp_data_l);
   HYPRE_Int             logging         = (sys_pfmg_data -> logging);
   double               *norms           = (sys_pfmg_data -> norms);
   double               *rel_norms       = (sys_pfmg_data -> rel_norms);
   HYPRE_Int            *active_l        = (sys_pfmg_data -> active_l);

   /* Initialized only to keep compilers quiet: each value is assigned by
    * the code below before it is read. */
   double                b_dot_b = 0.0, r_dot_r = 0.0, eps = 0.0;
   double                e_dot_e = 0.0, x_dot_x = 1.0;

   HYPRE_Int             i, l;

   HYPRE_Int             ierr = 0;
#if DEBUG
   char                  filename[255];
#endif

   /*-----------------------------------------------------
    * Initialize some things and deal with special cases
    *-----------------------------------------------------*/

   hypre_BeginTiming(sys_pfmg_data -> time_index);

   /*-----------------------------------------------------
    * Refs to A,x,b (the PMatrix & PVectors within
    * the input SStructMatrix & SStructVectors)
    *-----------------------------------------------------*/
   hypre_SStructPMatrixRef(hypre_SStructMatrixPMatrix(A_in, 0), &A);
   hypre_SStructPVectorRef(hypre_SStructVectorPVector(b_in, 0), &b);
   hypre_SStructPVectorRef(hypre_SStructVectorPVector(x_in, 0), &x);

   /* Replace the finest-level objects held by the solver with references
    * to the objects passed in for this solve. */
   hypre_SStructPMatrixDestroy(A_l[0]);
   hypre_SStructPVectorDestroy(b_l[0]);
   hypre_SStructPVectorDestroy(x_l[0]);
   hypre_SStructPMatrixRef(A, &A_l[0]);
   hypre_SStructPVectorRef(b, &b_l[0]);
   hypre_SStructPVectorRef(x, &x_l[0]);

   (sys_pfmg_data -> num_iterations) = 0;

   /* if max_iter is zero, return */
   if (max_iter == 0)
   {
      /* if using a zero initial guess, return zero */
      if (zero_guess)
      {
         hypre_SStructPVectorSetConstantValues(x, 0.0);
      }

      goto finish;
   }

   /* part of convergence check */
   if (tol > 0.0)
   {
      /* eps = (tol^2) */
      hypre_SStructPInnerProd(b_l[0], b_l[0], &b_dot_b);
      eps = tol*tol;

      /* if rhs is zero, return a zero solution */
      if (b_dot_b == 0.0)
      {
         hypre_SStructPVectorSetConstantValues(x, 0.0);
         if (logging > 0)
         {
            norms[0]     = 0.0;
            rel_norms[0] = 0.0;
         }

         goto finish;
      }
   }

   /*-----------------------------------------------------
    * Do V-cycles:
    *   For each index l, "fine" = l, "coarse" = (l+1)
    *-----------------------------------------------------*/

   for (i = 0; i < max_iter; i++)
   {
      /*--------------------------------------------------
       * Down cycle
       *--------------------------------------------------*/

      /* fine grid pre-relaxation */
      hypre_SysPFMGRelaxSetPreRelax(relax_data_l[0]);
      hypre_SysPFMGRelaxSetMaxIter(relax_data_l[0], num_pre_relax);
      hypre_SysPFMGRelaxSetZeroGuess(relax_data_l[0], zero_guess);
      hypre_SysPFMGRelax(relax_data_l[0], A_l[0], b_l[0], x_l[0]);
      /* the zero-guess setting only applies to the very first relaxation */
      zero_guess = 0;

      /* compute fine grid residual (b - Ax) */
      hypre_SStructPCopy(b_l[0], r_l[0]);
      hypre_SStructPMatvecCompute(matvec_data_l[0],
                                 -1.0, A_l[0], x_l[0], 1.0, r_l[0]);

      /* convergence check */
      if (tol > 0.0)
      {
         hypre_SStructPInnerProd(r_l[0], r_l[0], &r_dot_r);

         if (logging > 0)
         {
            norms[i] = sqrt(r_dot_r);
            if (b_dot_b > 0)
               rel_norms[i] = sqrt(r_dot_r/b_dot_b);
            else
               rel_norms[i] = 0.0;
         }

         /* always do at least 1 V-cycle; e_dot_e and x_dot_x used here were
          * computed at the end of the previous iteration */
         if ((r_dot_r/b_dot_b < eps) && (i > 0))
         {
            if (rel_change)
            {
               if ((e_dot_e/x_dot_x) < eps)
                  break;
            }
            else
            {
               break;
            }
         }
      }

      if (num_levels > 1)
      {
         /* restrict fine grid residual */
         hypre_SysSemiRestrict(restrict_data_l[0], RT_l[0], r_l[0], b_l[1]);
#if DEBUG
         hypre_sprintf(filename, "zout_xdown.%02d", 0);
         hypre_SStructPVectorPrint(filename, x_l[0], 0);
         hypre_sprintf(filename, "zout_rdown.%02d", 0);
         hypre_SStructPVectorPrint(filename, r_l[0], 0);
         hypre_sprintf(filename, "zout_b.%02d", 1);
         hypre_SStructPVectorPrint(filename, b_l[1], 0);
#endif
         for (l = 1; l <= (num_levels - 2); l++)
         {
            if (active_l[l])
            {
               /* pre-relaxation */
               hypre_SysPFMGRelaxSetPreRelax(relax_data_l[l]);
               hypre_SysPFMGRelaxSetMaxIter(relax_data_l[l], num_pre_relax);
               hypre_SysPFMGRelaxSetZeroGuess(relax_data_l[l], 1);
               hypre_SysPFMGRelax(relax_data_l[l], A_l[l], b_l[l], x_l[l]);

               /* compute residual (b - Ax) */
               hypre_SStructPCopy(b_l[l], r_l[l]);
               hypre_SStructPMatvecCompute(matvec_data_l[l],
                                          -1.0, A_l[l], x_l[l], 1.0, r_l[l]);
            }
            else
            {
               /* inactive level, set x=0, so r=(b-Ax)=b */
               hypre_SStructPVectorSetConstantValues(x_l[l], 0.0);
               hypre_SStructPCopy(b_l[l], r_l[l]);
            }

            /* restrict residual */
            hypre_SysSemiRestrict(restrict_data_l[l],
                                   RT_l[l], r_l[l], b_l[l+1]);
#if DEBUG
            hypre_sprintf(filename, "zout_xdown.%02d", l);
            hypre_SStructPVectorPrint(filename, x_l[l], 0);
            hypre_sprintf(filename, "zout_rdown.%02d", l);
            hypre_SStructPVectorPrint(filename, r_l[l], 0);
            hypre_sprintf(filename, "zout_b.%02d", l+1);
            hypre_SStructPVectorPrint(filename, b_l[l+1], 0);
#endif
         }

         /*--------------------------------------------------
          * Bottom
          *
          * After the loop above l == (num_levels - 1), i.e. the
          * coarsest level.
          *--------------------------------------------------*/

         hypre_SysPFMGRelaxSetZeroGuess(relax_data_l[l], 1);
         hypre_SysPFMGRelax(relax_data_l[l], A_l[l], b_l[l], x_l[l]);
#if DEBUG
         hypre_sprintf(filename, "zout_xbottom.%02d", l);
         hypre_SStructPVectorPrint(filename, x_l[l], 0);
#endif

         /*--------------------------------------------------
          * Up cycle
          *--------------------------------------------------*/

         for (l = (num_levels - 2); l >= 1; l--)
         {
            /* interpolate error and correct (x = x + Pe_c) */
            hypre_SysSemiInterp(interp_data_l[l], P_l[l], x_l[l+1], e_l[l]);
            hypre_SStructPAxpy(1.0, e_l[l], x_l[l]);
#if DEBUG
            hypre_sprintf(filename, "zout_eup.%02d", l);
            hypre_SStructPVectorPrint(filename, e_l[l], 0);
            hypre_sprintf(filename, "zout_xup.%02d", l);
            hypre_SStructPVectorPrint(filename, x_l[l], 0);
#endif
            if (active_l[l])
            {
               /* post-relaxation */
               hypre_SysPFMGRelaxSetPostRelax(relax_data_l[l]);
               hypre_SysPFMGRelaxSetMaxIter(relax_data_l[l], num_post_relax);
               hypre_SysPFMGRelaxSetZeroGuess(relax_data_l[l], 0);
               hypre_SysPFMGRelax(relax_data_l[l], A_l[l], b_l[l], x_l[l]);
            }
         }

         /* interpolate error and correct on fine grid (x = x + Pe_c) */
         hypre_SysSemiInterp(interp_data_l[0], P_l[0], x_l[1], e_l[0]);
         hypre_SStructPAxpy(1.0, e_l[0], x_l[0]);
#if DEBUG
         hypre_sprintf(filename, "zout_eup.%02d", 0);
         hypre_SStructPVectorPrint(filename, e_l[0], 0);
         hypre_sprintf(filename, "zout_xup.%02d", 0);
         hypre_SStructPVectorPrint(filename, x_l[0], 0);
#endif
      }

      /* part of convergence check */
      if ((tol > 0.0) && (rel_change))
      {
         if (num_levels > 1)
         {
            hypre_SStructPInnerProd(e_l[0], e_l[0], &e_dot_e);
            hypre_SStructPInnerProd(x_l[0], x_l[0], &x_dot_x);
         }
         else
         {
            /* single level: no coarse-grid correction was applied */
            e_dot_e = 0.0;
            x_dot_x = 1.0;
         }
      }

      /* fine grid post-relaxation */
      hypre_SysPFMGRelaxSetPostRelax(relax_data_l[0]);
      hypre_SysPFMGRelaxSetMaxIter(relax_data_l[0], num_post_relax);
      hypre_SysPFMGRelaxSetZeroGuess(relax_data_l[0], 0);
      hypre_SysPFMGRelax(relax_data_l[0], A_l[0], b_l[0], x_l[0]);

      (sys_pfmg_data -> num_iterations) = (i + 1);
   }

finish:
   /*-----------------------------------------------------
    * Destroy Refs to A,x,b (the PMatrix & PVectors within
    * the input SStructMatrix & SStructVectors).
    * Reached from the normal exit and from both early exits.
    *-----------------------------------------------------*/
   hypre_SStructPMatrixDestroy(A);
   hypre_SStructPVectorDestroy(x);
   hypre_SStructPVectorDestroy(b);

   hypre_EndTiming(sys_pfmg_data -> time_index);

   return ierr;
}
Exemplo n.º 9
0
HYPRE_Int
hypre_SMGResidual( void               *residual_vdata,
                   hypre_StructMatrix *A,
                   hypre_StructVector *x,
                   hypre_StructVector *b,
                   hypre_StructVector *r              )
{
   HYPRE_Int ierr;

   hypre_SMGResidualData  *residual_data = residual_vdata;

   hypre_IndexRef          base_stride = (residual_data -> base_stride);
   hypre_BoxArray         *base_points = (residual_data -> base_points);
   hypre_ComputePkg       *compute_pkg = (residual_data -> compute_pkg);

   hypre_CommHandle       *comm_handle;
                       
   hypre_BoxArrayArray    *compute_box_aa;
   hypre_BoxArray         *compute_box_a;
   hypre_Box              *compute_box;
                       
   hypre_Box              *A_data_box;
   hypre_Box              *x_data_box;
   hypre_Box              *b_data_box;
   hypre_Box              *r_data_box;
                       
   HYPRE_Int               Ai;
   HYPRE_Int               xi;
   HYPRE_Int               bi;
   HYPRE_Int               ri;
                         
   double                 *Ap0;
   double                 *xp0;
   double                 *bp;
   double                 *rp;
                       
   hypre_Index             loop_size;
   hypre_IndexRef          start;
                       
   hypre_StructStencil    *stencil;
   hypre_Index            *stencil_shape;
   HYPRE_Int               stencil_size;

   HYPRE_Int               compute_i, i, j, si;
   HYPRE_Int               loopi, loopj, loopk;

   double            *Ap1, *Ap2;
   double            *Ap3, *Ap4;
   double            *Ap5, *Ap6;
   double            *Ap7, *Ap8, *Ap9;
   double            *Ap10, *Ap11, *Ap12, *Ap13, *Ap14;
   double            *Ap15, *Ap16, *Ap17, *Ap18;
   double            *Ap19, *Ap20, *Ap21, *Ap22, *Ap23, *Ap24, *Ap25, *Ap26;
   double            *xp1, *xp2;
   double            *xp3, *xp4;
   double            *xp5, *xp6;
   double            *xp7, *xp8, *xp9;
   double            *xp10, *xp11, *xp12, *xp13, *xp14;
   double            *xp15, *xp16, *xp17, *xp18;
   double            *xp19, *xp20, *xp21, *xp22, *xp23, *xp24, *xp25, *xp26;

   hypre_BeginTiming(residual_data -> time_index);

   /*-----------------------------------------------------------------------
    * Compute residual r = b - Ax
    *-----------------------------------------------------------------------*/

   stencil       = hypre_StructMatrixStencil(A);
   stencil_shape = hypre_StructStencilShape(stencil);
   stencil_size  = hypre_StructStencilSize(stencil);

   for (compute_i = 0; compute_i < 2; compute_i++)
   {
      switch(compute_i)
      {
         case 0:
         {
            xp0 = hypre_StructVectorData(x);
            hypre_InitializeIndtComputations(compute_pkg, xp0, &comm_handle);
            compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg);

            /*----------------------------------------
             * Copy b into r
             *----------------------------------------*/

            compute_box_a = base_points;
            hypre_ForBoxI(i, compute_box_a)
               {
                  compute_box = hypre_BoxArrayBox(compute_box_a, i);
                  start = hypre_BoxIMin(compute_box);

                  b_data_box =
                     hypre_BoxArrayBox(hypre_StructVectorDataSpace(b), i);
                  r_data_box =
                     hypre_BoxArrayBox(hypre_StructVectorDataSpace(r), i);

                  bp = hypre_StructVectorBoxData(b, i);
                  rp = hypre_StructVectorBoxData(r, i);

                  hypre_BoxGetStrideSize(compute_box, base_stride, loop_size);
                  hypre_BoxLoop2Begin(loop_size,
                                      b_data_box, start, base_stride, bi,
                                      r_data_box, start, base_stride, ri);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,bi,ri
#include "hypre_box_smp_forloop.h"
                  hypre_BoxLoop2For(loopi, loopj, loopk, bi, ri)
                     {
                        rp[ri] = bp[bi];
                     }
                  hypre_BoxLoop2End(bi, ri);
               }
         }
         break;

         case 1:
         {
            hypre_FinalizeIndtComputations(comm_handle);
            compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg);
         }
         break;
      }

      /*--------------------------------------------------------------------
       * Compute r -= A*x
       *--------------------------------------------------------------------*/

      hypre_ForBoxArrayI(i, compute_box_aa)
         {
            compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, i);

            A_data_box = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(A), i);
            x_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(x), i);
            r_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(r), i);

            rp = hypre_StructVectorBoxData(r, i);

            /*--------------------------------------------------------------
             * Switch statement to direct control (based on stencil size) to
             * code to get pointers and offsets for A and x.
             *--------------------------------------------------------------*/

            switch (stencil_size)
            {
               case 1:

               Ap0 = hypre_StructMatrixBoxData(A, i, 0);
               xp0 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);

               break;

               case 3:

               Ap0 = hypre_StructMatrixBoxData(A, i, 0);
               Ap1 = hypre_StructMatrixBoxData(A, i, 1);
               Ap2 = hypre_StructMatrixBoxData(A, i, 2);

               xp0 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);
               xp1 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]);
               xp2 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]);

               break;

               case 5:

               Ap0 = hypre_StructMatrixBoxData(A, i, 0);
               Ap1 = hypre_StructMatrixBoxData(A, i, 1);
               Ap2 = hypre_StructMatrixBoxData(A, i, 2);
               Ap3 = hypre_StructMatrixBoxData(A, i, 3);
               Ap4 = hypre_StructMatrixBoxData(A, i, 4);

               xp0 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);
               xp1 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]);
               xp2 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]);
               xp3 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[3]);
               xp4 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[4]);

               break;

               case 7:

               Ap0 = hypre_StructMatrixBoxData(A, i, 0);
               Ap1 = hypre_StructMatrixBoxData(A, i, 1);
               Ap2 = hypre_StructMatrixBoxData(A, i, 2);
               Ap3 = hypre_StructMatrixBoxData(A, i, 3);
               Ap4 = hypre_StructMatrixBoxData(A, i, 4);
               Ap5 = hypre_StructMatrixBoxData(A, i, 5);
               Ap6 = hypre_StructMatrixBoxData(A, i, 6);

               xp0 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);
               xp1 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]);
               xp2 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]);
               xp3 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[3]);
               xp4 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[4]);
               xp5 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[5]);
               xp6 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[6]);

               break;

               case 9:

               Ap0 = hypre_StructMatrixBoxData(A, i, 0);
               Ap1 = hypre_StructMatrixBoxData(A, i, 1);
               Ap2 = hypre_StructMatrixBoxData(A, i, 2);
               Ap3 = hypre_StructMatrixBoxData(A, i, 3);
               Ap4 = hypre_StructMatrixBoxData(A, i, 4);
               Ap5 = hypre_StructMatrixBoxData(A, i, 5);
               Ap6 = hypre_StructMatrixBoxData(A, i, 6);
               Ap7 = hypre_StructMatrixBoxData(A, i, 7);
               Ap8 = hypre_StructMatrixBoxData(A, i, 8);

               xp0 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);
               xp1 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]);
               xp2 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]);
               xp3 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[3]);
               xp4 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[4]);
               xp5 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[5]);
               xp6 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[6]);
               xp7 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[7]);
               xp8 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[8]);

               break;

               case 15:

               Ap0 = hypre_StructMatrixBoxData(A, i, 0);
               Ap1 = hypre_StructMatrixBoxData(A, i, 1);
               Ap2 = hypre_StructMatrixBoxData(A, i, 2);
               Ap3 = hypre_StructMatrixBoxData(A, i, 3);
               Ap4 = hypre_StructMatrixBoxData(A, i, 4);
               Ap5 = hypre_StructMatrixBoxData(A, i, 5);
               Ap6 = hypre_StructMatrixBoxData(A, i, 6);
               Ap7 = hypre_StructMatrixBoxData(A, i, 7);
               Ap8 = hypre_StructMatrixBoxData(A, i, 8);
               Ap9 = hypre_StructMatrixBoxData(A, i, 9);
               Ap10 = hypre_StructMatrixBoxData(A, i, 10);
               Ap11 = hypre_StructMatrixBoxData(A, i, 11);
               Ap12 = hypre_StructMatrixBoxData(A, i, 12);
               Ap13 = hypre_StructMatrixBoxData(A, i, 13);
               Ap14 = hypre_StructMatrixBoxData(A, i, 14);

               xp0 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);
               xp1 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]);
               xp2 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]);
               xp3 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[3]);
               xp4 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[4]);
               xp5 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[5]);
               xp6 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[6]);
               xp7 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[7]);
               xp8 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[8]);
               xp9 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[9]);
               xp10 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[10]);
               xp11 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[11]);
               xp12 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[12]);
               xp13 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[13]);
               xp14 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[14]);

               break;

               case 19:

               Ap0 = hypre_StructMatrixBoxData(A, i, 0);
               Ap1 = hypre_StructMatrixBoxData(A, i, 1);
               Ap2 = hypre_StructMatrixBoxData(A, i, 2);
               Ap3 = hypre_StructMatrixBoxData(A, i, 3);
               Ap4 = hypre_StructMatrixBoxData(A, i, 4);
               Ap5 = hypre_StructMatrixBoxData(A, i, 5);
               Ap6 = hypre_StructMatrixBoxData(A, i, 6);
               Ap7 = hypre_StructMatrixBoxData(A, i, 7);
               Ap8 = hypre_StructMatrixBoxData(A, i, 8);
               Ap9 = hypre_StructMatrixBoxData(A, i, 9);
               Ap10 = hypre_StructMatrixBoxData(A, i, 10);
               Ap11 = hypre_StructMatrixBoxData(A, i, 11);
               Ap12 = hypre_StructMatrixBoxData(A, i, 12);
               Ap13 = hypre_StructMatrixBoxData(A, i, 13);
               Ap14 = hypre_StructMatrixBoxData(A, i, 14);
               Ap15 = hypre_StructMatrixBoxData(A, i, 15);
               Ap16 = hypre_StructMatrixBoxData(A, i, 16);
               Ap17 = hypre_StructMatrixBoxData(A, i, 17);
               Ap18 = hypre_StructMatrixBoxData(A, i, 18);

               xp0 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);
               xp1 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]);
               xp2 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]);
               xp3 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[3]);
               xp4 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[4]);
               xp5 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[5]);
               xp6 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[6]);
               xp7 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[7]);
               xp8 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[8]);
               xp9 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[9]);
               xp10 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[10]);
               xp11 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[11]);
               xp12 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[12]);
               xp13 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[13]);
               xp14 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[14]);
               xp15 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[15]);
               xp16 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[16]);
               xp17 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[17]);
               xp18 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[18]);

               break;

               case 27:

               Ap0 = hypre_StructMatrixBoxData(A, i, 0);
               Ap1 = hypre_StructMatrixBoxData(A, i, 1);
               Ap2 = hypre_StructMatrixBoxData(A, i, 2);
               Ap3 = hypre_StructMatrixBoxData(A, i, 3);
               Ap4 = hypre_StructMatrixBoxData(A, i, 4);
               Ap5 = hypre_StructMatrixBoxData(A, i, 5);
               Ap6 = hypre_StructMatrixBoxData(A, i, 6);
               Ap7 = hypre_StructMatrixBoxData(A, i, 7);
               Ap8 = hypre_StructMatrixBoxData(A, i, 8);
               Ap9 = hypre_StructMatrixBoxData(A, i, 9);
               Ap10 = hypre_StructMatrixBoxData(A, i, 10);
               Ap11 = hypre_StructMatrixBoxData(A, i, 11);
               Ap12 = hypre_StructMatrixBoxData(A, i, 12);
               Ap13 = hypre_StructMatrixBoxData(A, i, 13);
               Ap14 = hypre_StructMatrixBoxData(A, i, 14);
               Ap15 = hypre_StructMatrixBoxData(A, i, 15);
               Ap16 = hypre_StructMatrixBoxData(A, i, 16);
               Ap17 = hypre_StructMatrixBoxData(A, i, 17);
               Ap18 = hypre_StructMatrixBoxData(A, i, 18);
               Ap19 = hypre_StructMatrixBoxData(A, i, 19);
               Ap20 = hypre_StructMatrixBoxData(A, i, 20);
               Ap21 = hypre_StructMatrixBoxData(A, i, 21);
               Ap22 = hypre_StructMatrixBoxData(A, i, 22);
               Ap23 = hypre_StructMatrixBoxData(A, i, 23);
               Ap24 = hypre_StructMatrixBoxData(A, i, 24);
               Ap25 = hypre_StructMatrixBoxData(A, i, 25);
               Ap26 = hypre_StructMatrixBoxData(A, i, 26);

               xp0 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);
               xp1 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]);
               xp2 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]);
               xp3 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[3]);
               xp4 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[4]);
               xp5 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[5]);
               xp6 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[6]);
               xp7 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[7]);
               xp8 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[8]);
               xp9 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[9]);
               xp10 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[10]);
               xp11 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[11]);
               xp12 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[12]);
               xp13 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[13]);
               xp14 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[14]);
               xp15 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[15]);
               xp16 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[16]);
               xp17 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[17]);
               xp18 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[18]);
               xp19 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[19]);
               xp20 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[20]);
               xp21 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[21]);
               xp22 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[22]);
               xp23 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[23]);
               xp24 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[24]);
               xp25 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[25]);
               xp26 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[26]);

               break;

               default:
               ;
            }

            hypre_ForBoxI(j, compute_box_a)
               {
                  compute_box = hypre_BoxArrayBox(compute_box_a, j);

                  start  = hypre_BoxIMin(compute_box);

                  /*------------------------------------------------------
                   * Switch statement to direct control to appropriate
                   * box loop depending on stencil size
                   *------------------------------------------------------*/

                  switch (stencil_size)
                  {

                     case 1:
   
                     hypre_BoxGetStrideSize(compute_box, base_stride, loop_size);
                     hypre_BoxLoop3Begin(loop_size,
                                         A_data_box, start, base_stride, Ai,
                                         x_data_box, start, base_stride, xi,
                                         r_data_box, start, base_stride, ri);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,Ai,xi,ri
#include "hypre_box_smp_forloop.h"
                     hypre_BoxLoop3For(loopi, loopj, loopk, Ai, xi, ri)
                        {

                           rp[ri] = rp[ri]
                              - Ap0[Ai] * xp0[xi];

                        }
                     hypre_BoxLoop3End(Ai, xi, ri);

                     break;

                     case 3:

                     hypre_BoxGetStrideSize(compute_box, base_stride, loop_size);
                     hypre_BoxLoop3Begin(loop_size,
                                         A_data_box, start, base_stride, Ai,
                                         x_data_box, start, base_stride, xi,
                                         r_data_box, start, base_stride, ri);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,Ai,xi,ri
#include "hypre_box_smp_forloop.h"
                     hypre_BoxLoop3For(loopi, loopj, loopk, Ai, xi, ri)
                        {
 
                           rp[ri] = rp[ri]
                              - Ap0[Ai] * xp0[xi]
                              - Ap1[Ai] * xp1[xi]
                              - Ap2[Ai] * xp2[xi];

                        }
Exemplo n.º 10
0
HYPRE_Int 
hypre_MaxwellSolve2( void                * maxwell_vdata,
                     hypre_SStructMatrix * A_in,
                     hypre_SStructVector * f,
                     hypre_SStructVector * u )
{
   hypre_MaxwellData     *maxwell_data = maxwell_vdata;

   hypre_ParVector       *f_edge;
   hypre_ParVector       *u_edge;

   HYPRE_Int              max_iter     = maxwell_data-> max_iter;
   double                 tol          = maxwell_data-> tol;
   HYPRE_Int              rel_change   = maxwell_data-> rel_change;
   HYPRE_Int              zero_guess   = maxwell_data-> zero_guess;
   HYPRE_Int              npre_relax   = maxwell_data-> num_pre_relax;
   HYPRE_Int              npost_relax  = maxwell_data-> num_post_relax;

   hypre_ParCSRMatrix   **Ann_l        = maxwell_data-> Ann_l;
   hypre_ParCSRMatrix   **Pn_l         = maxwell_data-> Pn_l;
   hypre_ParCSRMatrix   **RnT_l        = maxwell_data-> RnT_l;
   hypre_ParVector      **bn_l         = maxwell_data-> bn_l;
   hypre_ParVector      **xn_l         = maxwell_data-> xn_l;
   hypre_ParVector      **resn_l       = maxwell_data-> resn_l;
   hypre_ParVector      **en_l         = maxwell_data-> en_l;
   hypre_ParVector      **nVtemp2_l    = maxwell_data-> nVtemp2_l;
   HYPRE_Int            **nCF_marker_l = maxwell_data-> nCF_marker_l;
   double                *nrelax_weight= maxwell_data-> nrelax_weight;
   double                *nomega       = maxwell_data-> nomega;
   HYPRE_Int              nrelax_type  = maxwell_data-> nrelax_type;
   HYPRE_Int              node_numlevs = maxwell_data-> node_numlevels;

   hypre_ParCSRMatrix    *Tgrad        = maxwell_data-> Tgrad;
   hypre_ParCSRMatrix    *T_transpose  = maxwell_data-> T_transpose;

   hypre_ParCSRMatrix   **Aee_l        = maxwell_data-> Aee_l;
   hypre_IJMatrix       **Pe_l         = maxwell_data-> Pe_l;
   hypre_IJMatrix       **ReT_l        = maxwell_data-> ReT_l;
   hypre_ParVector      **be_l         = maxwell_data-> be_l;
   hypre_ParVector      **xe_l         = maxwell_data-> xe_l;
   hypre_ParVector      **rese_l       = maxwell_data-> rese_l;
   hypre_ParVector      **ee_l         = maxwell_data-> ee_l;
   hypre_ParVector      **eVtemp2_l    = maxwell_data-> eVtemp2_l;
   HYPRE_Int            **eCF_marker_l = maxwell_data-> eCF_marker_l;
   double                *erelax_weight= maxwell_data-> erelax_weight;
   double                *eomega       = maxwell_data-> eomega;
   HYPRE_Int              erelax_type  = maxwell_data-> erelax_type;
   HYPRE_Int              edge_numlevs = maxwell_data-> edge_numlevels;

   HYPRE_Int            **BdryRanks_l  = maxwell_data-> BdryRanks_l;
   HYPRE_Int             *BdryRanksCnts_l= maxwell_data-> BdryRanksCnts_l;

   HYPRE_Int              logging      = maxwell_data-> logging;
   double                *norms        = maxwell_data-> norms;
   double                *rel_norms    = maxwell_data-> rel_norms;

   HYPRE_Int              Solve_err_flag;
   HYPRE_Int              relax_local, cycle_param;
                                                                                                            
   double                 b_dot_b = 0, r_dot_r, eps = 0;
   double                 e_dot_e, x_dot_x;

   HYPRE_Int              i, j;
   HYPRE_Int              level;

   HYPRE_Int              ierr= 0;

   
   /* added for the relaxation routines */
   hypre_ParVector *ze = NULL;

   if (hypre_NumThreads() > 1)
   {
      /* Aee is always bigger than Ann */

      ze = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(Aee_l[0]),
                                hypre_ParCSRMatrixGlobalNumRows(Aee_l[0]),
                                hypre_ParCSRMatrixRowStarts(Aee_l[0]));
      hypre_ParVectorInitialize(ze);
      hypre_ParVectorSetPartitioningOwner(ze,0);

   }

   hypre_BeginTiming(maxwell_data-> time_index);

   hypre_SStructVectorConvert(f, &f_edge);
   hypre_SStructVectorConvert(u, &u_edge);
   hypre_ParVectorZeroBCValues(f_edge, BdryRanks_l[0], BdryRanksCnts_l[0]);
   hypre_ParVectorZeroBCValues(u_edge, BdryRanks_l[0], BdryRanksCnts_l[0]);
   be_l[0]= f_edge;
   xe_l[0]= u_edge;

  /* the nodal fine vectors: xn= 0. bn= T'*(be- Aee*xe) is updated in the cycle. */
   hypre_ParVectorSetConstantValues(xn_l[0], 0.0);

   relax_local= 0;
   cycle_param= 0;

  (maxwell_data-> num_iterations) = 0;
  /* if max_iter is zero, return */
   if (max_iter == 0)
   {
      /* if using a zero initial guess, return zero */
      if (zero_guess)
      {
         hypre_ParVectorSetConstantValues(xe_l[0], 0.0);
      }
                                                                                                            
      hypre_EndTiming(maxwell_data -> time_index);
      return ierr;
   }
                                                                                                            
   /* part of convergence check */
   if (tol > 0.0)
   {
      /* eps = (tol^2) */
      b_dot_b= hypre_ParVectorInnerProd(be_l[0], be_l[0]);
      eps = tol*tol;
                                                                                                            
      /* if rhs is zero, return a zero solution */
      if (b_dot_b == 0.0)
      {
         hypre_ParVectorSetConstantValues(xe_l[0], 0.0);
         if (logging > 0)
         {
            norms[0]     = 0.0;
            rel_norms[0] = 0.0;
         }
                                                                                                            
         hypre_EndTiming(maxwell_data -> time_index);
         return ierr;
      }
   }

   /*-----------------------------------------------------
    * Do V-cycles:
    * For each index l, "fine" = l, "coarse" = (l+1)
    *   
    *   solution update:
    *      edge_sol= edge_sol + T*node_sol
    *-----------------------------------------------------*/
   for (i = 0; i < max_iter; i++)
   {
     /* compute fine grid residual & nodal rhs. */
      hypre_ParVectorCopy(be_l[0], rese_l[0]);
      hypre_ParCSRMatrixMatvec(-1.0, Aee_l[0], xe_l[0], 1.0, rese_l[0]);
      hypre_ParVectorZeroBCValues(rese_l[0], BdryRanks_l[0], BdryRanksCnts_l[0]);
      hypre_ParCSRMatrixMatvec(1.0, T_transpose, rese_l[0], 0.0, bn_l[0]);

      /* convergence check */
      if (tol > 0.0)
      {
         r_dot_r= hypre_ParVectorInnerProd(rese_l[0], rese_l[0]);

         if (logging > 0)
         {
            norms[i] = sqrt(r_dot_r);
            if (b_dot_b > 0)
               rel_norms[i] = sqrt(r_dot_r/b_dot_b);
            else
               rel_norms[i] = 0.0;
         }
                                                                                                            
         /* always do at least 1 V-cycle */
         if ((r_dot_r/b_dot_b < eps) && (i > 0))
         {
            if (rel_change)
            {
               if ((e_dot_e/x_dot_x) < eps)
                  break;
            }
            else
            {
               break;
            }
         }
      }

      hypre_ParVectorCopy(bn_l[0], resn_l[0]);
      hypre_ParCSRMatrixMatvec(-1.0, Ann_l[0], xn_l[0], 1.0, resn_l[0]);
      r_dot_r= hypre_ParVectorInnerProd(resn_l[0], resn_l[0]);

      for (level= 0; level<= node_numlevs-2; level++)
      {
         /*-----------------------------------------------
          * Down cycle
          *-----------------------------------------------*/
          for (j= 0; j< npre_relax; j++)
          {
             Solve_err_flag = hypre_BoomerAMGRelaxIF(Ann_l[level],
                                                     bn_l[level],
                                                     nCF_marker_l[level],
                                                     nrelax_type,
                                                     relax_local,
                                                     cycle_param,
                                                     nrelax_weight[level],
                                                     nomega[level],
                                                     NULL,
                                                     xn_l[level],
                                                     nVtemp2_l[level],
                                                     ze);
          }  /*for (j= 0; j< npre_relax; j++) */

         /* compute residuals */
          hypre_ParVectorCopy(bn_l[level], resn_l[level]);
          hypre_ParCSRMatrixMatvec(-1.0, Ann_l[level], xn_l[level], 
                                    1.0, resn_l[level]);

         /* restrict residuals */
          hypre_ParCSRMatrixMatvecT(1.0, RnT_l[level], resn_l[level],
                                    0.0, bn_l[level+1]);

         /* zero off initial guess for the next level */
          hypre_ParVectorSetConstantValues(xn_l[level+1], 0.0);

      }  /* for (level= 0; level<= node_numlevs-2; level++) */
 
      /* coarsest node solve */
      level= node_numlevs-1;
      Solve_err_flag = hypre_BoomerAMGRelaxIF(Ann_l[level],
                                              bn_l[level],
                                              nCF_marker_l[level],
                                              nrelax_type,
                                              relax_local,
                                              cycle_param,
                                              nrelax_weight[level],
                                              nomega[level],
                                              NULL,
                                              xn_l[level],
                                              nVtemp2_l[level],
                                              ze);

     /*---------------------------------------------------------------------
      *  Cycle up the levels.
      *---------------------------------------------------------------------*/
      for (level= (node_numlevs - 2); level>= 1; level--)
      {
          hypre_ParCSRMatrixMatvec(1.0, Pn_l[level], xn_l[level+1], 0.0,
                                   en_l[level]);
          hypre_ParVectorAxpy(1.0, en_l[level], xn_l[level]);

         /* post smooth */
          for (j= 0; j< npost_relax; j++)
          {
             Solve_err_flag = hypre_BoomerAMGRelaxIF(Ann_l[level],
                                                     bn_l[level],
                                                     nCF_marker_l[level],
                                                     nrelax_type,
                                                     relax_local,
                                                     cycle_param,
                                                     nrelax_weight[level],
                                                     nomega[level],
                                                     NULL,
                                                     xn_l[level],
                                                     nVtemp2_l[level],
                                                     ze);
          }
      }   /* for (level= (node_numlevs - 2); level>= 1; level--) */

      /* interpolate error and correct on finest grids */
      hypre_ParCSRMatrixMatvec(1.0, Pn_l[0], xn_l[1], 0.0, en_l[0]);
      hypre_ParVectorAxpy(1.0, en_l[0], xn_l[0]);
                                                                                                              
      for (j= 0; j< npost_relax; j++)
      {
         Solve_err_flag = hypre_BoomerAMGRelaxIF(Ann_l[0],
                                                 bn_l[0],
                                                 nCF_marker_l[0],
                                                 nrelax_type,
                                                 relax_local,
                                                 cycle_param,
                                                 nrelax_weight[0],
                                                 nomega[0],
                                                 NULL,
                                                 xn_l[0],
                                                 nVtemp2_l[0],
                                                 ze);
      }  /* for (j= 0; j< npost_relax; j++) */
      hypre_ParVectorCopy(bn_l[0], resn_l[0]);
      hypre_ParCSRMatrixMatvec(-1.0, Ann_l[0], xn_l[0], 1.0, resn_l[0]);

      /* add the gradient solution component to xe_l[0] */
      hypre_ParCSRMatrixMatvec(1.0, Tgrad, xn_l[0], 1.0, xe_l[0]);

      hypre_ParVectorCopy(be_l[0], rese_l[0]);
      hypre_ParCSRMatrixMatvec(-1.0, Aee_l[0], xe_l[0], 1.0, rese_l[0]);
      r_dot_r= hypre_ParVectorInnerProd(rese_l[0], rese_l[0]);

      for (level= 0; level<= edge_numlevs-2; level++)
      {
         /*-----------------------------------------------
          * Down cycle
          *-----------------------------------------------*/
          for (j= 0; j< npre_relax; j++)
          {
             Solve_err_flag = hypre_BoomerAMGRelaxIF(Aee_l[level],
                                                     be_l[level],
                                                     eCF_marker_l[level],
                                                     erelax_type,
                                                     relax_local,
                                                     cycle_param,
                                                     erelax_weight[level],
                                                     eomega[level],
                                                     NULL,
                                                     xe_l[level],
                                                     eVtemp2_l[level], 
                                                     ze);
          }  /*for (j= 0; j< npre_relax; j++) */
                                                                                                              
         /* compute residuals */
          hypre_ParVectorCopy(be_l[level], rese_l[level]);
          hypre_ParCSRMatrixMatvec(-1.0, Aee_l[level], xe_l[level],
                                    1.0, rese_l[level]);

         /* restrict residuals */
          hypre_ParCSRMatrixMatvecT(1.0,
             (hypre_ParCSRMatrix *) hypre_IJMatrixObject(ReT_l[level]),
                                    rese_l[level], 0.0, be_l[level+1]);
          hypre_ParVectorZeroBCValues(be_l[level+1], BdryRanks_l[level+1],
                                      BdryRanksCnts_l[level+1]);

         /* zero off initial guess for the next level */
          hypre_ParVectorSetConstantValues(xe_l[level+1], 0.0);
                                                                                                              
      }  /* for (level= 1; level<= edge_numlevels-2; level++) */
                                                                                                              
      /* coarsest edge solve */
      level= edge_numlevs-1;
      for (j= 0; j< npre_relax; j++)
      {
         Solve_err_flag = hypre_BoomerAMGRelaxIF(Aee_l[level],
                                                 be_l[level],
                                                 eCF_marker_l[level],
                                                 erelax_type,
                                                 relax_local,
                                                 cycle_param,
                                                 erelax_weight[level],
                                                 eomega[level],
                                                 NULL,
                                                 xe_l[level],
                                                 eVtemp2_l[level], 
                                                 ze);
      }

     /*---------------------------------------------------------------------
      *  Up cycle. 
      *---------------------------------------------------------------------*/
      for (level= (edge_numlevs - 2); level>= 1; level--)
      {
         hypre_ParCSRMatrixMatvec(1.0, 
           (hypre_ParCSRMatrix *) hypre_IJMatrixObject(Pe_l[level]), 
                                  xe_l[level+1], 0.0, ee_l[level]);
         hypre_ParVectorZeroBCValues(ee_l[level], BdryRanks_l[level],
                                     BdryRanksCnts_l[level]);
         hypre_ParVectorAxpy(1.0, ee_l[level], xe_l[level]);

         /* post smooth */
         for (j= 0; j< npost_relax; j++)
         {
            Solve_err_flag = hypre_BoomerAMGRelaxIF(Aee_l[level],
                                                    be_l[level],
                                                    eCF_marker_l[level],
                                                    erelax_type,
                                                    relax_local,
                                                    cycle_param,
                                                    erelax_weight[level],
                                                    eomega[level],
                                                    NULL,
                                                    xe_l[level],
                                                    eVtemp2_l[level], 
                                                    ze);
         }

      }  /* for (level= (edge_numlevs - 2); level>= 1; level--) */

      /* interpolate error and correct on finest grids */
      hypre_ParCSRMatrixMatvec(1.0, 
        (hypre_ParCSRMatrix *) hypre_IJMatrixObject(Pe_l[0]), 
                               xe_l[1], 0.0, ee_l[0]);
      hypre_ParVectorZeroBCValues(ee_l[0], BdryRanks_l[0],
                                  BdryRanksCnts_l[0]);
      hypre_ParVectorAxpy(1.0, ee_l[0], xe_l[0]);

      for (j= 0; j< npost_relax; j++)
      {
         Solve_err_flag = hypre_BoomerAMGRelaxIF(Aee_l[0],
                                                 be_l[0],
                                                 eCF_marker_l[0],
                                                 erelax_type,
                                                 relax_local,
                                                 cycle_param,
                                                 erelax_weight[0],
                                                 eomega[0],
                                                 NULL,
                                                 xe_l[0],
                                                 eVtemp2_l[0],
                                                 ze);
      }  /* for (j= 0; j< npost_relax; j++) */

      e_dot_e= hypre_ParVectorInnerProd(ee_l[0], ee_l[0]);
      x_dot_x= hypre_ParVectorInnerProd(xe_l[0], xe_l[0]);

      hypre_ParVectorCopy(be_l[0], rese_l[0]);
      hypre_ParCSRMatrixMatvec(-1.0, Aee_l[0], xe_l[0], 1.0, rese_l[0]);

      (maxwell_data -> num_iterations) = (i + 1);
   }

   hypre_EndTiming(maxwell_data -> time_index);


   if (ze)
      hypre_ParVectorDestroy(ze);

   return ierr;
}
Exemplo n.º 11
0
/*--------------------------------------------------------------------------
 * hypre_SMGSolve
 *
 * Applies up to max_iter V-cycles to A x = b using the per-level data
 * held in smg_vdata.
 *
 *   smg_vdata : solver object (used as a hypre_SMGData *)
 *   A, b, x   : fine-grid matrix, right-hand side and solution; references
 *               to them replace the level-0 entries of A_l, b_l and x_l
 *
 * Convergence is only tested when tol > 0.  The loop then stops once
 * (r.r / b.b) < tol^2 after at least one complete cycle and, if rel_change
 * is set, (e.e / x.x) < tol^2 as well.  With tol > 0 and logging > 0 the
 * residual norm and relative residual norm of every cycle are recorded in
 * norms[] and rel_norms[].  num_iterations holds the number of completed
 * cycles.
 *
 * Returns hypre_error_flag.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_SMGSolve( void               *smg_vdata,
                hypre_StructMatrix *A,
                hypre_StructVector *b,
                hypre_StructVector *x         )
{
   hypre_SMGData        *smg_data = smg_vdata;

   /* solver controls */
   double                tol             = smg_data->tol;
   HYPRE_Int             max_iter        = smg_data->max_iter;
   HYPRE_Int             rel_change      = smg_data->rel_change;
   HYPRE_Int             zero_guess      = smg_data->zero_guess;
   HYPRE_Int             num_levels      = smg_data->num_levels;
   HYPRE_Int             num_pre_relax   = smg_data->num_pre_relax;
   HYPRE_Int             num_post_relax  = smg_data->num_post_relax;
   HYPRE_Int             logging         = smg_data->logging;
   hypre_IndexRef        base_index      = smg_data->base_index;
   hypre_IndexRef        base_stride     = smg_data->base_stride;

   /* per-level operators and vectors */
   hypre_StructMatrix  **A_l             = smg_data->A_l;
   hypre_StructMatrix  **PT_l            = smg_data->PT_l;
   hypre_StructMatrix  **R_l             = smg_data->R_l;
   hypre_StructVector  **b_l             = smg_data->b_l;
   hypre_StructVector  **x_l             = smg_data->x_l;
   hypre_StructVector  **r_l             = smg_data->r_l;
   hypre_StructVector  **e_l             = smg_data->e_l;

   /* per-level kernel objects */
   void                **relax_data_l    = smg_data->relax_data_l;
   void                **residual_data_l = smg_data->residual_data_l;
   void                **restrict_data_l = smg_data->restrict_data_l;
   void                **interp_data_l   = smg_data->interp_data_l;

   /* convergence history (written only when logging > 0) */
   double               *norms           = smg_data->norms;
   double               *rel_norms       = smg_data->rel_norms;

   double                b_dot_b = 0.0, r_dot_r, eps = 0.0;
   double                e_dot_e = 0.0, x_dot_x = 1.0;

   HYPRE_Int             iter, lev;

#if DEBUG
   char                  dump_name[255];
#endif

   /*-----------------------------------------------------
    * Setup and trivial cases
    *-----------------------------------------------------*/

   hypre_BeginTiming(smg_data->time_index);

   /* level 0 operates directly on the caller's matrix and vectors */
   hypre_StructMatrixDestroy(A_l[0]);
   hypre_StructVectorDestroy(b_l[0]);
   hypre_StructVectorDestroy(x_l[0]);
   A_l[0] = hypre_StructMatrixRef(A);
   b_l[0] = hypre_StructVectorRef(b);
   x_l[0] = hypre_StructVectorRef(x);

   smg_data->num_iterations = 0;

   /* no cycles requested: at most reset x and leave */
   if (max_iter == 0)
   {
      if (zero_guess)
      {
         /* a zero initial guess is returned as a zero solution */
         hypre_StructVectorSetConstantValues(x, 0.0);
      }

      hypre_EndTiming(smg_data->time_index);
      return hypre_error_flag;
   }

   /* quantities needed by the convergence test */
   if (tol > 0.0)
   {
      b_dot_b = hypre_StructInnerProd(b_l[0], b_l[0]);
      eps     = tol * tol;   /* compared against squared norm ratios */

      /* zero right-hand side: the solution is zero */
      if (b_dot_b == 0.0)
      {
         hypre_StructVectorSetConstantValues(x, 0.0);
         if (logging > 0)
         {
            norms[0]     = 0.0;
            rel_norms[0] = 0.0;
         }

         hypre_EndTiming(smg_data->time_index);
         return hypre_error_flag;
      }
   }

   /*-----------------------------------------------------
    * V-cycles: on level lev, "fine" = lev, "coarse" = lev+1
    *-----------------------------------------------------*/

   for (iter = 0; iter < max_iter; iter++)
   {
      /*--------------------------------------------------
       * Down cycle
       *--------------------------------------------------*/

      /* pre-relaxation on the fine grid (space ranks 0->0, 1->1) */
      if (num_levels > 1)
      {
         hypre_SMGRelaxSetRegSpaceRank(relax_data_l[0], 0, 0);
         hypre_SMGRelaxSetRegSpaceRank(relax_data_l[0], 1, 1);
      }
      hypre_SMGRelaxSetMaxIter(relax_data_l[0], num_pre_relax);
      hypre_SMGRelaxSetZeroGuess(relax_data_l[0], zero_guess);
      hypre_SMGRelax(relax_data_l[0], A_l[0], b_l[0], x_l[0]);

      /* the zero-guess flag only applies to the very first relaxation */
      zero_guess = 0;

      /* fine grid residual r = b - Ax */
      hypre_SMGResidual(residual_data_l[0], A_l[0], x_l[0], b_l[0], r_l[0]);

      /* convergence test */
      if (tol > 0.0)
      {
         r_dot_r = hypre_StructInnerProd(r_l[0], r_l[0]);

         if (logging > 0)
         {
            norms[iter]     = sqrt(r_dot_r);
            rel_norms[iter] = (b_dot_b > 0) ? sqrt(r_dot_r / b_dot_b) : 0.0;
         }

         /* never stop before one full V-cycle has been done; e_dot_e and
            x_dot_x are the values computed in the previous cycle */
         if ((r_dot_r / b_dot_b < eps) && (iter > 0))
         {
            if (!rel_change || ((e_dot_e / x_dot_x) < eps))
            {
               break;
            }
         }
      }

      if (num_levels > 1)
      {
         /* restrict the fine grid residual to level 1 */
         hypre_SemiRestrict(restrict_data_l[0], R_l[0], r_l[0], b_l[1]);
#if DEBUG
         if (hypre_StructStencilDim(hypre_StructMatrixStencil(A)) == 3)
         {
            hypre_sprintf(dump_name, "zout_xdown.%02d", 0);
            hypre_StructVectorPrint(dump_name, x_l[0], 0);
            hypre_sprintf(dump_name, "zout_rdown.%02d", 0);
            hypre_StructVectorPrint(dump_name, r_l[0], 0);
            hypre_sprintf(dump_name, "zout_b.%02d", 1);
            hypre_StructVectorPrint(dump_name, b_l[1], 0);
         }
#endif
         /* intermediate levels on the way down */
         for (lev = 1; lev < (num_levels - 1); lev++)
         {
            /* pre-relaxation, always starting from a zero guess */
            hypre_SMGRelaxSetRegSpaceRank(relax_data_l[lev], 0, 0);
            hypre_SMGRelaxSetRegSpaceRank(relax_data_l[lev], 1, 1);
            hypre_SMGRelaxSetMaxIter(relax_data_l[lev], num_pre_relax);
            hypre_SMGRelaxSetZeroGuess(relax_data_l[lev], 1);
            hypre_SMGRelax(relax_data_l[lev], A_l[lev], b_l[lev], x_l[lev]);

            /* residual r = b - Ax on this level */
            hypre_SMGResidual(residual_data_l[lev],
                              A_l[lev], x_l[lev], b_l[lev], r_l[lev]);

            /* restrict it to the next coarser level */
            hypre_SemiRestrict(restrict_data_l[lev], R_l[lev], r_l[lev],
                               b_l[lev + 1]);
#if DEBUG
            if (hypre_StructStencilDim(hypre_StructMatrixStencil(A)) == 3)
            {
               hypre_sprintf(dump_name, "zout_xdown.%02d", lev);
               hypre_StructVectorPrint(dump_name, x_l[lev], 0);
               hypre_sprintf(dump_name, "zout_rdown.%02d", lev);
               hypre_StructVectorPrint(dump_name, r_l[lev], 0);
               hypre_sprintf(dump_name, "zout_b.%02d", lev + 1);
               hypre_StructVectorPrint(dump_name, b_l[lev + 1], 0);
            }
#endif
         }

         /*--------------------------------------------------
          * Bottom: lev was left at (num_levels - 1) by the
          * loop above
          *--------------------------------------------------*/

         hypre_SMGRelaxSetZeroGuess(relax_data_l[lev], 1);
         hypre_SMGRelax(relax_data_l[lev], A_l[lev], b_l[lev], x_l[lev]);
#if DEBUG
         if (hypre_StructStencilDim(hypre_StructMatrixStencil(A)) == 3)
         {
            hypre_sprintf(dump_name, "zout_xbottom.%02d", lev);
            hypre_StructVectorPrint(dump_name, x_l[lev], 0);
         }
#endif

         /*--------------------------------------------------
          * Up cycle
          *--------------------------------------------------*/

         for (lev = (num_levels - 2); lev >= 1; lev--)
         {
            /* coarse grid correction: x = x + P e_c */
            hypre_SemiInterp(interp_data_l[lev], PT_l[lev], x_l[lev + 1],
                             e_l[lev]);
            hypre_StructAxpy(1.0, e_l[lev], x_l[lev]);
#if DEBUG
            if (hypre_StructStencilDim(hypre_StructMatrixStencil(A)) == 3)
            {
               hypre_sprintf(dump_name, "zout_eup.%02d", lev);
               hypre_StructVectorPrint(dump_name, e_l[lev], 0);
               hypre_sprintf(dump_name, "zout_xup.%02d", lev);
               hypre_StructVectorPrint(dump_name, x_l[lev], 0);
            }
#endif
            /* post-relaxation with the space ranks swapped (0->1, 1->0) */
            hypre_SMGRelaxSetRegSpaceRank(relax_data_l[lev], 0, 1);
            hypre_SMGRelaxSetRegSpaceRank(relax_data_l[lev], 1, 0);
            hypre_SMGRelaxSetMaxIter(relax_data_l[lev], num_post_relax);
            hypre_SMGRelaxSetZeroGuess(relax_data_l[lev], 0);
            hypre_SMGRelax(relax_data_l[lev], A_l[lev], b_l[lev], x_l[lev]);
         }

         /* coarse grid correction on the fine grid: x = x + P e_c */
         hypre_SemiInterp(interp_data_l[0], PT_l[0], x_l[1], e_l[0]);
         hypre_SMGAxpy(1.0, e_l[0], x_l[0], base_index, base_stride);
#if DEBUG
         if (hypre_StructStencilDim(hypre_StructMatrixStencil(A)) == 3)
         {
            hypre_sprintf(dump_name, "zout_eup.%02d", 0);
            hypre_StructVectorPrint(dump_name, e_l[0], 0);
            hypre_sprintf(dump_name, "zout_xup.%02d", 0);
            hypre_StructVectorPrint(dump_name, x_l[0], 0);
         }
#endif
      }

      /* relative-change data used by the next cycle's convergence test */
      if ((tol > 0.0) && (rel_change))
      {
         if (num_levels > 1)
         {
            e_dot_e = hypre_StructInnerProd(e_l[0], e_l[0]);
            x_dot_x = hypre_StructInnerProd(x_l[0], x_l[0]);
         }
         else
         {
            /* single level: no correction vector exists */
            e_dot_e = 0.0;
            x_dot_x = 1.0;
         }
      }

      /* post-relaxation on the fine grid (space ranks 0->1, 1->0) */
      if (num_levels > 1)
      {
         hypre_SMGRelaxSetRegSpaceRank(relax_data_l[0], 0, 1);
         hypre_SMGRelaxSetRegSpaceRank(relax_data_l[0], 1, 0);
      }
      hypre_SMGRelaxSetMaxIter(relax_data_l[0], num_post_relax);
      hypre_SMGRelaxSetZeroGuess(relax_data_l[0], 0);
      hypre_SMGRelax(relax_data_l[0], A_l[0], b_l[0], x_l[0]);

      smg_data->num_iterations = iter + 1;
   }

   hypre_EndTiming(smg_data->time_index);

   return hypre_error_flag;
}
/*--------------------------------------------------------------------------
 * hypre_SMGRelax
 *
 * Runs the relaxation described by relax_vdata on A x = b.
 *
 *   relax_vdata : relaxation object (used as a hypre_SMGRelaxData *)
 *   A, b, x     : matrix, right-hand side and iterate (x is updated)
 *
 * The work is done in two phases: one pass over the pre-relaxation
 * spaces, then max_iter passes over the regular spaces.  For each space
 * the residual is formed with A_rem into temp_vec and the correction is
 * obtained with A_sol, through hypre_SMGSolve when stencil_dim > 2 and
 * through hypre_CyclicReduction otherwise.
 *
 * Returns ierr, which only reflects the result of zeroing x for a zero
 * initial guess; the other calls' return values are not collected.
 *
 * Note: The zero_guess stuff is not handled correctly for general
 * relaxation parameters.  It is correct when the spaces are independent
 * sets in the direction of relaxation.
 *--------------------------------------------------------------------------*/
int
hypre_SMGRelax( void               *relax_vdata,
                hypre_StructMatrix *A,
                hypre_StructVector *b,
                hypre_StructVector *x           )
{
   hypre_SMGRelaxData   *relax_data = relax_vdata;

   hypre_StructVector   *temp_vec;
   hypre_StructMatrix   *A_sol;
   hypre_StructMatrix   *A_rem;
   void                **residual_data;
   void                **solve_data;
   int                   zero_guess;
   int                   stencil_dim;

   /* settings of the phase currently being executed */
   int                   sweeps;
   int                   num_spaces;
   int                  *space_ranks;

   int                   phase, sweep, s, rank;

   int                   ierr = 0;

   hypre_BeginTiming(relax_data->time_index);

   /*----------------------------------------------------------
    * Set up the solver
    *----------------------------------------------------------*/

   /* request a full setup of the solver memory if any is pending */
   if (relax_data->setup_a_sol > 0)
   {
      relax_data->setup_a_sol = 2;
   }

   hypre_SMGRelaxSetup(relax_vdata, A, b, x);

   /* these fields are read only after the setup call above */
   zero_guess    = relax_data->zero_guess;
   stencil_dim   = relax_data->stencil_dim;
   temp_vec      = relax_data->temp_vec;
   A_sol         = relax_data->A_sol;
   A_rem         = relax_data->A_rem;
   residual_data = relax_data->residual_data;
   solve_data    = relax_data->solve_data;

   /*----------------------------------------------------------
    * Zero initial guess: clear x on the base index space
    *----------------------------------------------------------*/

   if (zero_guess)
   {
      hypre_IndexRef   base_stride = relax_data->base_stride;
      hypre_BoxArray  *base_box_a  = relax_data->base_box_array;

      ierr = hypre_SMGSetStructVectorConstantValues(x, 0.0, base_box_a,
                                                    base_stride);
   }

   /*----------------------------------------------------------
    * Iterate: phase 0 = pre-relaxation, phase 1 = regular
    *----------------------------------------------------------*/

   for (phase = 0; phase < 2; phase++)
   {
      if (phase == 0)
      {
         /* a single pass over the pre-relaxation spaces */
         sweeps      = 1;
         num_spaces  = relax_data->num_pre_spaces;
         space_ranks = relax_data->pre_space_ranks;
      }
      else
      {
         /* max_iter passes over the regular spaces */
         sweeps      = relax_data->max_iter;
         num_spaces  = relax_data->num_reg_spaces;
         space_ranks = relax_data->reg_space_ranks;
      }

      for (sweep = 0; sweep < sweeps; sweep++)
      {
         for (s = 0; s < num_spaces; s++)
         {
            rank = space_ranks[s];

            /* residual with A_rem goes into temp_vec ... */
            hypre_SMGResidual(residual_data[rank], A_rem, x, b, temp_vec);

            /* ... which is the right-hand side of the A_sol solve */
            if (stencil_dim > 2)
            {
               hypre_SMGSolve(solve_data[rank], A_sol, temp_vec, x);
            }
            else
            {
               hypre_CyclicReduction(solve_data[rank], A_sol, temp_vec, x);
            }
         }

         /* written in both phases, so the last executed pass wins */
         relax_data->num_iterations = sweep + 1;
      }
   }

   /*----------------------------------------------------------
    * Release A_sol when the memory_use setting asks for it
    *----------------------------------------------------------*/

   if ((stencil_dim - 1) <= relax_data->memory_use)
   {
      hypre_SMGRelaxDestroyASol(relax_vdata);
   }

   hypre_EndTiming(relax_data->time_index);

   return ierr;
}
Exemplo n.º 13
0
int main (int argc, char *argv[])
{
   int myid, num_procs;
   int n, N, pi, pj, pk;
   double h;

   double tol, theta;
   int maxit, cycle_type;
   int rlx_type, rlx_sweeps, rlx_weight, rlx_omega;
   int amg_coarsen_type, amg_agg_levels, amg_rlx_type;
   int amg_interp_type, amg_Pmax;
   int singular_problem ;

   HYPRE_Int time_index;

   HYPRE_SStructGrid     edge_grid;
   HYPRE_SStructGraph    A_graph;
   HYPRE_SStructMatrix   A;
   HYPRE_SStructVector   b;
   HYPRE_SStructVector   x;
   HYPRE_SStructGrid     node_grid;
   HYPRE_SStructGraph    G_graph;
   HYPRE_SStructStencil  G_stencil[3];
   HYPRE_SStructMatrix   G;
   HYPRE_SStructVector   xcoord, ycoord, zcoord;

   HYPRE_Solver          solver, precond;

   /* Initialize MPI */
   MPI_Init(&argc, &argv);
   MPI_Comm_rank(MPI_COMM_WORLD, &myid);
   MPI_Comm_size(MPI_COMM_WORLD, &num_procs);

   /* Set default parameters */
   n                = 10;
   optionAlpha      = 0;
   optionBeta       = 0;
   maxit            = 100;
   tol              = 1e-6;
   cycle_type       = 13;
   rlx_type         = 2;
   rlx_sweeps       = 1;
   rlx_weight       = 1.0;
   rlx_omega        = 1.0;
   amg_coarsen_type = 10;
   amg_agg_levels   = 1;
   amg_rlx_type     = 6;
   theta            = 0.25;
   amg_interp_type  = 6;
   amg_Pmax         = 4;
   singular_problem = 0;

   /* Parse command line */
   {
      int arg_index = 0;
      int print_usage = 0;

      while (arg_index < argc)
      {
         if ( strcmp(argv[arg_index], "-n") == 0 )
         {
            arg_index++;
            n = atoi(argv[arg_index++]);
         }
         else if ( strcmp(argv[arg_index], "-a") == 0 )
         {
            arg_index++;
            optionAlpha = atoi(argv[arg_index++]);
         }
         else if ( strcmp(argv[arg_index], "-b") == 0 )
         {
            arg_index++;
            optionBeta = atoi(argv[arg_index++]);
         }
         else if ( strcmp(argv[arg_index], "-maxit") == 0 )
         {
            arg_index++;
            maxit = atoi(argv[arg_index++]);
         }
         else if ( strcmp(argv[arg_index], "-tol") == 0 )
         {
            arg_index++;
            tol = atof(argv[arg_index++]);
         }
         else if ( strcmp(argv[arg_index], "-type") == 0 )
         {
            arg_index++;
            cycle_type = atoi(argv[arg_index++]);
         }
         else if ( strcmp(argv[arg_index], "-rlx") == 0 )
         {
            arg_index++;
            rlx_type = atoi(argv[arg_index++]);
         }
         else if ( strcmp(argv[arg_index], "-rlxn") == 0 )
         {
            arg_index++;
            rlx_sweeps = atoi(argv[arg_index++]);
         }
         else if ( strcmp(argv[arg_index], "-rlxw") == 0 )
         {
            arg_index++;
            rlx_weight = atof(argv[arg_index++]);
         }
         else if ( strcmp(argv[arg_index], "-rlxo") == 0 )
         {
            arg_index++;
            rlx_omega = atof(argv[arg_index++]);
         }
         else if ( strcmp(argv[arg_index], "-ctype") == 0 )
         {
            arg_index++;
            amg_coarsen_type = atoi(argv[arg_index++]);
         }
         else if ( strcmp(argv[arg_index], "-amgrlx") == 0 )
         {
            arg_index++;
            amg_rlx_type = atoi(argv[arg_index++]);
         }
         else if ( strcmp(argv[arg_index], "-agg") == 0 )
         {
            arg_index++;
            amg_agg_levels = atoi(argv[arg_index++]);
         }
         else if ( strcmp(argv[arg_index], "-itype") == 0 )
         {
            arg_index++;
            amg_interp_type = atoi(argv[arg_index++]);
         }
         else if ( strcmp(argv[arg_index], "-pmax") == 0 )
         {
            arg_index++;
            amg_Pmax = atoi(argv[arg_index++]);
         }
         else if ( strcmp(argv[arg_index], "-sing") == 0 )
         {
            arg_index++;
            singular_problem = 1;
         }
         else if ( strcmp(argv[arg_index], "-theta") == 0 )
         {
            arg_index++;
            theta = atof(argv[arg_index++]);
         }

         else if ( strcmp(argv[arg_index], "-help") == 0 )
         {
            print_usage = 1;
            break;
         }
         else
         {
            arg_index++;
         }
      }

      if ((print_usage) && (myid == 0))
      {
         printf("\n");
         printf("Usage: %s [<options>]\n", argv[0]);
         printf("\n");
         printf("  -n <n>              : problem size per processor (default: 10)\n");
         printf("  -a <alpha_opt>      : choice for the curl-curl coefficient (default: 1)\n");
         printf("  -b <beta_opt>       : choice for the mass coefficient (default: 1)\n");
         printf("\n");
         printf("PCG-AMS solver options:                                     \n");
         printf("  -maxit <num>        : maximum number of iterations (100)  \n");
         printf("  -tol <num>          : convergence tolerance (1e-6)        \n");
         printf("  -type <num>         : 3-level cycle type (0-8, 11-14)     \n");
         printf("  -theta <num>        : BoomerAMG threshold (0.25)          \n");
         printf("  -ctype <num>        : BoomerAMG coarsening type           \n");
         printf("  -agg <num>          : Levels of BoomerAMG agg. coarsening \n");
         printf("  -amgrlx <num>       : BoomerAMG relaxation type           \n");
         printf("  -itype <num>        : BoomerAMG interpolation type        \n");
         printf("  -pmax <num>         : BoomerAMG interpolation truncation  \n");
         printf("  -rlx <num>          : relaxation type                     \n");
         printf("  -rlxn <num>         : number of relaxation sweeps         \n");
         printf("  -rlxw <num>         : damping parameter (usually <=1)     \n");
         printf("  -rlxo <num>         : SOR parameter (usually in (0,2))    \n");
         printf("  -sing               : curl-curl only (singular) problem   \n");
         printf("\n");
         printf("\n");
      }

      if (print_usage)
      {
         MPI_Finalize();
         return (0);
      }
   }

   /* Figure out the processor grid (N x N x N).  The local problem size is n^3,
      while pi, pj and pk indicate the position in the processor grid. */
   N  = pow(num_procs,1.0/3.0) + 0.5;
   if (num_procs != N*N*N)
   {
      if (myid == 0) printf("Can't run on %d processors, try %d.\n",
                            num_procs, N*N*N);
      MPI_Finalize();
      exit(1);
   }
   h  = 1.0 / (N*n);
   pk = myid / (N*N);
   pj = myid/N - pk*N;
   pi = myid - pj*N - pk*N*N;

   /* Start timing */
   time_index = hypre_InitializeTiming("SStruct Setup");
   hypre_BeginTiming(time_index);

   /* 1. Set up the edge and nodal grids.  Note that we do this simultaneously
         to make sure that they have the same extents.  For simplicity we use
         only one part to represent the unit cube. */
   {
      HYPRE_Int ndim = 3;
      HYPRE_Int nparts = 1;

      /* Create empty 2D grid objects */
      HYPRE_SStructGridCreate(MPI_COMM_WORLD, ndim, nparts, &node_grid);
      HYPRE_SStructGridCreate(MPI_COMM_WORLD, ndim, nparts, &edge_grid);

      /* Set the extents of the grid - each processor sets its grid boxes. */
      {
         HYPRE_Int part = 0;
         HYPRE_Int ilower[3] = {1 + pi*n, 1 + pj*n, 1 + pk*n};
         HYPRE_Int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};

         HYPRE_SStructGridSetExtents(node_grid, part, ilower, iupper);
         HYPRE_SStructGridSetExtents(edge_grid, part, ilower, iupper);
      }

      /* Set the variable type and number of variables on each grid. */
      {
         HYPRE_Int i;
         HYPRE_Int nnodevars = 1;
         HYPRE_Int nedgevars = 3;

         HYPRE_SStructVariable nodevars[1] = {HYPRE_SSTRUCT_VARIABLE_NODE};
         HYPRE_SStructVariable edgevars[3] = {HYPRE_SSTRUCT_VARIABLE_XEDGE,
                                              HYPRE_SSTRUCT_VARIABLE_YEDGE,
                                              HYPRE_SSTRUCT_VARIABLE_ZEDGE};
         for (i = 0; i < nparts; i++)
         {
            HYPRE_SStructGridSetVariables(node_grid, i, nnodevars, nodevars);
            HYPRE_SStructGridSetVariables(edge_grid, i, nedgevars, edgevars);
         }
      }

      /* Since there is only one part, there is no need to call the
         SetNeighborPart or SetSharedPart functions, which determine the spatial
         relation between the parts.  See Examples 12, 13 and 14 for
         illustrations of these calls. */

      /* Now the grids are ready to be used */
      HYPRE_SStructGridAssemble(node_grid);
      HYPRE_SStructGridAssemble(edge_grid);
   }

   /* 2. Create the finite element stiffness matrix A and load vector b. */
   {
      HYPRE_Int part = 0; /* this problem has only one part */

      /* Set the ordering of the variables in the finite element problem.  This
         is done by listing the variable offset directions relative to the
         element's center.  See the Reference Manual for more details. */
      {
         HYPRE_Int ordering[48] = { 0,  0, -1, -1,    /* x-edge [0]-[1] */
                                    1, +1,  0, -1,    /* y-edge [1]-[2] */
         /*     [7]------[6]  */    0,  0, +1, -1,    /* x-edge [3]-[2] */
         /*     /|       /|   */    1, -1,  0, -1,    /* y-edge [0]-[3] */
         /*    / |      / |   */    0,  0, -1, +1,    /* x-edge [4]-[5] */
         /*  [4]------[5] |   */    1, +1,  0, +1,    /* y-edge [5]-[6] */
         /*   | [3]----|-[2]  */    0,  0, +1, +1,    /* x-edge [7]-[6] */
         /*   | /      | /    */    1, -1,  0, +1,    /* y-edge [4]-[7] */
         /*   |/       |/     */    2, -1, -1,  0,    /* z-edge [0]-[4] */
         /*  [0]------[1]     */    2, +1, -1,  0,    /* z-edge [1]-[5] */
                                    2, +1, +1,  0,    /* z-edge [2]-[6] */
                                    2, -1, +1,  0 };  /* z-edge [3]-[7] */

         HYPRE_SStructGridSetFEMOrdering(edge_grid, part, ordering);
      }

      /* Set up the Graph - this determines the non-zero structure of the
         matrix. */
      {
         HYPRE_Int part = 0;

         /* Create the graph object */
         HYPRE_SStructGraphCreate(MPI_COMM_WORLD, edge_grid, &A_graph);

         /* See MatrixSetObjectType below */
         HYPRE_SStructGraphSetObjectType(A_graph, HYPRE_PARCSR);

         /* Indicate that this problem uses finite element stiffness matrices and
            load vectors, instead of stencils. */
         HYPRE_SStructGraphSetFEM(A_graph, part);

         /* The edge finite element matrix is full, so there is no need to call the
            HYPRE_SStructGraphSetFEMSparsity() function. */

         /* Assemble the graph */
         HYPRE_SStructGraphAssemble(A_graph);
      }

      /* Set up the SStruct Matrix and right-hand side vector */
      {
         /* Create the matrix object */
         HYPRE_SStructMatrixCreate(MPI_COMM_WORLD, A_graph, &A);
         /* Use a ParCSR storage */
         HYPRE_SStructMatrixSetObjectType(A, HYPRE_PARCSR);
         /* Indicate that the matrix coefficients are ready to be set */
         HYPRE_SStructMatrixInitialize(A);

         /* Create an empty vector object */
         HYPRE_SStructVectorCreate(MPI_COMM_WORLD, edge_grid, &b);
         /* Use a ParCSR storage */
         HYPRE_SStructVectorSetObjectType(b, HYPRE_PARCSR);
         /* Indicate that the vector coefficients are ready to be set */
         HYPRE_SStructVectorInitialize(b);
      }

      /* Set the matrix and vector entries by finite element assembly */
      {
         /* local stiffness matrix and load vector */
         double S[12][12], F[12];

         int i, j, k;
         HYPRE_Int index[3];

         for (i = 1; i <= n; i++)
            for (j = 1; j <= n; j++)
               for (k = 1; k <= n; k++)
               {
                  /* Compute the FEM matrix and r.h.s. for cell (i,j,k) with
                     coefficients evaluated at the cell center. */
                  index[0] = i + pi*n; index[1] = j + pj*n; index[2] = k + pk*n;
                  ComputeFEMND1(S,F,(pi*n+i)*h-h/2,(pj*n+j)*h-h/2,(pk*n+k)*h-h/2,h);

                  /* Eliminate boundary conditions on x = 0 */
                  if (index[0] == 1)
                  {
                     int ii, jj, bc_edges[4] = { 3, 11, 7, 8 };
                     for (ii = 0; ii < 4; ii++)
                     {
                        for (jj = 0; jj < 12; jj++)
                           S[bc_edges[ii]][jj] = S[jj][bc_edges[ii]] = 0.0;
                        S[bc_edges[ii]][bc_edges[ii]] = 1.0;
                        F[bc_edges[ii]] = 0.0;
                     }
                  }
                  /* Eliminate boundary conditions on y = 0 */
                  if (index[1] == 1)
                  {
                     int ii, jj, bc_edges[4] = { 0, 9, 4, 8 };
                     for (ii = 0; ii < 4; ii++)
                     {
                        for (jj = 0; jj < 12; jj++)
                           S[bc_edges[ii]][jj] = S[jj][bc_edges[ii]] = 0.0;
                        S[bc_edges[ii]][bc_edges[ii]] = 1.0;
                        F[bc_edges[ii]] = 0.0;
                     }
                  }
                  /* Eliminate boundary conditions on z = 0 */
                  if (index[2] == 1)
                  {
                     int ii, jj, bc_edges[4] = { 0, 1, 2, 3 };
                     for (ii = 0; ii < 4; ii++)
                     {
                        for (jj = 0; jj < 12; jj++)
                           S[bc_edges[ii]][jj] = S[jj][bc_edges[ii]] = 0.0;
                        S[bc_edges[ii]][bc_edges[ii]] = 1.0;
                        F[bc_edges[ii]] = 0.0;
                     }
                  }
                  /* Eliminate boundary conditions on x = 1 */
                  if (index[0] == N*n)
                  {
                     int ii, jj, bc_edges[4] = { 1, 10, 5, 9 };
                     for (ii = 0; ii < 4; ii++)
                     {
                        for (jj = 0; jj < 12; jj++)
                           S[bc_edges[ii]][jj] = S[jj][bc_edges[ii]] = 0.0;
                        S[bc_edges[ii]][bc_edges[ii]] = 1.0;
                        F[bc_edges[ii]] = 0.0;
                     }
                  }
                  /* Eliminate boundary conditions on y = 1 */
                  if (index[1] == N*n)
                  {
                     int ii, jj, bc_edges[4] = { 2, 10, 6, 11 };
                     for (ii = 0; ii < 4; ii++)
                     {
                        for (jj = 0; jj < 12; jj++)
                           S[bc_edges[ii]][jj] = S[jj][bc_edges[ii]] = 0.0;
                        S[bc_edges[ii]][bc_edges[ii]] = 1.0;
                        F[bc_edges[ii]] = 0.0;
                     }
                  }
                  /* Eliminate boundary conditions on z = 1 */
                  if (index[2] == N*n)
                  {
                     int ii, jj, bc_edges[4] = { 4, 5, 6, 7 };
                     for (ii = 0; ii < 4; ii++)
                     {
                        for (jj = 0; jj < 12; jj++)
                           S[bc_edges[ii]][jj] = S[jj][bc_edges[ii]] = 0.0;
                        S[bc_edges[ii]][bc_edges[ii]] = 1.0;
                        F[bc_edges[ii]] = 0.0;
                     }
                  }

                  /* Assemble the matrix */
                  HYPRE_SStructMatrixAddFEMValues(A, part, index, &S[0][0]);

                  /* Assemble the vector */
                  HYPRE_SStructVectorAddFEMValues(b, part, index, F);
               }
      }

      /* Collective calls finalizing the matrix and vector assembly */
      HYPRE_SStructMatrixAssemble(A);
      HYPRE_SStructVectorAssemble(b);
   }

   /* 3. Create the discrete gradient matrix G, which is needed in AMS. */
   {
      HYPRE_Int part = 0;
      HYPRE_Int stencil_size = 2;

      /* Define the discretization stencil relating the edges and nodes of the
         grid. */
      {
         HYPRE_Int ndim = 3;
         HYPRE_Int entry;
         HYPRE_Int var = 0; /* the node variable */

         /* The discrete gradient stencils connect edge to node variables. */
         HYPRE_Int Gx_offsets[2][3] = {{-1,0,0},{0,0,0}};  /* x-edge [7]-[6] */
         HYPRE_Int Gy_offsets[2][3] = {{0,-1,0},{0,0,0}};  /* y-edge [5]-[6] */
         HYPRE_Int Gz_offsets[2][3] = {{0,0,-1},{0,0,0}};  /* z-edge [2]-[6] */

         HYPRE_SStructStencilCreate(ndim, stencil_size, &G_stencil[0]);
         HYPRE_SStructStencilCreate(ndim, stencil_size, &G_stencil[1]);
         HYPRE_SStructStencilCreate(ndim, stencil_size, &G_stencil[2]);

         for (entry = 0; entry < stencil_size; entry++)
         {
            HYPRE_SStructStencilSetEntry(G_stencil[0], entry, Gx_offsets[entry], var);
            HYPRE_SStructStencilSetEntry(G_stencil[1], entry, Gy_offsets[entry], var);
            HYPRE_SStructStencilSetEntry(G_stencil[2], entry, Gz_offsets[entry], var);
         }
      }

      /* Set up the Graph - this determines the non-zero structure of the
         matrix. */
      {
         HYPRE_Int nvars = 3;
         HYPRE_Int var; /* the edge variables */

         /* Create the discrete gradient graph object */
         HYPRE_SStructGraphCreate(MPI_COMM_WORLD, edge_grid, &G_graph);

         /* See MatrixSetObjectType below */
         HYPRE_SStructGraphSetObjectType(G_graph, HYPRE_PARCSR);

         /* Since the discrete gradient relates edge and nodal variables (it is a
            rectangular matrix), we have to specify the domain (column) grid. */
         HYPRE_SStructGraphSetDomainGrid(G_graph, node_grid);

         /* Tell the graph which stencil to use for each edge variable on each
            part (we only have one part). */
         for (var = 0; var < nvars; var++)
            HYPRE_SStructGraphSetStencil(G_graph, part, var, G_stencil[var]);

         /* Assemble the graph */
         HYPRE_SStructGraphAssemble(G_graph);
      }

      /* Set up the SStruct Matrix */
      {
         /* Create the matrix object */
         HYPRE_SStructMatrixCreate(MPI_COMM_WORLD, G_graph, &G);
         /* Use a ParCSR storage */
         HYPRE_SStructMatrixSetObjectType(G, HYPRE_PARCSR);
         /* Indicate that the matrix coefficients are ready to be set */
         HYPRE_SStructMatrixInitialize(G);
      }

      /* Set the discrete gradient values, assuming a "natural" orientation of
         the edges (i.e. one in agreement with the coordinate directions). */
      {
         int i;
         int nedges = n*(n+1)*(n+1);
         double *values;
         HYPRE_Int stencil_indices[2] = {0,1}; /* the nodes of each edge */

         values = calloc(2*nedges, sizeof(double));

         /* The edge orientation is fixed: from first to second node */
         for (i = 0; i < nedges; i++)
         {
            values[2*i]   = -1.0;
            values[2*i+1] =  1.0;
         }

         /* Set the values in the discrete gradient x-edges */
         {
            HYPRE_Int var = 0;
            HYPRE_Int ilower[3] = {1 + pi*n, 0 + pj*n, 0 + pk*n};
            HYPRE_Int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
            HYPRE_SStructMatrixSetBoxValues(G, part, ilower, iupper, var,
                                            stencil_size, stencil_indices,
                                            values);
         }
         /* Set the values in the discrete gradient y-edges */
         {
            HYPRE_Int var = 1;
            HYPRE_Int ilower[3] = {0 + pi*n, 1 + pj*n, 0 + pk*n};
            HYPRE_Int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
            HYPRE_SStructMatrixSetBoxValues(G, part, ilower, iupper, var,
                                            stencil_size, stencil_indices,
                                            values);
         }
         /* Set the values in the discrete gradient z-edges */
         {
            HYPRE_Int var = 2;
            HYPRE_Int ilower[3] = {0 + pi*n, 0 + pj*n, 1 + pk*n};
            HYPRE_Int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
            HYPRE_SStructMatrixSetBoxValues(G, part, ilower, iupper, var,
                                            stencil_size, stencil_indices,
                                            values);
         }

         free(values);
      }

      /* Finalize the matrix assembly */
      HYPRE_SStructMatrixAssemble(G);
   }

   /* 4. Create the vectors of nodal coordinates xcoord, ycoord and zcoord,
         which are needed in AMS. */
   {
      int i, j, k;
      HYPRE_Int part = 0;
      HYPRE_Int var = 0; /* the node variable */
      HYPRE_Int index[3];
      double xval, yval, zval;

      /* Create empty vector objects */
      HYPRE_SStructVectorCreate(MPI_COMM_WORLD, node_grid, &xcoord);
      HYPRE_SStructVectorCreate(MPI_COMM_WORLD, node_grid, &ycoord);
      HYPRE_SStructVectorCreate(MPI_COMM_WORLD, node_grid, &zcoord);
      /* Set the object type to ParCSR */
      HYPRE_SStructVectorSetObjectType(xcoord, HYPRE_PARCSR);
      HYPRE_SStructVectorSetObjectType(ycoord, HYPRE_PARCSR);
      HYPRE_SStructVectorSetObjectType(zcoord, HYPRE_PARCSR);
      /* Indicate that the vector coefficients are ready to be set */
      HYPRE_SStructVectorInitialize(xcoord);
      HYPRE_SStructVectorInitialize(ycoord);
      HYPRE_SStructVectorInitialize(zcoord);

      /* Compute and set the coordinates of the nodes */
      for (i = 0; i <= n; i++)
         for (j = 0; j <= n; j++)
            for (k = 0; k <= n; k++)
            {
               index[0] = i + pi*n; index[1] = j + pj*n; index[2] = k + pk*n;

               xval = index[0]*h;
               yval = index[1]*h;
               zval = index[2]*h;

               HYPRE_SStructVectorSetValues(xcoord, part, index, var, &xval);
               HYPRE_SStructVectorSetValues(ycoord, part, index, var, &yval);
               HYPRE_SStructVectorSetValues(zcoord, part, index, var, &zval);
            }

      /* Finalize the vector assembly */
      HYPRE_SStructVectorAssemble(xcoord);
      HYPRE_SStructVectorAssemble(ycoord);
      HYPRE_SStructVectorAssemble(zcoord);
   }

   /* 5. Set up a SStruct Vector for the solution vector x */
   {
      HYPRE_Int part = 0;
      int nvalues = n*(n+1)*(n+1);
      double *values;

      values = calloc(nvalues, sizeof(double));

      /* Create an empty vector object */
      HYPRE_SStructVectorCreate(MPI_COMM_WORLD, edge_grid, &x);
      /* Set the object type to ParCSR */
      HYPRE_SStructVectorSetObjectType(x, HYPRE_PARCSR);
      /* Indicate that the vector coefficients are ready to be set */
      HYPRE_SStructVectorInitialize(x);

      /* Set the values for the initial guess x-edge */
      {
         HYPRE_Int var = 0;
         HYPRE_Int ilower[3] = {1 + pi*n, 0 + pj*n, 0 + pk*n};
         HYPRE_Int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
         HYPRE_SStructVectorSetBoxValues(x, part, ilower, iupper, var, values);
      }
      /* Set the values for the initial guess y-edge */
      {
         HYPRE_Int var = 1;
         HYPRE_Int ilower[3] = {0 + pi*n, 1 + pj*n, 0 + pk*n};
         HYPRE_Int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
         HYPRE_SStructVectorSetBoxValues(x, part, ilower, iupper, var, values);
      }
      /* Set the values for the initial guess z-edge */
      {
         HYPRE_Int var = 2;
         HYPRE_Int ilower[3] = {0 + pi*n, 0 + pj*n, 1 + pk*n};
         HYPRE_Int iupper[3] = {n + pi*n, n + pj*n, n + pk*n};
         HYPRE_SStructVectorSetBoxValues(x, part, ilower, iupper, var, values);
      }

      free(values);

      /* Finalize the vector assembly */
      HYPRE_SStructVectorAssemble(x);
   }

   /* Finalize current timing */
   hypre_EndTiming(time_index);
   hypre_PrintTiming("SStruct phase times", MPI_COMM_WORLD);
   hypre_FinalizeTiming(time_index);
   hypre_ClearTiming();

   /* 6. Set up and call the PCG-AMS solver (Solver options can be found in the
         Reference Manual.) */
   {
      double final_res_norm;
      HYPRE_Int its;

      HYPRE_ParCSRMatrix    par_A;
      HYPRE_ParVector       par_b;
      HYPRE_ParVector       par_x;

      HYPRE_ParCSRMatrix    par_G;
      HYPRE_ParVector       par_xcoord;
      HYPRE_ParVector       par_ycoord;
      HYPRE_ParVector       par_zcoord;

      /* Extract the ParCSR objects needed in the solver */
      HYPRE_SStructMatrixGetObject(A, (void **) &par_A);
      HYPRE_SStructVectorGetObject(b, (void **) &par_b);
      HYPRE_SStructVectorGetObject(x, (void **) &par_x);
      HYPRE_SStructMatrixGetObject(G, (void **) &par_G);
      HYPRE_SStructVectorGetObject(xcoord, (void **) &par_xcoord);
      HYPRE_SStructVectorGetObject(ycoord, (void **) &par_ycoord);
      HYPRE_SStructVectorGetObject(zcoord, (void **) &par_zcoord);

      if (myid == 0)
         printf("Problem size: %lld\n\n",
             hypre_ParCSRMatrixGlobalNumRows((hypre_ParCSRMatrix*)par_A));

      /* Start timing */
      time_index = hypre_InitializeTiming("AMS Setup");
      hypre_BeginTiming(time_index);

      /* Create solver */
      HYPRE_ParCSRPCGCreate(MPI_COMM_WORLD, &solver);

      /* Set some parameters (See Reference Manual for more parameters) */
      HYPRE_PCGSetMaxIter(solver, maxit); /* max iterations */
      HYPRE_PCGSetTol(solver, tol); /* conv. tolerance */
      HYPRE_PCGSetTwoNorm(solver, 0); /* use the two norm as the stopping criteria */
      HYPRE_PCGSetPrintLevel(solver, 2); /* print solve info */
      HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */

      /* Create AMS preconditioner */
      HYPRE_AMSCreate(&precond);

      /* Set AMS parameters */
      HYPRE_AMSSetMaxIter(precond, 1);
      HYPRE_AMSSetTol(precond, 0.0);
      HYPRE_AMSSetCycleType(precond, cycle_type);
      HYPRE_AMSSetPrintLevel(precond, 1);

      /* Set discrete gradient */
      HYPRE_AMSSetDiscreteGradient(precond, par_G);

      /* Set vertex coordinates */
      HYPRE_AMSSetCoordinateVectors(precond,
                                    par_xcoord, par_ycoord, par_zcoord);

      if (singular_problem)
         HYPRE_AMSSetBetaPoissonMatrix(precond, NULL);

      /* Smoothing and AMG options */
      HYPRE_AMSSetSmoothingOptions(precond,
                                   rlx_type, rlx_sweeps,
                                   rlx_weight, rlx_omega);
      HYPRE_AMSSetAlphaAMGOptions(precond,
                                  amg_coarsen_type, amg_agg_levels,
                                  amg_rlx_type, theta, amg_interp_type,
                                  amg_Pmax);
      HYPRE_AMSSetBetaAMGOptions(precond,
                                 amg_coarsen_type, amg_agg_levels,
                                 amg_rlx_type, theta, amg_interp_type,
                                 amg_Pmax);

      /* Set the PCG preconditioner */
      HYPRE_PCGSetPrecond(solver,
                          (HYPRE_PtrToSolverFcn) HYPRE_AMSSolve,
                          (HYPRE_PtrToSolverFcn) HYPRE_AMSSetup,
                          precond);

      /* Call the setup */
      HYPRE_ParCSRPCGSetup(solver, par_A, par_b, par_x);

      /* Finalize current timing */
      hypre_EndTiming(time_index);
      hypre_PrintTiming("Setup phase times", MPI_COMM_WORLD);
      hypre_FinalizeTiming(time_index);
      hypre_ClearTiming();

      /* Start timing again */
      time_index = hypre_InitializeTiming("AMS Solve");
      hypre_BeginTiming(time_index);

      /* Call the solve */
      HYPRE_ParCSRPCGSolve(solver, par_A, par_b, par_x);

      /* Finalize current timing */
      hypre_EndTiming(time_index);
      hypre_PrintTiming("Solve phase times", MPI_COMM_WORLD);
      hypre_FinalizeTiming(time_index);
      hypre_ClearTiming();

      /* Get some info */
      HYPRE_PCGGetNumIterations(solver, &its);
      HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm);

      /* Clean up */
      HYPRE_AMSDestroy(precond);
      HYPRE_ParCSRPCGDestroy(solver);

      /* Gather the solution vector */
      HYPRE_SStructVectorGather(x);

      if (myid == 0)
      {
         printf("\n");
         printf("Iterations = %lld\n", its);
         printf("Final Relative Residual Norm = %g\n", final_res_norm);
         printf("\n");
      }
   }

   /* Free memory */
   HYPRE_SStructGridDestroy(edge_grid);
   HYPRE_SStructGraphDestroy(A_graph);
   HYPRE_SStructMatrixDestroy(A);
   HYPRE_SStructVectorDestroy(b);
   HYPRE_SStructVectorDestroy(x);
   HYPRE_SStructGridDestroy(node_grid);
   HYPRE_SStructGraphDestroy(G_graph);
   HYPRE_SStructStencilDestroy(G_stencil[0]);
   HYPRE_SStructStencilDestroy(G_stencil[1]);
   HYPRE_SStructStencilDestroy(G_stencil[2]);
   HYPRE_SStructMatrixDestroy(G);
   HYPRE_SStructVectorDestroy(xcoord);
   HYPRE_SStructVectorDestroy(ycoord);
   HYPRE_SStructVectorDestroy(zcoord);

   /* Finalize MPI */
   MPI_Finalize();

   return 0;
}
Exemplo n.º 14
0
/*
 * Solve the structured linear system A x = b with hypre and return the
 * solution on the local box.
 *
 * Ab        - packed input: the first parms.N*parms.nsten entries are the
 *             stencil coefficients of A, the next parms.N entries are the
 *             right-hand side b.  This function takes ownership of Ab and
 *             frees it before returning.
 * solver_id - 0: SMG; 1: PCG preconditioned with SMG.  For any other value
 *             no solver is run and the (zero) initial guess is returned.
 * parms     - problem description; this function reads N, nsten, x0, x1, y0,
 *             y1, Ny and, depending on Dim/DEBUG, Nz and rank.
 *
 * Returns a calloc'ed array of parms.N doubles that the caller must free().
 * If that array cannot be allocated the job is terminated with MPI_Abort,
 * because the remaining ranks would otherwise block in hypre's collective
 * calls.
 *
 * NOTE(review): the stencil offset tables below hold 5 (2D) or 7 (3D)
 * entries, so parms.nsten is assumed not to exceed that - confirm with the
 * callers.
 */
double *solve(double *Ab, int solver_id, struct parms parms)
{
    int i;
    double final_res_norm;
    /* hypre writes the iteration count through a HYPRE_Int pointer; a plain
       int here is the wrong size when hypre is built with big integers. */
    HYPRE_Int num_iterations;
    const int n_pre = 1, n_post = 1;

    /* hypre copies the values handed to the SetBoxValues calls, so the two
       sections of the packed input can be passed directly instead of being
       duplicated into scratch buffers first. */
    double *A_val = Ab;
    double *b_val = Ab + parms.N*parms.nsten;

    /* One zero-filled buffer serves both as the initial guess for x and,
       later, as the storage for the returned solution. */
    double *values = calloc(parms.N, sizeof(double));
    if (values == NULL && parms.N > 0)
    {
        fprintf(stderr, "solve: out of memory allocating the solution buffer\n");
        free(Ab);
        MPI_Abort(MPI_COMM_WORLD, 1);
        return NULL;
    }

    // HYPRE //
    HYPRE_StructGrid     grid;
    HYPRE_StructStencil  stencil;
    HYPRE_StructMatrix   A;
    HYPRE_StructVector   b;
    HYPRE_StructVector   x;
    HYPRE_StructSolver   solver;
    HYPRE_StructSolver   precond;

#if Dim == 2
    HYPRE_Int ilower[2] = {parms.x0, parms.y0};
    HYPRE_Int iupper[2] = {parms.x1, parms.y1};
#endif

#if Dim == 3
    HYPRE_Int ilower[3] = {parms.x0, parms.y0, 0};
    HYPRE_Int iupper[3] = {parms.x1, parms.y1, parms.Nz-1};
#endif

    // 1. Set up the grid
    {
        // Create an empty Dim-dimensional grid object
        HYPRE_StructGridCreate(MPI_COMM_WORLD, Dim, &grid);

        // Add the local box to the grid
        HYPRE_StructGridSetExtents(grid, ilower, iupper);

        // Periodic in y (and in z for 3D builds); not periodic in x
        HYPRE_Int pset[3] = {0, parms.Ny, 0};
#if Dim == 3
        pset[2] = parms.Nz;
#endif
        HYPRE_StructGridSetPeriodic(grid, pset);
        HYPRE_StructGridAssemble(grid);
    }

    // 2. Define the discretization stencil
    {
        if (Dim == 2)
        {
            // 2D stencil: centre, west, south, north, east
            HYPRE_Int offsets[5][2] = {{0,0}, {-1,0}, {0,-1}, {0,1}, {1,0}};

            HYPRE_StructStencilCreate(2, parms.nsten, &stencil);
            for (i = 0; i < parms.nsten; i++)
                HYPRE_StructStencilSetElement(stencil, i, offsets[i]);
        }
        else
        {
            // 3D stencil: the 2D entries followed by the two z-neighbours
            HYPRE_Int offsets[7][3] = {{0,0,0}, {-1,0,0}, {0,-1,0}, {0,1,0},
                                       {1,0,0}, {0,0,-1}, {0,0,1}};

            HYPRE_StructStencilCreate(3, parms.nsten, &stencil);
            for (i = 0; i < parms.nsten; i++)
                HYPRE_StructStencilSetElement(stencil, i, offsets[i]);
        }
    }

    // 3. Set up a Struct Matrix A from the coefficient section of Ab
    {
        HYPRE_Int stencil_indices[parms.nsten];

        // Create an empty matrix object
        HYPRE_StructMatrixCreate(MPI_COMM_WORLD, grid, stencil, &A);

        // Indicate that the matrix coefficients are ready to be set
        HYPRE_StructMatrixInitialize(A);

        // Every stencil entry is supplied, in stencil order
        for (i = 0; i < parms.nsten; i++)
            stencil_indices[i] = i;

        HYPRE_StructMatrixSetBoxValues(A, ilower, iupper, parms.nsten, stencil_indices, A_val);
    }

    // 4. Set up Struct Vectors: b from the right-hand-side section, x = 0
    {
        HYPRE_StructVectorCreate(MPI_COMM_WORLD, grid, &b);
        HYPRE_StructVectorCreate(MPI_COMM_WORLD, grid, &x);

        HYPRE_StructVectorInitialize(b);
        HYPRE_StructVectorInitialize(x);

        // values is still all zeros from calloc at this point
        HYPRE_StructVectorSetBoxValues(x, ilower, iupper, values);
        HYPRE_StructVectorSetBoxValues(b, ilower, iupper, b_val);
    }

    // 5. Finalize the vector and matrix assembly
    HYPRE_StructMatrixAssemble(A);
    HYPRE_StructVectorAssemble(b);
    HYPRE_StructVectorAssemble(x);
#if DEBUG == 3
    HYPRE_StructMatrixPrint("./poisson.matrix", A, 0);
    HYPRE_StructVectorPrint("./poisson.rhs", b, 0);
#endif

    // 6a. Set up and use a solver (SMG)
    if (solver_id == 0)
    {
        HYPRE_Int time_index;

        time_index = hypre_InitializeTiming("SMG Setup");
        hypre_BeginTiming(time_index);
        HYPRE_StructSMGCreate(MPI_COMM_WORLD, &solver);
        HYPRE_StructSMGSetMemoryUse(solver, 0);
        HYPRE_StructSMGSetMaxIter(solver, 100);
        HYPRE_StructSMGSetTol(solver, 1.0e-12);
        HYPRE_StructSMGSetRelChange(solver, 0);
        HYPRE_StructSMGSetNumPreRelax(solver, n_pre);
        HYPRE_StructSMGSetNumPostRelax(solver, n_post);
        // Logging must be on to get iterations and residual norm info below
        HYPRE_StructSMGSetLogging(solver, 1);

        // Setup and print setup timings
        HYPRE_StructSMGSetup(solver, A, b, x);
        hypre_EndTiming(time_index);
#if DEBUG == 3
        hypre_PrintTiming("Setup phase times", MPI_COMM_WORLD);
#endif
        hypre_FinalizeTiming(time_index);
        hypre_ClearTiming();

        // Solve and print solve timings
        time_index = hypre_InitializeTiming("SMG Solve");
        hypre_BeginTiming(time_index);
        HYPRE_StructSMGSolve(solver, A, b, x);
        hypre_EndTiming(time_index);
#if DEBUG == 3
        hypre_PrintTiming("Solve phase times", MPI_COMM_WORLD);
#endif
        hypre_FinalizeTiming(time_index);
        hypre_ClearTiming();

        // Get some info on the run
        HYPRE_StructSMGGetNumIterations(solver, &num_iterations);
        HYPRE_StructSMGGetFinalRelativeResidualNorm(solver, &final_res_norm);
#if DEBUG == 2
        if (parms.rank == 0){
            fprintf(stdout, "Number of Iterations = %4d ; Final Relative Residual Norm = %e\n\n", (int) num_iterations, final_res_norm);
        }
#endif
        // Clean up
        HYPRE_StructSMGDestroy(solver);
    }

    // 6b. Set up and use a solver (PCG) with SMG preconditioner
    if (solver_id == 1)
    {
        HYPRE_StructPCGCreate(MPI_COMM_WORLD, &solver);
        HYPRE_StructPCGSetMaxIter(solver, 100);
        HYPRE_StructPCGSetTol(solver, 1.0e-12);
        HYPRE_StructPCGSetTwoNorm(solver, 1);
        HYPRE_StructPCGSetRelChange(solver, 0);
        HYPRE_StructPCGSetLogging(solver, 1);

        /* Use symmetric SMG as preconditioner */
        HYPRE_StructSMGCreate(MPI_COMM_WORLD, &precond);
        HYPRE_StructSMGSetMemoryUse(precond, 0);
        HYPRE_StructSMGSetMaxIter(precond, 32);
        HYPRE_StructSMGSetTol(precond, 0.0);
        HYPRE_StructSMGSetZeroGuess(precond);
        HYPRE_StructSMGSetNumPreRelax(precond, 1);
        HYPRE_StructSMGSetNumPostRelax(precond, 1);

        /* Set the preconditioner and solve */
        HYPRE_StructPCGSetPrecond(solver, HYPRE_StructSMGSolve, HYPRE_StructSMGSetup, precond);
        HYPRE_StructPCGSetup(solver, A, b, x);
        HYPRE_StructPCGSolve(solver, A, b, x);

        /* Get some info on the run */
        HYPRE_StructPCGGetNumIterations(solver, &num_iterations);
        HYPRE_StructPCGGetFinalRelativeResidualNorm(solver, &final_res_norm);
#if DEBUG == 2
        if (parms.rank == 0){
            fprintf(stdout, "Number of Iterations = %4d ; Final Relative Residual Norm = %e\n\n", (int) num_iterations, final_res_norm);
        }
#endif

        /* Clean up */
        HYPRE_StructSMGDestroy(precond);
        HYPRE_StructPCGDestroy(solver);
    }

    // 7. Copy the local solution into the buffer handed back to the caller
    HYPRE_StructVectorGetBoxValues(x, ilower, iupper, values);

    // Free memory
    HYPRE_StructGridDestroy(grid);
    HYPRE_StructStencilDestroy(stencil);
    HYPRE_StructMatrixDestroy(A);
    HYPRE_StructVectorDestroy(b);
    HYPRE_StructVectorDestroy(x);
    free(Ab);   /* the packed input is owned by this function */
    return(values);
}
int hypre_SMGResidual(void *residual_vdata,hypre_StructMatrix *A,hypre_StructVector *x,hypre_StructVector *b,hypre_StructVector *r)
{
  int ierr = 0;
  hypre_SMGResidualData *residual_data = residual_vdata;
  hypre_IndexRef base_stride = (residual_data -> base_stride);
  hypre_BoxArray *base_points = (residual_data -> base_points);
  hypre_ComputePkg *compute_pkg = (residual_data -> compute_pkg);
  hypre_CommHandle *comm_handle;
  hypre_BoxArrayArray *compute_box_aa;
  hypre_BoxArray *compute_box_a;
  hypre_Box *compute_box;
  hypre_Box *A_data_box;
  hypre_Box *x_data_box;
  hypre_Box *b_data_box;
  hypre_Box *r_data_box;
  int Ai;
  int xi;
  int bi;
  int ri;
  double *Ap;
  double *xp;
  double *bp;
  double *rp;
  hypre_Index loop_size;
  hypre_IndexRef start;
  hypre_StructStencil *stencil;
  hypre_Index *stencil_shape;
  int stencil_size;
  int compute_i;
  int i;
  int j;
  int si;
  int loopi;
  int loopj;
  int loopk;
/* New static variables, precomputed */
  hypre_BeginTiming((residual_data -> time_index));
  stencil = (A -> stencil);
  stencil_shape = (stencil -> shape);
  stencil_size = (stencil -> size);
  (stencil_size <= 15)?0 : ((__assert_fail(("stencil_size <= 15"),("/home/liao6/svnrepos/benchmarks/smg2000/struct_ls/smg_residual.c"),(203),("int hypre_SMGResidual(void *, struct hypre_StructMatrix_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *)")) , 0));
  for (compute_i = 0; compute_i < 2; compute_i++) {
    switch(compute_i){
      case 0:
{
{
          xp = (x -> data);
          hypre_InitializeIndtComputations(compute_pkg,xp,&comm_handle);
          compute_box_aa = (compute_pkg -> indt_boxes);
          compute_box_a = base_points;
          for (i = 0; i < (compute_box_a -> size); i++) {
            compute_box = ((compute_box_a -> boxes) + i);
            start = (compute_box -> imin);
            b_data_box = ((( *(b -> data_space)).boxes) + i);
            r_data_box = ((( *(r -> data_space)).boxes) + i);
            bp = ((b -> data) + ((b -> data_indices)[i]));
            rp = ((r -> data) + ((r -> data_indices)[i]));
            hypre_BoxGetStrideSize(compute_box,base_stride,loop_size);
{
              int hypre__i1start = (((start[0]) - ((b_data_box -> imin)[0])) + ((((start[1]) - ((b_data_box -> imin)[1])) + (((start[2]) - ((b_data_box -> imin)[2])) * (((0 < ((((b_data_box -> imax)[1]) - ((b_data_box -> imin)[1])) + 1))?((((b_data_box -> imax)[1]) - ((b_data_box -> imin)[1])) + 1) : 0)))) * (((0 < ((((b_data_box -> imax)[0]) - ((b_data_box -> imin)[0])) + 1))?((((b_data_box -> imax)[0]) - ((b_data_box -> imin)[0])) + 1) : 0))));
              int hypre__i2start = (((start[0]) - ((r_data_box -> imin)[0])) + ((((start[1]) - ((r_data_box -> imin)[1])) + (((start[2]) - ((r_data_box -> imin)[2])) * (((0 < ((((r_data_box -> imax)[1]) - ((r_data_box -> imin)[1])) + 1))?((((r_data_box -> imax)[1]) - ((r_data_box -> imin)[1])) + 1) : 0)))) * (((0 < ((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1))?((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1) : 0))));
              int hypre__sx1 = (base_stride[0]);
              int hypre__sy1 = ((base_stride[1]) * (((0 < ((((b_data_box -> imax)[0]) - ((b_data_box -> imin)[0])) + 1))?((((b_data_box -> imax)[0]) - ((b_data_box -> imin)[0])) + 1) : 0)));
              int hypre__sz1 = (((base_stride[2]) * (((0 < ((((b_data_box -> imax)[0]) - ((b_data_box -> imin)[0])) + 1))?((((b_data_box -> imax)[0]) - ((b_data_box -> imin)[0])) + 1) : 0))) * (((0 < ((((b_data_box -> imax)[1]) - ((b_data_box -> imin)[1])) + 1))?((((b_data_box -> imax)[1]) - ((b_data_box -> imin)[1])) + 1) : 0)));
              int hypre__sx2 = (base_stride[0]);
              int hypre__sy2 = ((base_stride[1]) * (((0 < ((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1))?((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1) : 0)));
              int hypre__sz2 = (((base_stride[2]) * (((0 < ((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1))?((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1) : 0))) * (((0 < ((((r_data_box -> imax)[1]) - ((r_data_box -> imin)[1])) + 1))?((((r_data_box -> imax)[1]) - ((r_data_box -> imin)[1])) + 1) : 0)));
              int hypre__nx = (loop_size[0]);
              int hypre__ny = (loop_size[1]);
              int hypre__nz = (loop_size[2]);
              int hypre__mx = hypre__nx;
              int hypre__my = hypre__ny;
              int hypre__mz = hypre__nz;
              int hypre__dir;
              int hypre__max;
              int hypre__div;
              int hypre__mod;
              int hypre__block;
              int hypre__num_blocks;
              hypre__dir = 0;
              hypre__max = hypre__nx;
              if (hypre__ny > hypre__max) {
                hypre__dir = 1;
                hypre__max = hypre__ny;
              }
              if (hypre__nz > hypre__max) {
                hypre__dir = 2;
                hypre__max = hypre__nz;
              }
              hypre__num_blocks = 1;
              if (hypre__max < hypre__num_blocks) {
                hypre__num_blocks = hypre__max;
              }
              if (hypre__num_blocks > 0) {
                hypre__div = (hypre__max / hypre__num_blocks);
                hypre__mod = (hypre__max % hypre__num_blocks);
              }
/* # 236 "smg_residual.c" */
              (hypre__sx1 == 1)?0 : ((__assert_fail(("hypre__sx1 == 1"),("/home/liao6/svnrepos/benchmarks/smg2000/struct_ls/smg_residual.c"),(357),("int hypre_SMGResidual(void *, struct hypre_StructMatrix_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *)")) , 0));
              (hypre__sx2 == 1)?0 : ((__assert_fail(("hypre__sx2 == 1"),("/home/liao6/svnrepos/benchmarks/smg2000/struct_ls/smg_residual.c"),(358),("int hypre_SMGResidual(void *, struct hypre_StructMatrix_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *)")) , 0));
              if (hypre__num_blocks == 1) {
                int ii;
                int jj;
                int kk;
                const double *bp_0 = (bp + hypre__i1start);
                double *rp_0 = (rp + hypre__i2start);
                for (kk = 0; kk < hypre__mz; kk++) {
                  for (jj = 0; jj < hypre__my; jj++) {
                    const double *bpp = ((bp_0 + (jj * hypre__sy1)) + (kk * hypre__sz1));
                    double *rpp = ((rp_0 + (jj * hypre__sy2)) + (kk * hypre__sz2));
                    for (ii = 0; ii < hypre__mx; ii++) {
                      rpp[ii] = (bpp[ii]);
                    }
                  }
                }
/* hypre__num_blocks > 1 */
              }
              else {
                for (hypre__block = 0; hypre__block < hypre__num_blocks; hypre__block++) {
                  loopi = 0;
                  loopj = 0;
                  loopk = 0;
                  hypre__nx = hypre__mx;
                  hypre__ny = hypre__my;
                  hypre__nz = hypre__mz;
                  if (hypre__dir == 0) {
                    loopi = ((hypre__block * hypre__div) + (((hypre__mod < hypre__block)?hypre__mod : hypre__block)));
                    hypre__nx = (hypre__div + (((hypre__mod > hypre__block)?1 : 0)));
                  }
                  else if (hypre__dir == 1) {
                    loopj = ((hypre__block * hypre__div) + (((hypre__mod < hypre__block)?hypre__mod : hypre__block)));
                    hypre__ny = (hypre__div + (((hypre__mod > hypre__block)?1 : 0)));
                  }
                  else if (hypre__dir == 2) {
                    loopk = ((hypre__block * hypre__div) + (((hypre__mod < hypre__block)?hypre__mod : hypre__block)));
                    hypre__nz = (hypre__div + (((hypre__mod > hypre__block)?1 : 0)));
                  }
                  bi = (((hypre__i1start + loopi) + (loopj * hypre__sy1)) + (loopk * hypre__sz1));
                  ri = (((hypre__i2start + loopi) + (loopj * hypre__sy2)) + (loopk * hypre__sz2));
/* AAA */
{
                    int ii;
                    int jj;
                    int kk;
                    const double *bp_0 = (bp + bi);
                    double *rp_0 = (rp + ri);
                    for (kk = 0; kk < hypre__nz; kk++) {
                      for (jj = 0; jj < hypre__ny; jj++) {
                        const double *bpp = ((bp_0 + (jj * hypre__sy1)) + (kk * hypre__sz1));
                        double *rpp = ((rp_0 + (jj * hypre__sy2)) + (kk * hypre__sz2));
                        for (ii = 0; ii < hypre__nx; ii++) {
                          rpp[ii] = (bpp[ii]);
                        }
                      }
                    }
/* AAA */
                  }
                }
/* hypre__num_blocks > 1 */
              }
            }
          }
        }
        break; 
      }
      case 1:
{
{
          hypre_FinalizeIndtComputations(comm_handle);
          compute_box_aa = (compute_pkg -> dept_boxes);
        }
        break; 
      }
/* switch */
    }
/*--------------------------------------------------------------------
     * Compute r -= A*x
     *--------------------------------------------------------------------*/
    for (i = 0; i < (compute_box_aa -> size); i++) {
      int dxp_s[15UL];
      compute_box_a = ((compute_box_aa -> box_arrays)[i]);
      A_data_box = ((( *(A -> data_space)).boxes) + i);
      x_data_box = ((( *(x -> data_space)).boxes) + i);
      r_data_box = ((( *(r -> data_space)).boxes) + i);
      rp = ((r -> data) + ((r -> data_indices)[i]));
      for (si = 0; si < stencil_size; si++) {
        dxp_s[si] = (((stencil_shape[si])[0]) + ((((stencil_shape[si])[1]) + (((stencil_shape[si])[2]) * (((0 < ((((x_data_box -> imax)[1]) - ((x_data_box -> imin)[1])) + 1))?((((x_data_box -> imax)[1]) - ((x_data_box -> imin)[1])) + 1) : 0)))) * (((0 < ((((x_data_box -> imax)[0]) - ((x_data_box -> imin)[0])) + 1))?((((x_data_box -> imax)[0]) - ((x_data_box -> imin)[0])) + 1) : 0))));
      }
      for (j = 0; j < (compute_box_a -> size); j++) {{
          int hypre__i1start;
          int hypre__i2start;
          int hypre__i3start;
          int hypre__sx1;
          int hypre__sy1;
          int hypre__sz1;
          int hypre__sx2;
          int hypre__sy2;
          int hypre__sz2;
          int hypre__sx3;
          int hypre__sy3;
          int hypre__sz3;
          int hypre__nx;
          int hypre__ny;
          int hypre__nz;
          int hypre__mx;
          int hypre__my;
          int hypre__mz;
          int hypre__dir;
          int hypre__max;
          int hypre__div;
          int hypre__mod;
          int hypre__block;
          int hypre__num_blocks;
          compute_box = ((compute_box_a -> boxes) + j);
          start = (compute_box -> imin);
          hypre__i1start = (((start[0]) - ((A_data_box -> imin)[0])) + ((((start[1]) - ((A_data_box -> imin)[1])) + (((start[2]) - ((A_data_box -> imin)[2])) * (((0 < ((((A_data_box -> imax)[1]) - ((A_data_box -> imin)[1])) + 1))?((((A_data_box -> imax)[1]) - ((A_data_box -> imin)[1])) + 1) : 0)))) * (((0 < ((((A_data_box -> imax)[0]) - ((A_data_box -> imin)[0])) + 1))?((((A_data_box -> imax)[0]) - ((A_data_box -> imin)[0])) + 1) : 0))));
          hypre__i2start = (((start[0]) - ((x_data_box -> imin)[0])) + ((((start[1]) - ((x_data_box -> imin)[1])) + (((start[2]) - ((x_data_box -> imin)[2])) * (((0 < ((((x_data_box -> imax)[1]) - ((x_data_box -> imin)[1])) + 1))?((((x_data_box -> imax)[1]) - ((x_data_box -> imin)[1])) + 1) : 0)))) * (((0 < ((((x_data_box -> imax)[0]) - ((x_data_box -> imin)[0])) + 1))?((((x_data_box -> imax)[0]) - ((x_data_box -> imin)[0])) + 1) : 0))));
          hypre__i3start = (((start[0]) - ((r_data_box -> imin)[0])) + ((((start[1]) - ((r_data_box -> imin)[1])) + (((start[2]) - ((r_data_box -> imin)[2])) * (((0 < ((((r_data_box -> imax)[1]) - ((r_data_box -> imin)[1])) + 1))?((((r_data_box -> imax)[1]) - ((r_data_box -> imin)[1])) + 1) : 0)))) * (((0 < ((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1))?((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1) : 0))));
          hypre_BoxGetStrideSize(compute_box,base_stride,loop_size);
          hypre__sx1 = (base_stride[0]);
          hypre__sy1 = ((base_stride[1]) * (((0 < ((((A_data_box -> imax)[0]) - ((A_data_box -> imin)[0])) + 1))?((((A_data_box -> imax)[0]) - ((A_data_box -> imin)[0])) + 1) : 0)));
          hypre__sz1 = (((base_stride[2]) * (((0 < ((((A_data_box -> imax)[0]) - ((A_data_box -> imin)[0])) + 1))?((((A_data_box -> imax)[0]) - ((A_data_box -> imin)[0])) + 1) : 0))) * (((0 < ((((A_data_box -> imax)[1]) - ((A_data_box -> imin)[1])) + 1))?((((A_data_box -> imax)[1]) - ((A_data_box -> imin)[1])) + 1) : 0)));
          hypre__sx2 = (base_stride[0]);
          hypre__sy2 = ((base_stride[1]) * (((0 < ((((x_data_box -> imax)[0]) - ((x_data_box -> imin)[0])) + 1))?((((x_data_box -> imax)[0]) - ((x_data_box -> imin)[0])) + 1) : 0)));
          hypre__sz2 = (((base_stride[2]) * (((0 < ((((x_data_box -> imax)[0]) - ((x_data_box -> imin)[0])) + 1))?((((x_data_box -> imax)[0]) - ((x_data_box -> imin)[0])) + 1) : 0))) * (((0 < ((((x_data_box -> imax)[1]) - ((x_data_box -> imin)[1])) + 1))?((((x_data_box -> imax)[1]) - ((x_data_box -> imin)[1])) + 1) : 0)));
          hypre__sx3 = (base_stride[0]);
          hypre__sy3 = ((base_stride[1]) * (((0 < ((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1))?((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1) : 0)));
          hypre__sz3 = (((base_stride[2]) * (((0 < ((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1))?((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1) : 0))) * (((0 < ((((r_data_box -> imax)[1]) - ((r_data_box -> imin)[1])) + 1))?((((r_data_box -> imax)[1]) - ((r_data_box -> imin)[1])) + 1) : 0)));
/* Based on BG/L Milestone #46 */
          (hypre__sx1 == 1)?0 : ((__assert_fail(("hypre__sx1 == 1"),("/home/liao6/svnrepos/benchmarks/smg2000/struct_ls/smg_residual.c"),(602),("int hypre_SMGResidual(void *, struct hypre_StructMatrix_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *)")) , 0));
          (hypre__sx2 == 1)?0 : ((__assert_fail(("hypre__sx2 == 1"),("/home/liao6/svnrepos/benchmarks/smg2000/struct_ls/smg_residual.c"),(603),("int hypre_SMGResidual(void *, struct hypre_StructMatrix_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *)")) , 0));
          (hypre__sx3 == 1)?0 : ((__assert_fail(("hypre__sx3 == 1"),("/home/liao6/svnrepos/benchmarks/smg2000/struct_ls/smg_residual.c"),(604),("int hypre_SMGResidual(void *, struct hypre_StructMatrix_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *)")) , 0));
          hypre__mx = (loop_size[0]);
          hypre__my = (loop_size[1]);
          hypre__mz = (loop_size[2]);
          hypre__dir = 0;
          hypre__max = hypre__mx;
          if (hypre__my > hypre__max) {
            hypre__dir = 1;
            hypre__max = hypre__my;
          }
          if (hypre__mz > hypre__max) {
            hypre__dir = 2;
            hypre__max = hypre__mz;
          }
          hypre__num_blocks = 1;
          if (hypre__max < hypre__num_blocks) {
            hypre__num_blocks = hypre__max;
          }
          if (hypre__num_blocks > 0) {
            hypre__div = (hypre__max / hypre__num_blocks);
            hypre__mod = (hypre__max % hypre__num_blocks);
          }
          else 
            continue; 
          if (hypre__num_blocks == 1) {
            int si;
            int ii;
            int jj;
            int kk;
            const double *Ap_0 = ((A -> data) + hypre__i1start);
            const double *xp_0 = (((x -> data) + hypre__i2start) + ((x -> data_indices)[i]));
            ri = hypre__i3start;

            void *__out_argv1__1527__[21];
            *(__out_argv1__1527__ + 0) = ((void *)(&xp_0));
            *(__out_argv1__1527__ + 1) = ((void *)(&Ap_0));
            *(__out_argv1__1527__ + 2) = ((void *)(&kk));
            *(__out_argv1__1527__ + 3) = ((void *)(&jj));
            *(__out_argv1__1527__ + 4) = ((void *)(&ii));
            *(__out_argv1__1527__ + 5) = ((void *)(&si));
            *(__out_argv1__1527__ + 6) = ((void *)(&hypre__mz));
            *(__out_argv1__1527__ + 7) = ((void *)(&hypre__my));
            *(__out_argv1__1527__ + 8) = ((void *)(&hypre__mx));
            *(__out_argv1__1527__ + 9) = ((void *)(&hypre__sz3));
            *(__out_argv1__1527__ + 10) = ((void *)(&hypre__sy3));
            *(__out_argv1__1527__ + 11) = ((void *)(&hypre__sz2));
            *(__out_argv1__1527__ + 12) = ((void *)(&hypre__sy2));
            *(__out_argv1__1527__ + 13) = ((void *)(&hypre__sz1));
            *(__out_argv1__1527__ + 14) = ((void *)(&hypre__sy1));
            *(__out_argv1__1527__ + 15) = ((void *)(&dxp_s));
            *(__out_argv1__1527__ + 16) = ((void *)(&i));
            *(__out_argv1__1527__ + 17) = ((void *)(&stencil_size));
            *(__out_argv1__1527__ + 18) = ((void *)(&rp));
            *(__out_argv1__1527__ + 19) = ((void *)(&ri));
            *(__out_argv1__1527__ + 20) = ((void *)(&A));
#ifdef USE_DLOPEN
            if (g_execution_flag == 0){
              printf("Opening the .so file ...\n");
              FunctionLib = dlopen("/home/liao6/svnrepos/benchmarks/smg2000/struct_ls/OUT__1__6119__.so",RTLD_LAZY);
              dlError = dlerror();
              if( dlError ) {
                printf("cannot open .so file!\n");
                exit(1);
              }

              /* Find the first loaded function */
              OUT__1__6119__ = dlsym( FunctionLib, "OUT__1__6119__");
              dlError = dlerror();
              if( dlError )
              {
                printf("cannot find OUT__1__6755__() !\n");
                exit(1);
              }
              //remove("/tmp/peri.result");
              //time1=time_stamp();
            } // end if (flag ==0)
             g_execution_flag ++;

            (*OUT__1__6119__)(__out_argv1__1527__);
#else            
            OUT__1__6119__(__out_argv1__1527__);
#endif
/* hypre__num_blocks > 1 */
          }
          else {
            for (si = 0; si < stencil_size; si++) {
              Ap = ((A -> data) + (((A -> data_indices)[i])[si]));
              xp = (((x -> data) + ((x -> data_indices)[i])) + (dxp_s[si]));
              for (hypre__block = 0; hypre__block < hypre__num_blocks; hypre__block++) {
                loopi = 0;
                loopj = 0;
                loopk = 0;
                hypre__nx = hypre__mx;
                hypre__ny = hypre__my;
                hypre__nz = hypre__mz;
                if (hypre__dir == 0) {
                  loopi = ((hypre__block * hypre__div) + (((hypre__mod < hypre__block)?hypre__mod : hypre__block)));
                  hypre__nx = (hypre__div + (((hypre__mod > hypre__block)?1 : 0)));
                }
                else if (hypre__dir == 1) {
                  loopj = ((hypre__block * hypre__div) + (((hypre__mod < hypre__block)?hypre__mod : hypre__block)));
                  hypre__ny = (hypre__div + (((hypre__mod > hypre__block)?1 : 0)));
                }
                else if (hypre__dir == 2) {
                  loopk = ((hypre__block * hypre__div) + (((hypre__mod < hypre__block)?hypre__mod : hypre__block)));
                  hypre__nz = (hypre__div + (((hypre__mod > hypre__block)?1 : 0)));
                }
                Ai = (((hypre__i1start + (loopi * hypre__sx1)) + (loopj * hypre__sy1)) + (loopk * hypre__sz1));
                xi = (((hypre__i2start + (loopi * hypre__sx2)) + (loopj * hypre__sy2)) + (loopk * hypre__sz2));
                ri = (((hypre__i3start + (loopi * hypre__sx3)) + (loopj * hypre__sy3)) + (loopk * hypre__sz3));
/* CORE LOOP BEGIN */
                (hypre__sx1 == 1)?0 : ((__assert_fail(("hypre__sx1 == 1"),("/home/liao6/svnrepos/benchmarks/smg2000/struct_ls/smg_residual.c"),(689),("int hypre_SMGResidual(void *, struct hypre_StructMatrix_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *)")) , 0));
                (hypre__sx2 == 1)?0 : ((__assert_fail(("hypre__sx2 == 1"),("/home/liao6/svnrepos/benchmarks/smg2000/struct_ls/smg_residual.c"),(690),("int hypre_SMGResidual(void *, struct hypre_StructMatrix_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *)")) , 0));
                (hypre__sx3 == 1)?0 : ((__assert_fail(("hypre__sx3 == 1"),("/home/liao6/svnrepos/benchmarks/smg2000/struct_ls/smg_residual.c"),(691),("int hypre_SMGResidual(void *, struct hypre_StructMatrix_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *)")) , 0));
{
/* In essence, this loop computes:
                   *
                   FOR_ALL i, j, k DO
                   rp[ri + i + j*DJ_R + k*DK_R]
                   -= Ap[Ai + i + j*DJ_A + k*DK_A]
                   * xp[xi + i + j*DJ_X + k*DK_X];
                   */
// 1. promoting loop invariant expressions
// j loop increment for Ai, xi, and ri
                  int DJA0 = (hypre__sy1 - (hypre__nx * hypre__sx1));
                  int DJX0 = (hypre__sy2 - (hypre__nx * hypre__sx2));
                  int DJR0 = (hypre__sy3 - (hypre__nx * hypre__sx3));
// k loop increment for Ai, xi, and ri
                  int DKA0 = (hypre__sz1 - (hypre__ny * hypre__sy1));
                  int DKX0 = (hypre__sz2 - (hypre__ny * hypre__sy2));
                  int DKR0 = (hypre__sz3 - (hypre__ny * hypre__sy3));
// pre-compute array index offset changes for one iteration within each level of loop
// one iteration of j loop on ri
                  int DJR1 = (DJR0 + (hypre__nx * hypre__sx3));
// one iteration of k loop on ri
                  int DKR1 = (DKR0 + (hypre__ny * DJR1));
// one iteration of j loop on Ai
                  int DJA1 = (DJA0 + (hypre__nx * hypre__sx1));
// one iteration of k loop on Ai
                  int DKA1 = (DKA0 + (hypre__ny * DJA1));
// one iteration of j loop on xi
                  int DJX1 = (DJX0 + (hypre__nx * hypre__sx2));
// one iteration of k loop on xi
                  int DKX1 = (DKX0 + (hypre__ny * DJX1));
                  for (loopk = 0; loopk < hypre__nz; loopk++) {
                    for (loopj = 0; loopj < hypre__ny; loopj++) {
                      for (loopi = 0; loopi < hypre__nx; loopi++) {{
                          rp[((ri + (loopi * hypre__sx1)) + (loopj * DJR1)) + (loopk * DKR1)] -= ((Ap[((Ai + (loopi * hypre__sx1)) + (loopj * DJA1)) + (loopk * DKA1)]) * (xp[((xi + (loopi * hypre__sx2)) + (loopj * DJX1)) + (loopk * DKX1)]));
//rp[ri] -= Ap[Ai] * xp[xi];
                        }
//Ai += hypre__sx1; // 2. merging loop index changes
//xi += hypre__sx2;
//ri += hypre__sx3;
                      }
//Ai += DJA0;//(hypre__sy1 - (hypre__nx * hypre__sx1));
//xi += DJX0;//(hypre__sy2 - (hypre__nx * hypre__sx2));
//ri += DJR0;//(hypre__sy3 - (hypre__nx * hypre__sx3));
                    }
//Ai += DKA0;//(hypre__sz1 - (hypre__ny * hypre__sy1));
//xi += DKX0; //(hypre__sz2 - (hypre__ny * hypre__sy2));
//ri += DKR0;//(hypre__sz3 - (hypre__ny * hypre__sy3));
                  }
                }
/* CORE LOOP END */
/* hypre__block */
              }
/* si */
            }
/* else hypre__num_blocks > 1 */
          }
/* j */
        }
      }
/* i */
    }
/* compute_i */
  }
  hypre_IncFLOPCount((residual_data -> flops));
  hypre_EndTiming((residual_data -> time_index));
  return ierr;
}
Exemplo n.º 16
0
/*--------------------------------------------------------------------------
 * hypre_SparseMSGInterp
 *
 * Fills the vector e from the vector xc using the two-coefficient matrix P:
 *   - at points mapped from the grid of xc (via cindex/stride) the value of
 *     xc is copied into e;
 *   - at the points of the compute package boxes, e is set to
 *     P[0] * e(shifted by stencil_shape[0]) + P[1] * e(shifted by
 *     stencil_shape[1]).
 *
 * interp_vdata is read as a hypre_SparseMSGInterpData (compute_pkg, cindex,
 * findex, stride, strideP and time_index are used).
 *
 * NOTE(review): this listing is truncated; the end of the compute_i loop,
 * the timing epilogue and the return statement are not part of it.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_SparseMSGInterp( void               *interp_vdata,
                       hypre_StructMatrix *P,
                       hypre_StructVector *xc,
                       hypre_StructVector *e            )
{
   HYPRE_Int ierr = 0;

   hypre_SparseMSGInterpData   *interp_data = interp_vdata;

   hypre_ComputePkg       *compute_pkg;
   hypre_IndexRef          cindex;
   hypre_IndexRef          findex;
   hypre_IndexRef          stride;
   hypre_IndexRef          strideP;

   hypre_StructGrid       *fgrid;
   HYPRE_Int              *fgrid_ids;
   hypre_StructGrid       *cgrid;
   hypre_BoxArray         *cgrid_boxes;
   HYPRE_Int              *cgrid_ids;

   hypre_CommHandle       *comm_handle;
                       
   hypre_BoxArrayArray    *compute_box_aa;
   hypre_BoxArray         *compute_box_a;
   hypre_Box              *compute_box;
                       
   hypre_Box              *P_dbox;
   hypre_Box              *xc_dbox;
   hypre_Box              *e_dbox;
                       
   HYPRE_Int               Pi;
   HYPRE_Int               xci;
   HYPRE_Int               ei;
                         
   double                 *Pp0, *Pp1;
   double                 *xcp;
   double                 *ep, *ep0, *ep1;
                       
   hypre_Index             loop_size;
   hypre_Index             start;
   hypre_Index             startc;
   hypre_Index             startP;
   hypre_Index             stridec;
                       
   hypre_StructStencil    *stencil;
   hypre_Index            *stencil_shape;

   HYPRE_Int               compute_i, fi, ci, j;
   HYPRE_Int               loopi, loopj, loopk;

   /*-----------------------------------------------------------------------
    * Initialize some things
    *-----------------------------------------------------------------------*/

   hypre_BeginTiming(interp_data -> time_index);

   compute_pkg   = (interp_data -> compute_pkg);
   cindex        = (interp_data -> cindex);
   findex        = (interp_data -> findex);
   stride        = (interp_data -> stride);
   strideP       = (interp_data -> strideP);

   stencil       = hypre_StructMatrixStencil(P);
   stencil_shape = hypre_StructStencilShape(stencil);

   /* xc is traversed with unit stride in every dimension */
   hypre_SetIndex(stridec, 1, 1, 1);

   /*-----------------------------------------------------------------------
    * Compute e at coarse points (injection)
    *-----------------------------------------------------------------------*/

   fgrid = hypre_StructVectorGrid(e);
   fgrid_ids = hypre_StructGridIDs(fgrid);
   cgrid = hypre_StructVectorGrid(xc);
   cgrid_boxes = hypre_StructGridBoxes(cgrid);
   cgrid_ids = hypre_StructGridIDs(cgrid);

   /* fi is not reset between coarse boxes: the search for the matching fine
    * box continues from the previous match. */
   fi = 0;
   hypre_ForBoxI(ci, cgrid_boxes)
      {
         /* NOTE(review): no bound check on fi -- this assumes every id in
          * cgrid_ids also appears in fgrid_ids at or after position fi. */
         while (fgrid_ids[fi] != cgrid_ids[ci])
         {
            fi++;
         }

         compute_box = hypre_BoxArrayBox(cgrid_boxes, ci);

         hypre_CopyIndex(hypre_BoxIMin(compute_box), startc);
         hypre_StructMapCoarseToFine(startc, cindex, stride, start);

         e_dbox  = hypre_BoxArrayBox(hypre_StructVectorDataSpace(e), fi);
         xc_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(xc), ci);

         ep  = hypre_StructVectorBoxData(e, fi);
         xcp = hypre_StructVectorBoxData(xc, ci);

         hypre_BoxGetSize(compute_box, loop_size);

         /* e is walked from `start` with `stride`, xc from `startc` with
          * unit stride */
         hypre_BoxLoop2Begin(loop_size,
                             e_dbox,  start,  stride,  ei,
                             xc_dbox, startc, stridec, xci);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,ei,xci
#include "hypre_box_smp_forloop.h"
         hypre_BoxLoop2For(loopi, loopj, loopk, ei, xci)
            {
               ep[ei] = xcp[xci];
            }
         hypre_BoxLoop2End(ei, xci);
      }

   /*-----------------------------------------------------------------------
    * Compute e at fine points
    *-----------------------------------------------------------------------*/

   /* Two passes: pass 0 initializes the communication on e and handles the
    * "indt" boxes of the compute package, pass 1 finalizes the communication
    * and handles the "dept" boxes. */
   for (compute_i = 0; compute_i < 2; compute_i++)
   {
      switch(compute_i)
      {
         case 0:
         {
            ep = hypre_StructVectorData(e);
            hypre_InitializeIndtComputations(compute_pkg, ep, &comm_handle);
            compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg);
         }
         break;

         case 1:
         {
            hypre_FinalizeIndtComputations(comm_handle);
            compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg);
         }
         break;
      }

      hypre_ForBoxArrayI(fi, compute_box_aa)
         {
            compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, fi);

            P_dbox = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(P), fi);
            e_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(e), fi);

            /* Pp0/Pp1 are the coefficient arrays of stencil entries 0 and 1;
             * ep0/ep1 view e shifted by the matching stencil offsets. */
            Pp0 = hypre_StructMatrixBoxData(P, fi, 0);
            Pp1 = hypre_StructMatrixBoxData(P, fi, 1);
            ep  = hypre_StructVectorBoxData(e, fi);
            ep0 = ep + hypre_BoxOffsetDistance(e_dbox, stencil_shape[0]);
            ep1 = ep + hypre_BoxOffsetDistance(e_dbox, stencil_shape[1]);

            hypre_ForBoxI(j, compute_box_a)
               {
                  compute_box = hypre_BoxArrayBox(compute_box_a, j);

                  /* Map the box start to the xc index space (findex/stride)
                   * and from there to the index space of P (cindex/strideP) */
                  hypre_CopyIndex(hypre_BoxIMin(compute_box), start);
                  hypre_StructMapFineToCoarse(start,  findex, stride,  startc);
                  hypre_StructMapCoarseToFine(startc, cindex, strideP, startP);

                  hypre_BoxGetStrideSize(compute_box, stride, loop_size);

                  hypre_BoxLoop2Begin(loop_size,
                                      P_dbox, startP, strideP, Pi,
                                      e_dbox, start,  stride,  ei);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,Pi,ei
#include "hypre_box_smp_forloop.h"
                  hypre_BoxLoop2For(loopi, loopj, loopk, Pi, ei)
                     {
                        /* weighted sum of the two shifted e values */
                        ep[ei] =  (Pp0[Pi] * ep0[ei] +
                                   Pp1[Pi] * ep1[ei]);
                     }
                  hypre_BoxLoop2End(Pi, ei);
               }
         }
int
hypre_SMGResidual( void               *residual_vdata,
                   hypre_StructMatrix *A,
                   hypre_StructVector *x,
                   hypre_StructVector *b,
                   hypre_StructVector *r              )
{
   int ierr = 0;

   hypre_SMGResidualData *residual_data = (hypre_SMGResidualData *)residual_vdata;

   hypre_IndexRef          base_stride = (residual_data -> base_stride);
   hypre_BoxArray         *base_points = (residual_data -> base_points);
   hypre_ComputePkg       *compute_pkg = (residual_data -> compute_pkg);

   hypre_CommHandle       *comm_handle;
                       
   hypre_BoxArrayArray    *compute_box_aa;
   hypre_BoxArray         *compute_box_a;
   hypre_Box              *compute_box;
                       
   hypre_Box              *A_data_box;
   hypre_Box              *x_data_box;
   hypre_Box              *b_data_box;
   hypre_Box              *r_data_box;
                       
   int                     Ai;
   int                     xi;
   int                     bi;
   int                     ri;
                         
   double                 *Ap;
   double                 *xp;
   double                 *bp;
   double                 *rp;
                       
   hypre_Index             loop_size;
   hypre_IndexRef          start;
                       
   hypre_StructStencil    *stencil;
   hypre_Index            *stencil_shape;
   int                     stencil_size;

   int                     compute_i, i, j, si;
   int                     loopi, loopj, loopk;

   hypre_BeginTiming(residual_data -> time_index);

   /*-----------------------------------------------------------------------
    * Compute residual r = b - Ax
    *-----------------------------------------------------------------------*/

   stencil       = hypre_StructMatrixStencil(A);
   stencil_shape = hypre_StructStencilShape(stencil);
   stencil_size  = hypre_StructStencilSize(stencil);

   for (compute_i = 0; compute_i < 2; compute_i++)
   {
      switch(compute_i)
      {
         case 0:
         {
            xp = hypre_StructVectorData(x);
            hypre_InitializeIndtComputations(compute_pkg, xp, &comm_handle);
            compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg);

            /*----------------------------------------
             * Copy b into r
             *----------------------------------------*/

            compute_box_a = base_points;
            hypre_ForBoxI(i, compute_box_a)
               {
                  compute_box = hypre_BoxArrayBox(compute_box_a, i);
                  start = hypre_BoxIMin(compute_box);

                  b_data_box =
                     hypre_BoxArrayBox(hypre_StructVectorDataSpace(b), i);
                  r_data_box =
                     hypre_BoxArrayBox(hypre_StructVectorDataSpace(r), i);

                  bp = hypre_StructVectorBoxData(b, i);
                  rp = hypre_StructVectorBoxData(r, i);

                  hypre_BoxGetStrideSize(compute_box, base_stride, loop_size);
                  hypre_BoxLoop2Begin(loop_size,
                                      b_data_box, start, base_stride, bi,
                                      r_data_box, start, base_stride, ri);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,bi,ri
#include "hypre_box_smp_forloop.h"
                  hypre_BoxLoop2For(loopi, loopj, loopk, bi, ri)
                     {
                        rp[ri] = bp[bi];
                     }
                  hypre_BoxLoop2End(bi, ri);
               }
         }
         break;

         case 1:
         {
            hypre_FinalizeIndtComputations(comm_handle);
            compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg);
         }
         break;
      }

      /*--------------------------------------------------------------------
       * Compute r -= A*x
       *--------------------------------------------------------------------*/

      hypre_ForBoxArrayI(i, compute_box_aa)
         {
            compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, i);

            A_data_box = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(A), i);
            x_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(x), i);
            r_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(r), i);

            rp = hypre_StructVectorBoxData(r, i);

            hypre_ForBoxI(j, compute_box_a)
               {
                  compute_box = hypre_BoxArrayBox(compute_box_a, j);

                  start  = hypre_BoxIMin(compute_box);

                  for (si = 0; si < stencil_size; si++)
                  {
                     Ap = hypre_StructMatrixBoxData(A, i, si);
                     xp = hypre_StructVectorBoxData(x, i) +
                        hypre_BoxOffsetDistance(x_data_box, stencil_shape[si]);

                     hypre_BoxGetStrideSize(compute_box, base_stride,
                                            loop_size);
                     hypre_BoxLoop3Begin(loop_size,
                                         A_data_box, start, base_stride, Ai,
                                         x_data_box, start, base_stride, xi,
                                         r_data_box, start, base_stride, ri);
#if 0                     
/*  The following portion is preprocessed to be handled by ROSE outliner */
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,Ai,xi,ri
#include "hypre_box_smp_forloop.h"
                     hypre_BoxLoop3For(loopi, loopj, loopk, Ai, xi, ri)
                        {
                           rp[ri] -= Ap[Ai] * xp[xi];
                        }
                     hypre_BoxLoop3End(Ai, xi, ri);
#else
                    for (hypre__block = 0; hypre__block < hypre__num_blocks;
                         hypre__block++)
                      {
                        loopi = 0;
                        loopj = 0;
                        loopk = 0;
                        hypre__nx = hypre__mx;
                        hypre__ny = hypre__my;
                        hypre__nz = hypre__mz;
                        if (hypre__num_blocks > 1)
                          {
                            if (hypre__dir == 0)
                              {
                                loopi =
                                  hypre__block * hypre__div +
                                  (((hypre__mod) <
                                    (hypre__block)) ? (hypre__mod)
                                   : (hypre__block));
                                hypre__nx =
                                  hypre__div +
                                  ((hypre__mod > hypre__block) ? 1 : 0);
                              }
                            else if (hypre__dir == 1)
                              {
                                loopj =
                                  hypre__block * hypre__div +
                                  (((hypre__mod) <
                                    (hypre__block)) ? (hypre__mod)
                                   : (hypre__block));
                                hypre__ny =
                                  hypre__div +
                                  ((hypre__mod > hypre__block) ? 1 : 0);
                              }
                            else if (hypre__dir == 2)
                              {
                                loopk =
                                  hypre__block * hypre__div +
                                  (((hypre__mod) <
                                    (hypre__block)) ? (hypre__mod)
                                   : (hypre__block));
                                hypre__nz =
                                  hypre__div +
                                  ((hypre__mod > hypre__block) ? 1 : 0);
                              }
                          };
                        Ai =
                          hypre__i1start + loopi * hypre__sx1 +
                          loopj * hypre__sy1 + loopk * hypre__sz1;
                        xi =
                          hypre__i2start + loopi * hypre__sx2 +
                          loopj * hypre__sy2 + loopk * hypre__sz2;
                        ri =
                          hypre__i3start + loopi * hypre__sx3 +
                          loopj * hypre__sy3 + loopk * hypre__sz3;

                          //begin of the loop
#if 0                          //
                        for (loopk = 0; loopk < hypre__nz; loopk++)
                          {
                            for (loopj = 0; loopj < hypre__ny; loopj++)
                              {
                                for (loopi = 0; loopi < hypre__nx; loopi++)
                                  {
                                    {
                                      rp[ri] -= Ap[Ai] * xp[xi];
                                    }
                                    Ai += hypre__sx1;
                                    xi += hypre__sx2;
                                    ri += hypre__sx3;
                                  }
                                Ai += hypre__sy1 - hypre__nx * hypre__sx1;
                                xi += hypre__sy2 - hypre__nx * hypre__sx2;
                                ri += hypre__sy3 - hypre__nx * hypre__sx3;
                              }
                            Ai += hypre__sz1 - hypre__ny * hypre__sy1;
                            xi += hypre__sz2 - hypre__ny * hypre__sy2;
                            ri += hypre__sz3 - hypre__ny * hypre__sy3;
                          } // end of the loop
#else

#if BLCR_CHECKPOINTING
                        // Only checkpoint it at the first occurrance.
                        if (g_checkpoint_flag == 0)
                        {
                          int err;
                          cr_checkpoint_args_t cr_args;  
                          cr_checkpoint_handle_t cr_handle;
                          cr_initialize_checkpoint_args_t(&cr_args);
                          cr_args.cr_scope = CR_SCOPE_PROC;// a process
                          cr_args.cr_target = 0; //self
                          cr_args.cr_signal = SIGKILL; // kill after checkpointing
                          cr_args.cr_fd = open("dump.yy", O_WRONLY|O_CREAT|O_LARGEFILE, 0400);
                          if (cr_args.cr_fd < 0) {
                              printf("Error: cannot open file for checkpoiting context\n");
                              abort();
                          }

                          g_checkpoint_flag ++;
                          printf("Checkpoiting: starting here ..\n");

                          err = cr_request_checkpoint(&cr_args, &cr_handle);
                          if (err < 0) {
                            printf("cannot request checkpoining! err=%d\n",err);
                            abort();
                          }
                          // block until the request is served
                          cr_enter_cs(cr);
                          cr_leave_cs(cr);

                          printf("Checkpoiting: restarting here ..\n");
                        }
#endif

                           OUT__1__6755__(&Ai,&xi,&ri,&Ap,&xp,&rp,&loopi,&loopj,&loopk,&hypre__sx1,&hypre__sy1,&hypre__sz1,&hypre__sx2,&hypre__sy2,&hypre__sz2,&hypre__sx3,&hypre__sy3,&hypre__sz3,&hypre__nx,&hypre__ny,&hypre__nz);

#endif
                      }
                  };
Exemplo n.º 18
0
/*--------------------------------------------------------------------------
 * hypre_SemiInterp
 *
 * Interpolates the coarse-grid vector xc into the fine-grid vector e using
 * the two-entry interpolation operator P.
 *
 *   interp_vdata - opaque handle; used as a hypre_SemiInterpData * from
 *                  which the transpose flag, compute package, cindex,
 *                  findex, stride and timer index are read
 *   P            - interpolation matrix; stencil entries 0 and 1 are used
 *   xc           - coarse-grid input vector
 *   e            - fine-grid output vector
 *
 * Two phases are visible below:
 *   1. Injection: e at the fine images of the coarse points is set to xc.
 *   2. Fine points: e[ei] = Pp0[Pi]*ep0[ei] + Pp1[Pi]*ep1[ei], where ep0
 *      and ep1 are e shifted by stencil_shape[0] and stencil_shape[1].
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_SemiInterp( void               *interp_vdata,
                  hypre_StructMatrix *P,
                  hypre_StructVector *xc,
                  hypre_StructVector *e            )
{
   hypre_SemiInterpData   *interp_data = interp_vdata;

   HYPRE_Int               P_stored_as_transpose;
   hypre_ComputePkg       *compute_pkg;
   hypre_IndexRef          cindex;
   hypre_IndexRef          findex;
   hypre_IndexRef          stride;

   hypre_StructGrid       *fgrid;
   HYPRE_Int              *fgrid_ids;
   hypre_StructGrid       *cgrid;
   hypre_BoxArray         *cgrid_boxes;
   HYPRE_Int              *cgrid_ids;

   hypre_CommHandle       *comm_handle;
                       
   hypre_BoxArrayArray    *compute_box_aa;
   hypre_BoxArray         *compute_box_a;
   hypre_Box              *compute_box;
                       
   hypre_Box              *P_dbox;
   hypre_Box              *xc_dbox;
   hypre_Box              *e_dbox;
                       
   HYPRE_Int               Pi;
   HYPRE_Int               xci;
   HYPRE_Int               ei;
   HYPRE_Int               constant_coefficient;
                         
   HYPRE_Real             *Pp0, *Pp1;
   HYPRE_Real             *xcp;
   HYPRE_Real             *ep, *ep0, *ep1;
                       
   hypre_Index             loop_size;
   hypre_Index             start;
   hypre_Index             startc;
   hypre_Index             stridec;
                       
   hypre_StructStencil    *stencil;
   hypre_Index            *stencil_shape;

   HYPRE_Int               compute_i, fi, ci, j;

   /*-----------------------------------------------------------------------
    * Initialize some things
    *-----------------------------------------------------------------------*/

   hypre_BeginTiming(interp_data -> time_index);

   P_stored_as_transpose = (interp_data -> P_stored_as_transpose);
   compute_pkg   = (interp_data -> compute_pkg);
   cindex        = (interp_data -> cindex);
   findex        = (interp_data -> findex);
   stride        = (interp_data -> stride);

   stencil       = hypre_StructMatrixStencil(P);
   stencil_shape = hypre_StructStencilShape(stencil);
   constant_coefficient = hypre_StructMatrixConstantCoefficient(P);
   hypre_assert( constant_coefficient==0 || constant_coefficient==1 );
   /* ... constant_coefficient==2 for P shouldn't happen, see
      hypre_PFMGCreateInterpOp in pfmg_setup_interp.c */

   /* Only done for constant-coefficient P.
      NOTE(review): presumably clears e's boundary ghost values so the
      stencil reads below see zeros there - confirm against the helper. */
   if (constant_coefficient) hypre_StructVectorClearBoundGhostValues(e, 0);

   /* The coarse vector is traversed with unit stride */
   hypre_SetIndex3(stridec, 1, 1, 1);

   /*-----------------------------------------------------------------------
    * Compute e at coarse points (injection)
    *-----------------------------------------------------------------------*/

   fgrid = hypre_StructVectorGrid(e);
   fgrid_ids = hypre_StructGridIDs(fgrid);
   cgrid = hypre_StructVectorGrid(xc);
   cgrid_boxes = hypre_StructGridBoxes(cgrid);
   cgrid_ids = hypre_StructGridIDs(cgrid);

   fi = 0;
   hypre_ForBoxI(ci, cgrid_boxes)
   {
      /* Advance fi to the fine box whose ID matches coarse box ci; fi is
         never reset, so the scan only moves forward through fgrid_ids */
      while (fgrid_ids[fi] != cgrid_ids[ci])
      {
         fi++;
      }

      compute_box = hypre_BoxArrayBox(cgrid_boxes, ci);

      /* startc is the coarse box's lower corner; start is its fine image */
      hypre_CopyIndex(hypre_BoxIMin(compute_box), startc);
      hypre_StructMapCoarseToFine(startc, cindex, stride, start);

      e_dbox  = hypre_BoxArrayBox(hypre_StructVectorDataSpace(e), fi);
      xc_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(xc), ci);

      ep  = hypre_StructVectorBoxData(e, fi);
      xcp = hypre_StructVectorBoxData(xc, ci);

      hypre_BoxGetSize(compute_box, loop_size);

      /* e is walked with the fine stride, xc with unit stride */
      hypre_BoxLoop2Begin(hypre_StructMatrixNDim(P), loop_size,
                          e_dbox, start, stride, ei,
                          xc_dbox, startc, stridec, xci);
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(HYPRE_BOX_PRIVATE,ei,xci) HYPRE_SMP_SCHEDULE
#endif
      hypre_BoxLoop2For(ei, xci)
      {
         ep[ei] = xcp[xci];
      }
      hypre_BoxLoop2End(ei, xci);
   }

   /*-----------------------------------------------------------------------
    * Compute e at fine points
    *-----------------------------------------------------------------------*/

   /* Two passes: pass 0 handles the compute package's "Indt" boxes, pass 1
      its "Dept" boxes.
      NOTE(review): presumably pass 0 starts a ghost exchange on e and works
      on boxes that do not need it, while pass 1 waits for the exchange -
      confirm against the compute package implementation. */
   for (compute_i = 0; compute_i < 2; compute_i++)
   {
      switch(compute_i)
      {
         case 0:
         {
            ep = hypre_StructVectorData(e);
            hypre_InitializeIndtComputations(compute_pkg, ep, &comm_handle);
            compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg);
         }
         break;

         case 1:
         {
            hypre_FinalizeIndtComputations(comm_handle);
            compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg);
         }
         break;
      }

      hypre_ForBoxArrayI(fi, compute_box_aa)
      {
         compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, fi);

         P_dbox = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(P), fi);
         e_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(e), fi);

         /* When P is stored as its transpose, the roles of stencil entries
            0 and 1 are swapped and the entry-0 data pointer is shifted back
            by the offset distance of stencil_shape[0]; the constant-
            coefficient case uses the CC variant of that offset. */
         if (P_stored_as_transpose)
         {
            if ( constant_coefficient )
            {
               Pp0 = hypre_StructMatrixBoxData(P, fi, 1);
               Pp1 = hypre_StructMatrixBoxData(P, fi, 0) -
                  hypre_CCBoxOffsetDistance(P_dbox, stencil_shape[0]);
            }
            else
            {
               Pp0 = hypre_StructMatrixBoxData(P, fi, 1);
               Pp1 = hypre_StructMatrixBoxData(P, fi, 0) -
                  hypre_BoxOffsetDistance(P_dbox, stencil_shape[0]);
            }
         }
         else
         {
            Pp0 = hypre_StructMatrixBoxData(P, fi, 0);
            Pp1 = hypre_StructMatrixBoxData(P, fi, 1);
         }
         /* ep0/ep1 alias e's box data, shifted by the two stencil offsets */
         ep  = hypre_StructVectorBoxData(e, fi);
         ep0 = ep + hypre_BoxOffsetDistance(e_dbox, stencil_shape[0]);
         ep1 = ep + hypre_BoxOffsetDistance(e_dbox, stencil_shape[1]);

         hypre_ForBoxI(j, compute_box_a)
         {
            compute_box = hypre_BoxArrayBox(compute_box_a, j);

            hypre_CopyIndex(hypre_BoxIMin(compute_box), start);
            hypre_StructMapFineToCoarse(start, findex, stride, startc);

            hypre_BoxGetStrideSize(compute_box, stride, loop_size);

            if ( constant_coefficient )
            {
               /* Pi is computed once and is not a loop index here, so the
                  same two coefficients are applied at every point */
               Pi = hypre_CCBoxIndexRank( P_dbox, startc );
               hypre_BoxLoop1Begin(hypre_StructMatrixNDim(P), loop_size,
                                   e_dbox, start, stride, ei);
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(HYPRE_BOX_PRIVATE,ei) HYPRE_SMP_SCHEDULE
#endif
               hypre_BoxLoop1For(ei)
               {
                  ep[ei] =  (Pp0[Pi] * ep0[ei] +
                             Pp1[Pi] * ep1[ei]);
               }
               hypre_BoxLoop1End(ei);
            }
            else
            {
               /* Variable coefficients: Pi advances with unit stride over
                  P's data box while ei advances with the fine stride */
               hypre_BoxLoop2Begin(hypre_StructMatrixNDim(P), loop_size,
                                   P_dbox, startc, stridec, Pi,
                                   e_dbox, start, stride, ei);
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(HYPRE_BOX_PRIVATE,Pi,ei) HYPRE_SMP_SCHEDULE
#endif
               hypre_BoxLoop2For(Pi, ei)
               {
                  ep[ei] =  (Pp0[Pi] * ep0[ei] +
                             Pp1[Pi] * ep1[ei]);
               }
               hypre_BoxLoop2End(Pi, ei);
            }
         }
Exemplo n.º 19
0
/*--------------------------------------------------------------------------
 * hypre_SparseMSGSolve
 *
 * Runs up to max_iter SparseMSG V-cycles on A x = b.
 *
 *   smsg_vdata - opaque handle; used as a hypre_SparseMSGData * holding the
 *                solver parameters and the per-grid operator/vector arrays
 *   A, b, x    - fine-grid matrix, right-hand side and solution; references
 *                to them replace entry 0 of A_array, b_array and x_array
 *
 * Each iteration performs:
 *   - fine-grid pre-relaxation and residual computation,
 *   - an optional convergence test (only when tol > 0),
 *   - a down cycle that restricts residuals to the x/y/z coarser
 *     neighbours of every active grid,
 *   - a relaxation on the last grid (index num_all_grids - 1),
 *   - an up cycle that interpolates, filters and adds corrections,
 *   - fine-grid post-relaxation.
 *
 * Early exits (before any cycle is run):
 *   - max_iter == 0: x is zeroed if zero_guess is set.
 *   - tol > 0 and <b,b> == 0: x is set to zero and, when logging > 0,
 *     norms[0] and rel_norms[0] are set to 0.
 *
 * On return (smsg_data -> num_iterations) holds the number of completed
 * iterations.  The return value is ierr, which this routine never changes
 * from its initial value 0.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_SparseMSGSolve( void               *smsg_vdata,
                      hypre_StructMatrix *A,
                      hypre_StructVector *b,
                      hypre_StructVector *x          )
{
   hypre_SparseMSGData  *smsg_data = smsg_vdata;

   HYPRE_Real            tol                 = (smsg_data -> tol);
   HYPRE_Int             max_iter            = (smsg_data -> max_iter);
   HYPRE_Int             rel_change          = (smsg_data -> rel_change);
   HYPRE_Int             zero_guess          = (smsg_data -> zero_guess);
   HYPRE_Int             jump                = (smsg_data -> jump);
   HYPRE_Int             num_pre_relax       = (smsg_data -> num_pre_relax);
   HYPRE_Int             num_post_relax      = (smsg_data -> num_post_relax);
   HYPRE_Int             num_fine_relax      = (smsg_data -> num_fine_relax);
   HYPRE_Int            *num_grids           = (smsg_data -> num_grids);
   HYPRE_Int             num_all_grids       = (smsg_data -> num_all_grids);
   HYPRE_Int             num_levels          = (smsg_data -> num_levels);
   hypre_StructMatrix  **A_array             = (smsg_data -> A_array);
   hypre_StructMatrix  **Px_array            = (smsg_data -> Px_array);
   hypre_StructMatrix  **Py_array            = (smsg_data -> Py_array);
   hypre_StructMatrix  **Pz_array            = (smsg_data -> Pz_array);
   hypre_StructMatrix  **RTx_array           = (smsg_data -> RTx_array);
   hypre_StructMatrix  **RTy_array           = (smsg_data -> RTy_array);
   hypre_StructMatrix  **RTz_array           = (smsg_data -> RTz_array);
   hypre_StructVector  **b_array             = (smsg_data -> b_array);
   hypre_StructVector  **x_array             = (smsg_data -> x_array);
   hypre_StructVector  **t_array             = (smsg_data -> t_array);
   hypre_StructVector  **r_array             = (smsg_data -> r_array);
   hypre_StructVector  **e_array             = (smsg_data -> e_array);
   hypre_StructVector  **visitx_array        = (smsg_data -> visitx_array);
   hypre_StructVector  **visity_array        = (smsg_data -> visity_array);
   hypre_StructVector  **visitz_array        = (smsg_data -> visitz_array);
   HYPRE_Int            *grid_on             = (smsg_data -> grid_on);
   void                **relax_array         = (smsg_data -> relax_array);
   void                **matvec_array        = (smsg_data -> matvec_array);
   void                **restrictx_array     = (smsg_data -> restrictx_array);
   void                **restricty_array     = (smsg_data -> restricty_array);
   void                **restrictz_array     = (smsg_data -> restrictz_array);
   void                **interpx_array       = (smsg_data -> interpx_array);
   void                **interpy_array       = (smsg_data -> interpy_array);
   void                **interpz_array       = (smsg_data -> interpz_array);
   HYPRE_Int             logging             = (smsg_data -> logging);
   HYPRE_Real           *norms               = (smsg_data -> norms);
   HYPRE_Real           *rel_norms           = (smsg_data -> rel_norms);

   /* restrict_count[g] = number of restrictions accumulated into b_array[g]
      during the current down cycle */
   HYPRE_Int            *restrict_count;

   HYPRE_Real            b_dot_b, r_dot_r, eps;
   HYPRE_Real            e_dot_e, x_dot_x;

   HYPRE_Int             i, l, lx, ly, lz;
   HYPRE_Int             lymin, lymax, lzmin, lzmax;
   HYPRE_Int             fi, ci;
   HYPRE_Int             ierr = 0;

#if DEBUG
   char                  filename[255];
#endif

   /*-----------------------------------------------------
    * Initialize some things and deal with special cases
    *-----------------------------------------------------*/

   hypre_BeginTiming(smsg_data -> time_index);

   /* Swap the fine-grid slots over to references to the caller's A, b, x */
   hypre_StructMatrixDestroy(A_array[0]);
   hypre_StructVectorDestroy(b_array[0]);
   hypre_StructVectorDestroy(x_array[0]);
   A_array[0] = hypre_StructMatrixRef(A);
   b_array[0] = hypre_StructVectorRef(b);
   x_array[0] = hypre_StructVectorRef(x);

   (smsg_data -> num_iterations) = 0;

   /* if max_iter is zero, return */
   if (max_iter == 0)
   {
      /* if using a zero initial guess, return zero */
      if (zero_guess)
      {
         hypre_StructVectorSetConstantValues(x, 0.0);
      }

      hypre_EndTiming(smsg_data -> time_index);
      return ierr;
   }

   /* part of convergence check */
   if (tol > 0.0)
   {
      /* eps = (tol^2) */
      b_dot_b = hypre_StructInnerProd(b_array[0], b_array[0]);
      eps = tol*tol;

      /* if rhs is zero, return a zero solution */
      if (b_dot_b == 0.0)
      {
         hypre_StructVectorSetConstantValues(x, 0.0);
         if (logging > 0)
         {
            norms[0]     = 0.0;
            rel_norms[0] = 0.0;
         }

         hypre_EndTiming(smsg_data -> time_index);
         return ierr;
      }
   }

   /* Allocated only after both early returns above, so the single exit
      path at the bottom of the function is the only place it is freed */
   restrict_count = hypre_TAlloc(HYPRE_Int, num_all_grids);

   /*-----------------------------------------------------
    * Do V-cycles:
    *   For each index l, "fine" = l, "coarse" = (l+1)
    *-----------------------------------------------------*/

   for (i = 0; i < max_iter; i++)
   {
      /*--------------------------------------------------
       * Down cycle:
       *   Note that r = b = x through the jump region
       *--------------------------------------------------*/

      /* fine grid pre-relaxation */
      hypre_PFMGRelaxSetPreRelax(relax_array[0]);
      hypre_PFMGRelaxSetMaxIter(relax_array[0], num_fine_relax);
      hypre_PFMGRelaxSetZeroGuess(relax_array[0], zero_guess);
      hypre_PFMGRelax(relax_array[0], A_array[0], b_array[0], x_array[0]);
      /* x now holds data, so later relaxations must not assume zero */
      zero_guess = 0;

      /* compute fine grid residual (b - Ax) */
      hypre_StructCopy(b_array[0], r_array[0]);
      hypre_StructMatvecCompute(matvec_array[0],
                                -1.0, A_array[0], x_array[0], 1.0, r_array[0]);

      /* convergence check */
      if (tol > 0.0)
      {
         r_dot_r = hypre_StructInnerProd(r_array[0], r_array[0]);

         if (logging > 0)
         {
            norms[i] = sqrt(r_dot_r);
            if (b_dot_b > 0)
               rel_norms[i] = sqrt(r_dot_r/b_dot_b);
            else
               rel_norms[i] = 0.0;
         }
/* RDF */
#if 0

hypre_printf("iter = %d, rel_norm = %e\n", i, rel_norms[i]);

#endif

         /* always do at least 1 V-cycle */
         /* b_dot_b is nonzero here: the zero case returned above.
            e_dot_e and x_dot_x are read only when i > 0, i.e. after the
            previous iteration has assigned them. */
         if ((r_dot_r/b_dot_b < eps) && (i > 0))
         {
            if (rel_change)
            {
               if ((e_dot_e/x_dot_x) < eps)
                  break;
            }
            else
            {
               break;
            }
         }
      }

      if (num_levels > 1)
      {
         /* initialize restrict_count */
         for (fi = 0; fi < num_all_grids; fi++)
         {
            restrict_count[fi] = 0;
         }

         /* Visit every grid (lx, ly, lz) with lx + ly + lz == l */
         for (l = 0; l <= (num_levels - 2); l++)
         {
            lzmin = hypre_max((l - num_grids[1] - num_grids[0] + 2), 0);
            lzmax = hypre_min((l), (num_grids[2] - 1));
            for (lz = lzmin; lz <= lzmax; lz++)
            {
               lymin = hypre_max((l - lz - num_grids[0] + 1), 0);
               lymax = hypre_min((l - lz), (num_grids[1] - 1));
               for (ly = lymin; ly <= lymax; ly++)
               {
                  lx = l - lz - ly;

                  hypre_SparseMSGMapIndex(lx, ly, lz, num_grids, fi);

                  /* An inactive grid ends the ly sweep for this lz */
                  if (!grid_on[fi])
                  {
                     break;
                  }

                  /* Average the right-hand side when more than one finer
                     grid restricted into this one */
                  if (restrict_count[fi] > 1)
                  {
                     hypre_StructScale((1.0/restrict_count[fi]), b_array[fi]);
                  }

                  if (l > jump)
                  {
                     /* pre-relaxation */
                     hypre_PFMGRelaxSetPreRelax(relax_array[fi]);
                     hypre_PFMGRelaxSetMaxIter(relax_array[fi], num_pre_relax);
                     hypre_PFMGRelaxSetZeroGuess(relax_array[fi], 1);
                     hypre_PFMGRelax(relax_array[fi], A_array[fi], b_array[fi],
                                     x_array[fi]);

                     /* compute residual (b - Ax) */
                     hypre_StructCopy(b_array[fi], r_array[fi]);
                     hypre_StructMatvecCompute(matvec_array[fi],
                                               -1.0, A_array[fi], x_array[fi],
                                               1.0, r_array[fi]);
                  }

                  /* For each direction: the first restriction into a coarse
                     grid writes b_array[ci] directly; later ones go through
                     t_array[ci] and are accumulated with an axpy */
                  if ((lx+1) < num_grids[0])
                  {
                     /* restrict to ((lx+1), ly, lz) */
                     hypre_SparseMSGMapIndex((lx+1), ly, lz, num_grids, ci);
                     if (grid_on[ci])
                     {
                        if (restrict_count[ci])
                        {
                           hypre_SparseMSGRestrict(restrictx_array[fi],
                                                   RTx_array[lx], r_array[fi],
                                                   t_array[ci]);
                           hypre_StructAxpy(1.0, t_array[ci], b_array[ci]);
                        }
                        else
                        {
                           hypre_SparseMSGRestrict(restrictx_array[fi],
                                                   RTx_array[lx], r_array[fi],
                                                   b_array[ci]);
                        }
                        restrict_count[ci]++;
                     }
                  }
                  if ((ly+1) < num_grids[1])
                  {
                     /* restrict to (lx, (ly+1), lz) */
                     hypre_SparseMSGMapIndex(lx, (ly+1), lz, num_grids, ci);
                     if (grid_on[ci])
                     {
                        if (restrict_count[ci])
                        {
                           hypre_SparseMSGRestrict(restricty_array[fi],
                                                   RTy_array[ly], r_array[fi],
                                                   t_array[ci]);
                           hypre_StructAxpy(1.0, t_array[ci], b_array[ci]);
                        }
                        else
                        {
                           hypre_SparseMSGRestrict(restricty_array[fi],
                                                   RTy_array[ly], r_array[fi],
                                                   b_array[ci]);
                        }
                        restrict_count[ci]++;
                     }
                  }
                  if ((lz+1) < num_grids[2])
                  {
                     /* restrict to (lx, ly, (lz+1)) */
                     hypre_SparseMSGMapIndex(lx, ly, (lz+1), num_grids, ci);
                     if (grid_on[ci])
                     {
                        if (restrict_count[ci])
                        {
                           hypre_SparseMSGRestrict(restrictz_array[fi],
                                                   RTz_array[lz], r_array[fi],
                                                   t_array[ci]);
                           hypre_StructAxpy(1.0, t_array[ci], b_array[ci]);
                        }
                        else
                        {
                           hypre_SparseMSGRestrict(restrictz_array[fi],
                                                   RTz_array[lz], r_array[fi],
                                                   b_array[ci]);
                        }
                        restrict_count[ci]++;
                     }
                  }
#if DEBUG
                  hypre_sprintf(filename, "zoutSMSG_bdown.%d.%d.%d", lx, ly, lz);
                  hypre_StructVectorPrint(filename, b_array[fi], 0);
                  hypre_sprintf(filename, "zoutSMSG_xdown.%d.%d.%d", lx, ly, lz);
                  hypre_StructVectorPrint(filename, x_array[fi], 0);
                  hypre_sprintf(filename, "zoutSMSG_rdown.%d.%d.%d", lx, ly, lz);
                  hypre_StructVectorPrint(filename, r_array[fi], 0);
#endif
               }
            }
         }

         /*--------------------------------------------------
          * Bottom
          *--------------------------------------------------*/

         fi = num_all_grids - 1;

         if (restrict_count[fi] > 1)
         {
            hypre_StructScale((1.0/restrict_count[fi]), b_array[fi]);
         }

         hypre_PFMGRelaxSetZeroGuess(relax_array[fi], 1);
         hypre_PFMGRelax(relax_array[fi], A_array[fi], b_array[fi],
                         x_array[fi]);

#if DEBUG
         hypre_sprintf(filename, "zoutSMSG_bbottom.%d.%d.%d", lx, ly, lz);
         hypre_StructVectorPrint(filename, b_array[fi], 0);
         hypre_sprintf(filename, "zoutSMSG_xbottom.%d.%d.%d", lx, ly, lz);
         hypre_StructVectorPrint(filename, x_array[fi], 0);
#endif

         /*--------------------------------------------------
          * Up cycle
          *   Note that r = b = x through the jump region
          *--------------------------------------------------*/

         /* Same grid traversal as the down cycle, in reverse order */
         for (l = (num_levels - 2); l >= 0; l--)
         {
            lzmin = hypre_max((l - num_grids[1] - num_grids[0] + 2), 0);
            lzmax = hypre_min((l), (num_grids[2] - 1));
            for (lz = lzmax; lz >= lzmin; lz--)
            {
               lymin = hypre_max((l - lz - num_grids[0] + 1), 0);
               lymax = hypre_min((l - lz), (num_grids[1] - 1));
               for (ly = lymax; ly >= lymin; ly--)
               {
                  lx = l - lz - ly;

                  hypre_SparseMSGMapIndex(lx, ly, lz, num_grids, fi);

                  if (!grid_on[fi])
                  {
                     break;
                  }

                  /* Grids in the jump region (other than the finest) start
                     from a zero correction before interpolation is added */
                  if ((l >= 1) && (l <= jump))
                  {
                     hypre_StructVectorSetConstantValues(x_array[fi], 0.0);
                  }
                  if ((lx+1) < num_grids[0])
                  {
                     /* interpolate from ((lx+1), ly, lz) */
                     hypre_SparseMSGMapIndex((lx+1), ly, lz, num_grids, ci);
                     if (grid_on[ci])
                     {
                        hypre_SparseMSGInterp(interpx_array[fi],
                                              Px_array[lx], x_array[ci],
                                              e_array[fi]);
                        hypre_SparseMSGFilter(visitx_array[fi], e_array[fi],
                                              lx, ly, lz, jump);
                        hypre_StructAxpy(1.0, e_array[fi], x_array[fi]);
                     }
                  }
                  if ((ly+1) < num_grids[1])
                  {
                     /* interpolate from (lx, (ly+1), lz) */
                     hypre_SparseMSGMapIndex(lx, (ly+1), lz, num_grids, ci);
                     if (grid_on[ci])
                     {
                        hypre_SparseMSGInterp(interpy_array[fi],
                                              Py_array[ly], x_array[ci],
                                              e_array[fi]);
                        hypre_SparseMSGFilter(visity_array[fi], e_array[fi],
                                              lx, ly, lz, jump);
                        hypre_StructAxpy(1.0, e_array[fi], x_array[fi]);
                     }
                  }
                  if ((lz+1) < num_grids[2])
                  {
                     /* interpolate from (lx, ly, (lz+1)) */
                     hypre_SparseMSGMapIndex(lx, ly, (lz+1), num_grids, ci);
                     if (grid_on[ci])
                     {
                        hypre_SparseMSGInterp(interpz_array[fi],
                                              Pz_array[lz], x_array[ci],
                                              e_array[fi]);
                        hypre_SparseMSGFilter(visitz_array[fi], e_array[fi],
                                              lx, ly, lz, jump);
                        hypre_StructAxpy(1.0, e_array[fi], x_array[fi]);
                     }
                  }
#if DEBUG
                  hypre_sprintf(filename, "zoutSMSG_xup.%d.%d.%d", lx, ly, lz);
                  hypre_StructVectorPrint(filename, x_array[fi], 0);
#endif
                  if (l > jump)
                  {
                     /* post-relaxation */
                     hypre_PFMGRelaxSetPostRelax(relax_array[fi]);
                     hypre_PFMGRelaxSetMaxIter(relax_array[fi],
                                               num_post_relax);
                     hypre_PFMGRelaxSetZeroGuess(relax_array[fi], 0);
                     hypre_PFMGRelax(relax_array[fi], A_array[fi], b_array[fi],
                                     x_array[fi]);
                  }
               }
            }
         }
      }

      /* part of convergence check */
      if ((tol > 0.0) && (rel_change))
      {
         if (num_levels > 1)
         {
            e_dot_e = hypre_StructInnerProd(e_array[0], e_array[0]);
            x_dot_x = hypre_StructInnerProd(x_array[0], x_array[0]);
         }
         else
         {
            /* No coarse correction exists, so the relative-change test
               is made to pass trivially */
            e_dot_e = 0.0;
            x_dot_x = 1.0;
         }
      }

      /* fine grid post-relaxation */
      hypre_PFMGRelaxSetPostRelax(relax_array[0]);
      hypre_PFMGRelaxSetMaxIter(relax_array[0], num_fine_relax);
      hypre_PFMGRelaxSetZeroGuess(relax_array[0], 0);
      hypre_PFMGRelax(relax_array[0], A_array[0], b_array[0], x_array[0]);

      (smsg_data -> num_iterations) = (i + 1);
   }

   /* Release the per-solve scratch counter; without this every call that
      reaches the V-cycle loop leaks num_all_grids HYPRE_Ints */
   hypre_TFree(restrict_count);

   hypre_EndTiming(smsg_data -> time_index);

   return ierr;
}