Ejemplo n.º 1
0
int
hypre_SMGAxpy( double              alpha,
               hypre_StructVector *x,
               hypre_StructVector *y,
               hypre_Index         base_index,
               hypre_Index         base_stride )
{
   int ierr = 0;

   hypre_Box        *x_data_box;
   hypre_Box        *y_data_box;
                 
   int               xi;
   int               yi;
                    
   double           *xp;
   double           *yp;
                    
   hypre_BoxArray   *boxes;
   hypre_Box        *box;
   hypre_Index       loop_size;
   hypre_IndexRef    start;
                    
   int               i;
   int               loopi, loopj, loopk;

   box = hypre_BoxCreate();
   boxes = hypre_StructGridBoxes(hypre_StructVectorGrid(y));
   hypre_ForBoxI(i, boxes)
      {
         hypre_CopyBox(hypre_BoxArrayBox(boxes, i), box);
         hypre_ProjectBox(box, base_index, base_stride);
         start = hypre_BoxIMin(box);

         x_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(x), i);
         y_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(y), i);

         xp = hypre_StructVectorBoxData(x, i);
         yp = hypre_StructVectorBoxData(y, i);

         hypre_BoxGetStrideSize(box, base_stride, loop_size);
         hypre_BoxLoop2Begin(loop_size,
                             x_data_box, start, base_stride, xi,
                             y_data_box, start, base_stride, yi);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,xi,yi
#include "hypre_box_smp_forloop.h"
	 hypre_BoxLoop2For(loopi, loopj, loopk, xi, yi)
            {
               yp[yi] += alpha * xp[xi];
            }
Ejemplo n.º 2
0
int
hypre_SMGSetStructVectorConstantValues( hypre_StructVector *vector,
                                        double              values,
                                        hypre_BoxArray     *box_array,
                                        hypre_Index         stride    )
{
   int    ierr = 0;

   hypre_Box          *v_data_box;

   int                 vi;
   double             *vp;

   hypre_Box          *box;
   hypre_Index         loop_size;
   hypre_IndexRef      start;

   int                 loopi, loopj, loopk;
   int                 i;

   /*-----------------------------------------------------------------------
    * Set the vector coefficients
    *-----------------------------------------------------------------------*/

   hypre_ForBoxI(i, box_array)
      {
         box   = hypre_BoxArrayBox(box_array, i);
         start = hypre_BoxIMin(box);

         v_data_box =
            hypre_BoxArrayBox(hypre_StructVectorDataSpace(vector), i);
         vp = hypre_StructVectorBoxData(vector, i);

         hypre_BoxGetStrideSize(box, stride, loop_size);

         hypre_BoxLoop1Begin(loop_size,
                             v_data_box, start, stride, vi);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,vi
#include "hypre_box_smp_forloop.h"
         hypre_BoxLoop1For(loopi, loopj, loopk, vi)
            {
               vp[vi] = values;
            }
         hypre_BoxLoop1End(vi);
      }
Ejemplo n.º 3
0
HYPRE_Int
hypre_SemiInterp( void               *interp_vdata,
                  hypre_StructMatrix *P,
                  hypre_StructVector *xc,
                  hypre_StructVector *e            )
{
   hypre_SemiInterpData   *interp_data = interp_vdata;

   HYPRE_Int               P_stored_as_transpose;
   hypre_ComputePkg       *compute_pkg;
   hypre_IndexRef          cindex;
   hypre_IndexRef          findex;
   hypre_IndexRef          stride;

   hypre_StructGrid       *fgrid;
   HYPRE_Int              *fgrid_ids;
   hypre_StructGrid       *cgrid;
   hypre_BoxArray         *cgrid_boxes;
   HYPRE_Int              *cgrid_ids;

   hypre_CommHandle       *comm_handle;
                       
   hypre_BoxArrayArray    *compute_box_aa;
   hypre_BoxArray         *compute_box_a;
   hypre_Box              *compute_box;
                       
   hypre_Box              *P_dbox;
   hypre_Box              *xc_dbox;
   hypre_Box              *e_dbox;
                       
   HYPRE_Int               Pi;
   HYPRE_Int               xci;
   HYPRE_Int               ei;
   HYPRE_Int               constant_coefficient;
                         
   HYPRE_Real             *Pp0, *Pp1;
   HYPRE_Real             *xcp;
   HYPRE_Real             *ep, *ep0, *ep1;
                       
   hypre_Index             loop_size;
   hypre_Index             start;
   hypre_Index             startc;
   hypre_Index             stridec;
                       
   hypre_StructStencil    *stencil;
   hypre_Index            *stencil_shape;

   HYPRE_Int               compute_i, fi, ci, j;

   /*-----------------------------------------------------------------------
    * Initialize some things
    *-----------------------------------------------------------------------*/

   hypre_BeginTiming(interp_data -> time_index);

   P_stored_as_transpose = (interp_data -> P_stored_as_transpose);
   compute_pkg   = (interp_data -> compute_pkg);
   cindex        = (interp_data -> cindex);
   findex        = (interp_data -> findex);
   stride        = (interp_data -> stride);

   stencil       = hypre_StructMatrixStencil(P);
   stencil_shape = hypre_StructStencilShape(stencil);
   constant_coefficient = hypre_StructMatrixConstantCoefficient(P);
   hypre_assert( constant_coefficient==0 || constant_coefficient==1 );
   /* ... constant_coefficient==2 for P shouldn't happen, see
      hypre_PFMGCreateInterpOp in pfmg_setup_interp.c */

   if (constant_coefficient) hypre_StructVectorClearBoundGhostValues(e, 0);

   hypre_SetIndex3(stridec, 1, 1, 1);

   /*-----------------------------------------------------------------------
    * Compute e at coarse points (injection)
    *-----------------------------------------------------------------------*/

   fgrid = hypre_StructVectorGrid(e);
   fgrid_ids = hypre_StructGridIDs(fgrid);
   cgrid = hypre_StructVectorGrid(xc);
   cgrid_boxes = hypre_StructGridBoxes(cgrid);
   cgrid_ids = hypre_StructGridIDs(cgrid);

   fi = 0;
   hypre_ForBoxI(ci, cgrid_boxes)
   {
      while (fgrid_ids[fi] != cgrid_ids[ci])
      {
         fi++;
      }

      compute_box = hypre_BoxArrayBox(cgrid_boxes, ci);

      hypre_CopyIndex(hypre_BoxIMin(compute_box), startc);
      hypre_StructMapCoarseToFine(startc, cindex, stride, start);

      e_dbox  = hypre_BoxArrayBox(hypre_StructVectorDataSpace(e), fi);
      xc_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(xc), ci);

      ep  = hypre_StructVectorBoxData(e, fi);
      xcp = hypre_StructVectorBoxData(xc, ci);

      hypre_BoxGetSize(compute_box, loop_size);

      hypre_BoxLoop2Begin(hypre_StructMatrixNDim(P), loop_size,
                          e_dbox, start, stride, ei,
                          xc_dbox, startc, stridec, xci);
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(HYPRE_BOX_PRIVATE,ei,xci) HYPRE_SMP_SCHEDULE
#endif
      hypre_BoxLoop2For(ei, xci)
      {
         ep[ei] = xcp[xci];
      }
      hypre_BoxLoop2End(ei, xci);
   }

   /*-----------------------------------------------------------------------
    * Compute e at fine points
    *-----------------------------------------------------------------------*/

   for (compute_i = 0; compute_i < 2; compute_i++)
   {
      switch(compute_i)
      {
         case 0:
         {
            ep = hypre_StructVectorData(e);
            hypre_InitializeIndtComputations(compute_pkg, ep, &comm_handle);
            compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg);
         }
         break;

         case 1:
         {
            hypre_FinalizeIndtComputations(comm_handle);
            compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg);
         }
         break;
      }

      hypre_ForBoxArrayI(fi, compute_box_aa)
      {
         compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, fi);

         P_dbox = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(P), fi);
         e_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(e), fi);

         if (P_stored_as_transpose)
         {
            if ( constant_coefficient )
            {
               Pp0 = hypre_StructMatrixBoxData(P, fi, 1);
               Pp1 = hypre_StructMatrixBoxData(P, fi, 0) -
                  hypre_CCBoxOffsetDistance(P_dbox, stencil_shape[0]);
            }
            else
            {
               Pp0 = hypre_StructMatrixBoxData(P, fi, 1);
               Pp1 = hypre_StructMatrixBoxData(P, fi, 0) -
                  hypre_BoxOffsetDistance(P_dbox, stencil_shape[0]);
            }
         }
         else
         {
            Pp0 = hypre_StructMatrixBoxData(P, fi, 0);
            Pp1 = hypre_StructMatrixBoxData(P, fi, 1);
         }
         ep  = hypre_StructVectorBoxData(e, fi);
         ep0 = ep + hypre_BoxOffsetDistance(e_dbox, stencil_shape[0]);
         ep1 = ep + hypre_BoxOffsetDistance(e_dbox, stencil_shape[1]);

         hypre_ForBoxI(j, compute_box_a)
         {
            compute_box = hypre_BoxArrayBox(compute_box_a, j);

            hypre_CopyIndex(hypre_BoxIMin(compute_box), start);
            hypre_StructMapFineToCoarse(start, findex, stride, startc);

            hypre_BoxGetStrideSize(compute_box, stride, loop_size);

            if ( constant_coefficient )
            {
               Pi = hypre_CCBoxIndexRank( P_dbox, startc );
               hypre_BoxLoop1Begin(hypre_StructMatrixNDim(P), loop_size,
                                   e_dbox, start, stride, ei);
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(HYPRE_BOX_PRIVATE,ei) HYPRE_SMP_SCHEDULE
#endif
               hypre_BoxLoop1For(ei)
               {
                  ep[ei] =  (Pp0[Pi] * ep0[ei] +
                             Pp1[Pi] * ep1[ei]);
               }
               hypre_BoxLoop1End(ei);
            }
            else
            {
               hypre_BoxLoop2Begin(hypre_StructMatrixNDim(P), loop_size,
                                   P_dbox, startc, stridec, Pi,
                                   e_dbox, start, stride, ei);
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(HYPRE_BOX_PRIVATE,Pi,ei) HYPRE_SMP_SCHEDULE
#endif
               hypre_BoxLoop2For(Pi, ei)
               {
                  ep[ei] =  (Pp0[Pi] * ep0[ei] +
                             Pp1[Pi] * ep1[ei]);
               }
               hypre_BoxLoop2End(Pi, ei);
            }
         }
Ejemplo n.º 4
0
HYPRE_Int
hypre_SparseMSGInterp( void               *interp_vdata,
                       hypre_StructMatrix *P,
                       hypre_StructVector *xc,
                       hypre_StructVector *e            )
{
   HYPRE_Int ierr = 0;

   hypre_SparseMSGInterpData   *interp_data = interp_vdata;

   hypre_ComputePkg       *compute_pkg;
   hypre_IndexRef          cindex;
   hypre_IndexRef          findex;
   hypre_IndexRef          stride;
   hypre_IndexRef          strideP;

   hypre_StructGrid       *fgrid;
   HYPRE_Int              *fgrid_ids;
   hypre_StructGrid       *cgrid;
   hypre_BoxArray         *cgrid_boxes;
   HYPRE_Int              *cgrid_ids;

   hypre_CommHandle       *comm_handle;
                       
   hypre_BoxArrayArray    *compute_box_aa;
   hypre_BoxArray         *compute_box_a;
   hypre_Box              *compute_box;
                       
   hypre_Box              *P_dbox;
   hypre_Box              *xc_dbox;
   hypre_Box              *e_dbox;
                       
   HYPRE_Int               Pi;
   HYPRE_Int               xci;
   HYPRE_Int               ei;
                         
   double                 *Pp0, *Pp1;
   double                 *xcp;
   double                 *ep, *ep0, *ep1;
                       
   hypre_Index             loop_size;
   hypre_Index             start;
   hypre_Index             startc;
   hypre_Index             startP;
   hypre_Index             stridec;
                       
   hypre_StructStencil    *stencil;
   hypre_Index            *stencil_shape;

   HYPRE_Int               compute_i, fi, ci, j;
   HYPRE_Int               loopi, loopj, loopk;

   /*-----------------------------------------------------------------------
    * Initialize some things
    *-----------------------------------------------------------------------*/

   hypre_BeginTiming(interp_data -> time_index);

   compute_pkg   = (interp_data -> compute_pkg);
   cindex        = (interp_data -> cindex);
   findex        = (interp_data -> findex);
   stride        = (interp_data -> stride);
   strideP       = (interp_data -> strideP);

   stencil       = hypre_StructMatrixStencil(P);
   stencil_shape = hypre_StructStencilShape(stencil);

   hypre_SetIndex(stridec, 1, 1, 1);

   /*-----------------------------------------------------------------------
    * Compute e at coarse points (injection)
    *-----------------------------------------------------------------------*/

   fgrid = hypre_StructVectorGrid(e);
   fgrid_ids = hypre_StructGridIDs(fgrid);
   cgrid = hypre_StructVectorGrid(xc);
   cgrid_boxes = hypre_StructGridBoxes(cgrid);
   cgrid_ids = hypre_StructGridIDs(cgrid);

   fi = 0;
   hypre_ForBoxI(ci, cgrid_boxes)
      {
         while (fgrid_ids[fi] != cgrid_ids[ci])
         {
            fi++;
         }

         compute_box = hypre_BoxArrayBox(cgrid_boxes, ci);

         hypre_CopyIndex(hypre_BoxIMin(compute_box), startc);
         hypre_StructMapCoarseToFine(startc, cindex, stride, start);

         e_dbox  = hypre_BoxArrayBox(hypre_StructVectorDataSpace(e), fi);
         xc_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(xc), ci);

         ep  = hypre_StructVectorBoxData(e, fi);
         xcp = hypre_StructVectorBoxData(xc, ci);

         hypre_BoxGetSize(compute_box, loop_size);

         hypre_BoxLoop2Begin(loop_size,
                             e_dbox,  start,  stride,  ei,
                             xc_dbox, startc, stridec, xci);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,ei,xci
#include "hypre_box_smp_forloop.h"
         hypre_BoxLoop2For(loopi, loopj, loopk, ei, xci)
            {
               ep[ei] = xcp[xci];
            }
         hypre_BoxLoop2End(ei, xci);
      }

   /*-----------------------------------------------------------------------
    * Compute e at fine points
    *-----------------------------------------------------------------------*/

   for (compute_i = 0; compute_i < 2; compute_i++)
   {
      switch(compute_i)
      {
         case 0:
         {
            ep = hypre_StructVectorData(e);
            hypre_InitializeIndtComputations(compute_pkg, ep, &comm_handle);
            compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg);
         }
         break;

         case 1:
         {
            hypre_FinalizeIndtComputations(comm_handle);
            compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg);
         }
         break;
      }

      hypre_ForBoxArrayI(fi, compute_box_aa)
         {
            compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, fi);

            P_dbox = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(P), fi);
            e_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(e), fi);

            Pp0 = hypre_StructMatrixBoxData(P, fi, 0);
            Pp1 = hypre_StructMatrixBoxData(P, fi, 1);
            ep  = hypre_StructVectorBoxData(e, fi);
            ep0 = ep + hypre_BoxOffsetDistance(e_dbox, stencil_shape[0]);
            ep1 = ep + hypre_BoxOffsetDistance(e_dbox, stencil_shape[1]);

            hypre_ForBoxI(j, compute_box_a)
               {
                  compute_box = hypre_BoxArrayBox(compute_box_a, j);

                  hypre_CopyIndex(hypre_BoxIMin(compute_box), start);
                  hypre_StructMapFineToCoarse(start,  findex, stride,  startc);
                  hypre_StructMapCoarseToFine(startc, cindex, strideP, startP);

                  hypre_BoxGetStrideSize(compute_box, stride, loop_size);

                  hypre_BoxLoop2Begin(loop_size,
                                      P_dbox, startP, strideP, Pi,
                                      e_dbox, start,  stride,  ei);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,Pi,ei
#include "hypre_box_smp_forloop.h"
                  hypre_BoxLoop2For(loopi, loopj, loopk, Pi, ei)
                     {
                        ep[ei] =  (Pp0[Pi] * ep0[ei] +
                                   Pp1[Pi] * ep1[ei]);
                     }
                  hypre_BoxLoop2End(Pi, ei);
               }
         }
int
hypre_SMGResidual( void               *residual_vdata,
                   hypre_StructMatrix *A,
                   hypre_StructVector *x,
                   hypre_StructVector *b,
                   hypre_StructVector *r              )
{
   int ierr = 0;

   hypre_SMGResidualData  *residual_data = residual_vdata;

   hypre_IndexRef          base_stride = (residual_data -> base_stride);
   hypre_BoxArray         *base_points = (residual_data -> base_points);
   hypre_ComputePkg       *compute_pkg = (residual_data -> compute_pkg);

   hypre_CommHandle       *comm_handle;
                       
   hypre_BoxArrayArray    *compute_box_aa;
   hypre_BoxArray         *compute_box_a;
   hypre_Box              *compute_box;
                       
   hypre_Box              *A_data_box;
   hypre_Box              *x_data_box;
   hypre_Box              *b_data_box;
   hypre_Box              *r_data_box;
                       
   int                     Ai;
   int                     xi;
   int                     bi;
   int                     ri;
                         
   double                 *Ap;
   double                 *xp;
   double                 *bp;
   double                 *rp;
                       
   hypre_Index             loop_size;
   hypre_IndexRef          start;
                       
   hypre_StructStencil    *stencil;
   hypre_Index            *stencil_shape;
   int                     stencil_size;

   int                     compute_i, i, j, si;
   int                     loopi, loopj, loopk;

   hypre_BeginTiming(residual_data -> time_index);

   /*-----------------------------------------------------------------------
    * Compute residual r = b - Ax
    *-----------------------------------------------------------------------*/

   stencil       = hypre_StructMatrixStencil(A);
   stencil_shape = hypre_StructStencilShape(stencil);
   stencil_size  = hypre_StructStencilSize(stencil);

   for (compute_i = 0; compute_i < 2; compute_i++)
   {
      switch(compute_i)
      {
         case 0:
         {
            xp = hypre_StructVectorData(x);
            hypre_InitializeIndtComputations(compute_pkg, xp, &comm_handle);
            compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg);

            /*----------------------------------------
             * Copy b into r
             *----------------------------------------*/

            compute_box_a = base_points;
            hypre_ForBoxI(i, compute_box_a)
               {
                  compute_box = hypre_BoxArrayBox(compute_box_a, i);
                  start = hypre_BoxIMin(compute_box);

                  b_data_box =
                     hypre_BoxArrayBox(hypre_StructVectorDataSpace(b), i);
                  r_data_box =
                     hypre_BoxArrayBox(hypre_StructVectorDataSpace(r), i);

                  bp = hypre_StructVectorBoxData(b, i);
                  rp = hypre_StructVectorBoxData(r, i);

                  hypre_BoxGetStrideSize(compute_box, base_stride, loop_size);
                  hypre_BoxLoop2Begin(loop_size,
                                      b_data_box, start, base_stride, bi,
                                      r_data_box, start, base_stride, ri);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,bi,ri
#include "hypre_box_smp_forloop.h"
                  hypre_BoxLoop2For(loopi, loopj, loopk, bi, ri)
                     {
                        rp[ri] = bp[bi];
                     }
                  hypre_BoxLoop2End(bi, ri);
               }
         }
         break;

         case 1:
         {
            hypre_FinalizeIndtComputations(comm_handle);
            compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg);
         }
         break;
      }

      /*--------------------------------------------------------------------
       * Compute r -= A*x
       *--------------------------------------------------------------------*/

      hypre_ForBoxArrayI(i, compute_box_aa)
         {
            compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, i);

            A_data_box = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(A), i);
            x_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(x), i);
            r_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(r), i);

            rp = hypre_StructVectorBoxData(r, i);

            hypre_ForBoxI(j, compute_box_a)
               {
                  compute_box = hypre_BoxArrayBox(compute_box_a, j);

                  start  = hypre_BoxIMin(compute_box);

                  for (si = 0; si < stencil_size; si++)
                  {
                     Ap = hypre_StructMatrixBoxData(A, i, si);
                     xp = hypre_StructVectorBoxData(x, i) +
                        hypre_BoxOffsetDistance(x_data_box, stencil_shape[si]);

                     hypre_BoxGetStrideSize(compute_box, base_stride,
                                            loop_size);
                     hypre_BoxLoop3Begin(loop_size,
                                         A_data_box, start, base_stride, Ai,
                                         x_data_box, start, base_stride, xi,
                                         r_data_box, start, base_stride, ri);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,Ai,xi,ri
#include "hypre_box_smp_forloop.h"
                     hypre_BoxLoop3For(loopi, loopj, loopk, Ai, xi, ri)
                        {
                           rp[ri] -= Ap[Ai] * xp[xi];
                        }
                     hypre_BoxLoop3End(Ai, xi, ri);
                  }
Ejemplo n.º 6
0
int
hypre_PointRelax( void               *relax_vdata,
                  hypre_StructMatrix *A,
                  hypre_StructVector *b,
                  hypre_StructVector *x           )
{
   hypre_PointRelaxData *relax_data = (hypre_PointRelaxData *)relax_vdata;

   int                    max_iter         = (relax_data -> max_iter);
   int                    zero_guess       = (relax_data -> zero_guess);
   double                 weight           = (relax_data -> weight);
   int                    num_pointsets    = (relax_data -> num_pointsets);
   int                   *pointset_ranks   = (relax_data -> pointset_ranks);
   hypre_Index           *pointset_strides = (relax_data -> pointset_strides);
   hypre_StructVector    *t                = (relax_data -> t);
   int                    diag_rank        = (relax_data -> diag_rank);
   hypre_ComputePkg     **compute_pkgs     = (relax_data -> compute_pkgs);

   hypre_ComputePkg      *compute_pkg;
   hypre_CommHandle      *comm_handle;
                        
   hypre_BoxArrayArray   *compute_box_aa;
   hypre_BoxArray        *compute_box_a;
   hypre_Box             *compute_box;
                        
   hypre_Box             *A_data_box;
   hypre_Box             *b_data_box;
   hypre_Box             *x_data_box;
   hypre_Box             *t_data_box;
                        
   int                    Ai;
   int                    bi;
   int                    xi;
   int                    ti;
                        
   double                *Ap;
   double                *bp;
   double                *xp;
   double                *tp;
                        
   hypre_IndexRef         stride;
   hypre_IndexRef         start;
   hypre_Index            loop_size;
                        
   hypre_StructStencil   *stencil;
   hypre_Index           *stencil_shape;
   int                    stencil_size;
                        
   int                    iter, p, compute_i, i, j, si;
   int                    loopi, loopj, loopk;
   int                    pointset;

   int                    ierr = 0;

   /*----------------------------------------------------------
    * Initialize some things and deal with special cases
    *----------------------------------------------------------*/

   hypre_BeginTiming(relax_data -> time_index);

   hypre_StructMatrixDestroy(relax_data -> A);
   hypre_StructVectorDestroy(relax_data -> b);
   hypre_StructVectorDestroy(relax_data -> x);
   (relax_data -> A) = hypre_StructMatrixRef(A);
   (relax_data -> x) = hypre_StructVectorRef(x);
   (relax_data -> b) = hypre_StructVectorRef(b);

   (relax_data -> num_iterations) = 0;

   /* if max_iter is zero, return */
   if (max_iter == 0)
   {
      /* if using a zero initial guess, return zero */
      if (zero_guess)
      {
         hypre_StructVectorSetConstantValues(x, 0.0);
      }

      hypre_EndTiming(relax_data -> time_index);
      return ierr;
   }

   stencil       = hypre_StructMatrixStencil(A);
   stencil_shape = hypre_StructStencilShape(stencil);
   stencil_size  = hypre_StructStencilSize(stencil);

   /*----------------------------------------------------------
    * Do zero_guess iteration
    *----------------------------------------------------------*/

   p    = 0;
   iter = 0;

   if (zero_guess)
   {
      pointset = pointset_ranks[p];
      compute_pkg = compute_pkgs[pointset];
      stride = pointset_strides[pointset];

      for (compute_i = 0; compute_i < 2; compute_i++)
      {
         switch(compute_i)
         {
            case 0:
            {
               compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg);
            }
            break;

            case 1:
            {
               compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg);
            }
            break;
         }

         hypre_ForBoxArrayI(i, compute_box_aa)
            {
               compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, i);

               A_data_box =
                  hypre_BoxArrayBox(hypre_StructMatrixDataSpace(A), i);
               b_data_box =
                  hypre_BoxArrayBox(hypre_StructVectorDataSpace(b), i);
               x_data_box =
                  hypre_BoxArrayBox(hypre_StructVectorDataSpace(x), i);

               Ap = hypre_StructMatrixBoxData(A, i, diag_rank);
               bp = hypre_StructVectorBoxData(b, i);
               xp = hypre_StructVectorBoxData(x, i);

               hypre_ForBoxI(j, compute_box_a)
                  {
                     compute_box = hypre_BoxArrayBox(compute_box_a, j);

                     start  = hypre_BoxIMin(compute_box);
                     hypre_BoxGetStrideSize(compute_box, stride, loop_size);

                     hypre_BoxLoop3Begin(loop_size,
                                         A_data_box, start, stride, Ai,
                                         b_data_box, start, stride, bi,
                                         x_data_box, start, stride, xi);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,Ai,bi,xi
#include "hypre_box_smp_forloop.h"
                     hypre_BoxLoop3For(loopi, loopj, loopk, Ai, bi, xi)
                        {
                           xp[xi] = bp[bi] / Ap[Ai];
                        }
                     hypre_BoxLoop3End(Ai, bi, xi);
                  }
            }
      }
int
hypre_SMGResidual( void               *residual_vdata,
                   hypre_StructMatrix *A,
                   hypre_StructVector *x,
                   hypre_StructVector *b,
                   hypre_StructVector *r              )
{
   int ierr = 0;

   hypre_SMGResidualData *residual_data = (hypre_SMGResidualData *)residual_vdata;

   hypre_IndexRef          base_stride = (residual_data -> base_stride);
   hypre_BoxArray         *base_points = (residual_data -> base_points);
   hypre_ComputePkg       *compute_pkg = (residual_data -> compute_pkg);

   hypre_CommHandle       *comm_handle;
                       
   hypre_BoxArrayArray    *compute_box_aa;
   hypre_BoxArray         *compute_box_a;
   hypre_Box              *compute_box;
                       
   hypre_Box              *A_data_box;
   hypre_Box              *x_data_box;
   hypre_Box              *b_data_box;
   hypre_Box              *r_data_box;
                       
   int                     Ai;
   int                     xi;
   int                     bi;
   int                     ri;
                         
   double                 *Ap;
   double                 *xp;
   double                 *bp;
   double                 *rp;
                       
   hypre_Index             loop_size;
   hypre_IndexRef          start;
                       
   hypre_StructStencil    *stencil;
   hypre_Index            *stencil_shape;
   int                     stencil_size;

   int                     compute_i, i, j, si;
   int                     loopi, loopj, loopk;

   hypre_BeginTiming(residual_data -> time_index);

   /*-----------------------------------------------------------------------
    * Compute residual r = b - Ax
    *-----------------------------------------------------------------------*/

   stencil       = hypre_StructMatrixStencil(A);
   stencil_shape = hypre_StructStencilShape(stencil);
   stencil_size  = hypre_StructStencilSize(stencil);

   for (compute_i = 0; compute_i < 2; compute_i++)
   {
      switch(compute_i)
      {
         case 0:
         {
            xp = hypre_StructVectorData(x);
            hypre_InitializeIndtComputations(compute_pkg, xp, &comm_handle);
            compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg);

            /*----------------------------------------
             * Copy b into r
             *----------------------------------------*/

            compute_box_a = base_points;
            hypre_ForBoxI(i, compute_box_a)
               {
                  compute_box = hypre_BoxArrayBox(compute_box_a, i);
                  start = hypre_BoxIMin(compute_box);

                  b_data_box =
                     hypre_BoxArrayBox(hypre_StructVectorDataSpace(b), i);
                  r_data_box =
                     hypre_BoxArrayBox(hypre_StructVectorDataSpace(r), i);

                  bp = hypre_StructVectorBoxData(b, i);
                  rp = hypre_StructVectorBoxData(r, i);

                  hypre_BoxGetStrideSize(compute_box, base_stride, loop_size);
                  hypre_BoxLoop2Begin(loop_size,
                                      b_data_box, start, base_stride, bi,
                                      r_data_box, start, base_stride, ri);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,bi,ri
#include "hypre_box_smp_forloop.h"
                  hypre_BoxLoop2For(loopi, loopj, loopk, bi, ri)
                     {
                        rp[ri] = bp[bi];
                     }
                  hypre_BoxLoop2End(bi, ri);
               }
         }
         break;

         case 1:
         {
            hypre_FinalizeIndtComputations(comm_handle);
            compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg);
         }
         break;
      }

      /*--------------------------------------------------------------------
       * Compute r -= A*x
       *--------------------------------------------------------------------*/

      hypre_ForBoxArrayI(i, compute_box_aa)
         {
            compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, i);

            A_data_box = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(A), i);
            x_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(x), i);
            r_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(r), i);

            rp = hypre_StructVectorBoxData(r, i);

            hypre_ForBoxI(j, compute_box_a)
               {
                  compute_box = hypre_BoxArrayBox(compute_box_a, j);

                  start  = hypre_BoxIMin(compute_box);

                  for (si = 0; si < stencil_size; si++)
                  {
                     Ap = hypre_StructMatrixBoxData(A, i, si);
                     xp = hypre_StructVectorBoxData(x, i) +
                        hypre_BoxOffsetDistance(x_data_box, stencil_shape[si]);

                     hypre_BoxGetStrideSize(compute_box, base_stride,
                                            loop_size);
                     hypre_BoxLoop3Begin(loop_size,
                                         A_data_box, start, base_stride, Ai,
                                         x_data_box, start, base_stride, xi,
                                         r_data_box, start, base_stride, ri);
#if 0                     
/*  The following portion is preprocessed to be handled by ROSE outliner */
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,Ai,xi,ri
#include "hypre_box_smp_forloop.h"
                     hypre_BoxLoop3For(loopi, loopj, loopk, Ai, xi, ri)
                        {
                           rp[ri] -= Ap[Ai] * xp[xi];
                        }
                     hypre_BoxLoop3End(Ai, xi, ri);
#else
                    for (hypre__block = 0; hypre__block < hypre__num_blocks;
                         hypre__block++)
                      {
                        loopi = 0;
                        loopj = 0;
                        loopk = 0;
                        hypre__nx = hypre__mx;
                        hypre__ny = hypre__my;
                        hypre__nz = hypre__mz;
                        if (hypre__num_blocks > 1)
                          {
                            if (hypre__dir == 0)
                              {
                                loopi =
                                  hypre__block * hypre__div +
                                  (((hypre__mod) <
                                    (hypre__block)) ? (hypre__mod)
                                   : (hypre__block));
                                hypre__nx =
                                  hypre__div +
                                  ((hypre__mod > hypre__block) ? 1 : 0);
                              }
                            else if (hypre__dir == 1)
                              {
                                loopj =
                                  hypre__block * hypre__div +
                                  (((hypre__mod) <
                                    (hypre__block)) ? (hypre__mod)
                                   : (hypre__block));
                                hypre__ny =
                                  hypre__div +
                                  ((hypre__mod > hypre__block) ? 1 : 0);
                              }
                            else if (hypre__dir == 2)
                              {
                                loopk =
                                  hypre__block * hypre__div +
                                  (((hypre__mod) <
                                    (hypre__block)) ? (hypre__mod)
                                   : (hypre__block));
                                hypre__nz =
                                  hypre__div +
                                  ((hypre__mod > hypre__block) ? 1 : 0);
                              }
                          };
                        Ai =
                          hypre__i1start + loopi * hypre__sx1 +
                          loopj * hypre__sy1 + loopk * hypre__sz1;
                        xi =
                          hypre__i2start + loopi * hypre__sx2 +
                          loopj * hypre__sy2 + loopk * hypre__sz2;
                        ri =
                          hypre__i3start + loopi * hypre__sx3 +
                          loopj * hypre__sy3 + loopk * hypre__sz3;

                          //begin of the loop
#if 0                          //
                        for (loopk = 0; loopk < hypre__nz; loopk++)
                          {
                            for (loopj = 0; loopj < hypre__ny; loopj++)
                              {
                                for (loopi = 0; loopi < hypre__nx; loopi++)
                                  {
                                    {
                                      rp[ri] -= Ap[Ai] * xp[xi];
                                    }
                                    Ai += hypre__sx1;
                                    xi += hypre__sx2;
                                    ri += hypre__sx3;
                                  }
                                Ai += hypre__sy1 - hypre__nx * hypre__sx1;
                                xi += hypre__sy2 - hypre__nx * hypre__sx2;
                                ri += hypre__sy3 - hypre__nx * hypre__sx3;
                              }
                            Ai += hypre__sz1 - hypre__ny * hypre__sy1;
                            xi += hypre__sz2 - hypre__ny * hypre__sy2;
                            ri += hypre__sz3 - hypre__ny * hypre__sy3;
                          } // end of the loop
#else

#if BLCR_CHECKPOINTING
                        // Only checkpoint it at the first occurrance.
                        if (g_checkpoint_flag == 0)
                        {
                          int err;
                          cr_checkpoint_args_t cr_args;  
                          cr_checkpoint_handle_t cr_handle;
                          cr_initialize_checkpoint_args_t(&cr_args);
                          cr_args.cr_scope = CR_SCOPE_PROC;// a process
                          cr_args.cr_target = 0; //self
                          cr_args.cr_signal = SIGKILL; // kill after checkpointing
                          cr_args.cr_fd = open("dump.yy", O_WRONLY|O_CREAT|O_LARGEFILE, 0400);
                          if (cr_args.cr_fd < 0) {
                              printf("Error: cannot open file for checkpoiting context\n");
                              abort();
                          }

                          g_checkpoint_flag ++;
                          printf("Checkpoiting: starting here ..\n");

                          err = cr_request_checkpoint(&cr_args, &cr_handle);
                          if (err < 0) {
                            printf("cannot request checkpoining! err=%d\n",err);
                            abort();
                          }
                          // block until the request is served
                          cr_enter_cs(cr);
                          cr_leave_cs(cr);

                          printf("Checkpoiting: restarting here ..\n");
                        }
#endif

                           OUT__1__6755__(&Ai,&xi,&ri,&Ap,&xp,&rp,&loopi,&loopj,&loopk,&hypre__sx1,&hypre__sy1,&hypre__sz1,&hypre__sx2,&hypre__sy2,&hypre__sz2,&hypre__sx3,&hypre__sy3,&hypre__sz3,&hypre__nx,&hypre__ny,&hypre__nz);

#endif
                      }
                  };
int hypre_SMGResidual(void *residual_vdata,hypre_StructMatrix *A,hypre_StructVector *x,hypre_StructVector *b,hypre_StructVector *r)
{
  int ierr = 0;
  hypre_SMGResidualData *residual_data = residual_vdata;
  hypre_IndexRef base_stride = (residual_data -> base_stride);
  hypre_BoxArray *base_points = (residual_data -> base_points);
  hypre_ComputePkg *compute_pkg = (residual_data -> compute_pkg);
  hypre_CommHandle *comm_handle;
  hypre_BoxArrayArray *compute_box_aa;
  hypre_BoxArray *compute_box_a;
  hypre_Box *compute_box;
  hypre_Box *A_data_box;
  hypre_Box *x_data_box;
  hypre_Box *b_data_box;
  hypre_Box *r_data_box;
  int Ai;
  int xi;
  int bi;
  int ri;
  double *Ap;
  double *xp;
  double *bp;
  double *rp;
  hypre_Index loop_size;
  hypre_IndexRef start;
  hypre_StructStencil *stencil;
  hypre_Index *stencil_shape;
  int stencil_size;
  int compute_i;
  int i;
  int j;
  int si;
  int loopi;
  int loopj;
  int loopk;
/* New static variables, precomputed */
  hypre_BeginTiming((residual_data -> time_index));
  stencil = (A -> stencil);
  stencil_shape = (stencil -> shape);
  stencil_size = (stencil -> size);
  (stencil_size <= 15)?0 : ((__assert_fail(("stencil_size <= 15"),("/home/liao6/svnrepos/benchmarks/smg2000/struct_ls/smg_residual.c"),(203),("int hypre_SMGResidual(void *, struct hypre_StructMatrix_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *)")) , 0));
  for (compute_i = 0; compute_i < 2; compute_i++) {
    switch(compute_i){
      case 0:
{
{
          xp = (x -> data);
          hypre_InitializeIndtComputations(compute_pkg,xp,&comm_handle);
          compute_box_aa = (compute_pkg -> indt_boxes);
          compute_box_a = base_points;
          for (i = 0; i < (compute_box_a -> size); i++) {
            compute_box = ((compute_box_a -> boxes) + i);
            start = (compute_box -> imin);
            b_data_box = ((( *(b -> data_space)).boxes) + i);
            r_data_box = ((( *(r -> data_space)).boxes) + i);
            bp = ((b -> data) + ((b -> data_indices)[i]));
            rp = ((r -> data) + ((r -> data_indices)[i]));
            hypre_BoxGetStrideSize(compute_box,base_stride,loop_size);
{
              int hypre__i1start = (((start[0]) - ((b_data_box -> imin)[0])) + ((((start[1]) - ((b_data_box -> imin)[1])) + (((start[2]) - ((b_data_box -> imin)[2])) * (((0 < ((((b_data_box -> imax)[1]) - ((b_data_box -> imin)[1])) + 1))?((((b_data_box -> imax)[1]) - ((b_data_box -> imin)[1])) + 1) : 0)))) * (((0 < ((((b_data_box -> imax)[0]) - ((b_data_box -> imin)[0])) + 1))?((((b_data_box -> imax)[0]) - ((b_data_box -> imin)[0])) + 1) : 0))));
              int hypre__i2start = (((start[0]) - ((r_data_box -> imin)[0])) + ((((start[1]) - ((r_data_box -> imin)[1])) + (((start[2]) - ((r_data_box -> imin)[2])) * (((0 < ((((r_data_box -> imax)[1]) - ((r_data_box -> imin)[1])) + 1))?((((r_data_box -> imax)[1]) - ((r_data_box -> imin)[1])) + 1) : 0)))) * (((0 < ((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1))?((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1) : 0))));
              int hypre__sx1 = (base_stride[0]);
              int hypre__sy1 = ((base_stride[1]) * (((0 < ((((b_data_box -> imax)[0]) - ((b_data_box -> imin)[0])) + 1))?((((b_data_box -> imax)[0]) - ((b_data_box -> imin)[0])) + 1) : 0)));
              int hypre__sz1 = (((base_stride[2]) * (((0 < ((((b_data_box -> imax)[0]) - ((b_data_box -> imin)[0])) + 1))?((((b_data_box -> imax)[0]) - ((b_data_box -> imin)[0])) + 1) : 0))) * (((0 < ((((b_data_box -> imax)[1]) - ((b_data_box -> imin)[1])) + 1))?((((b_data_box -> imax)[1]) - ((b_data_box -> imin)[1])) + 1) : 0)));
              int hypre__sx2 = (base_stride[0]);
              int hypre__sy2 = ((base_stride[1]) * (((0 < ((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1))?((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1) : 0)));
              int hypre__sz2 = (((base_stride[2]) * (((0 < ((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1))?((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1) : 0))) * (((0 < ((((r_data_box -> imax)[1]) - ((r_data_box -> imin)[1])) + 1))?((((r_data_box -> imax)[1]) - ((r_data_box -> imin)[1])) + 1) : 0)));
              int hypre__nx = (loop_size[0]);
              int hypre__ny = (loop_size[1]);
              int hypre__nz = (loop_size[2]);
              int hypre__mx = hypre__nx;
              int hypre__my = hypre__ny;
              int hypre__mz = hypre__nz;
              int hypre__dir;
              int hypre__max;
              int hypre__div;
              int hypre__mod;
              int hypre__block;
              int hypre__num_blocks;
              hypre__dir = 0;
              hypre__max = hypre__nx;
              if (hypre__ny > hypre__max) {
                hypre__dir = 1;
                hypre__max = hypre__ny;
              }
              if (hypre__nz > hypre__max) {
                hypre__dir = 2;
                hypre__max = hypre__nz;
              }
              hypre__num_blocks = 1;
              if (hypre__max < hypre__num_blocks) {
                hypre__num_blocks = hypre__max;
              }
              if (hypre__num_blocks > 0) {
                hypre__div = (hypre__max / hypre__num_blocks);
                hypre__mod = (hypre__max % hypre__num_blocks);
              }
/* # 236 "smg_residual.c" */
              (hypre__sx1 == 1)?0 : ((__assert_fail(("hypre__sx1 == 1"),("/home/liao6/svnrepos/benchmarks/smg2000/struct_ls/smg_residual.c"),(357),("int hypre_SMGResidual(void *, struct hypre_StructMatrix_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *)")) , 0));
              (hypre__sx2 == 1)?0 : ((__assert_fail(("hypre__sx2 == 1"),("/home/liao6/svnrepos/benchmarks/smg2000/struct_ls/smg_residual.c"),(358),("int hypre_SMGResidual(void *, struct hypre_StructMatrix_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *)")) , 0));
              if (hypre__num_blocks == 1) {
                int ii;
                int jj;
                int kk;
                const double *bp_0 = (bp + hypre__i1start);
                double *rp_0 = (rp + hypre__i2start);
                for (kk = 0; kk < hypre__mz; kk++) {
                  for (jj = 0; jj < hypre__my; jj++) {
                    const double *bpp = ((bp_0 + (jj * hypre__sy1)) + (kk * hypre__sz1));
                    double *rpp = ((rp_0 + (jj * hypre__sy2)) + (kk * hypre__sz2));
                    for (ii = 0; ii < hypre__mx; ii++) {
                      rpp[ii] = (bpp[ii]);
                    }
                  }
                }
/* hypre__num_blocks > 1 */
              }
              else {
                for (hypre__block = 0; hypre__block < hypre__num_blocks; hypre__block++) {
                  loopi = 0;
                  loopj = 0;
                  loopk = 0;
                  hypre__nx = hypre__mx;
                  hypre__ny = hypre__my;
                  hypre__nz = hypre__mz;
                  if (hypre__dir == 0) {
                    loopi = ((hypre__block * hypre__div) + (((hypre__mod < hypre__block)?hypre__mod : hypre__block)));
                    hypre__nx = (hypre__div + (((hypre__mod > hypre__block)?1 : 0)));
                  }
                  else if (hypre__dir == 1) {
                    loopj = ((hypre__block * hypre__div) + (((hypre__mod < hypre__block)?hypre__mod : hypre__block)));
                    hypre__ny = (hypre__div + (((hypre__mod > hypre__block)?1 : 0)));
                  }
                  else if (hypre__dir == 2) {
                    loopk = ((hypre__block * hypre__div) + (((hypre__mod < hypre__block)?hypre__mod : hypre__block)));
                    hypre__nz = (hypre__div + (((hypre__mod > hypre__block)?1 : 0)));
                  }
                  bi = (((hypre__i1start + loopi) + (loopj * hypre__sy1)) + (loopk * hypre__sz1));
                  ri = (((hypre__i2start + loopi) + (loopj * hypre__sy2)) + (loopk * hypre__sz2));
/* AAA */
{
                    int ii;
                    int jj;
                    int kk;
                    const double *bp_0 = (bp + bi);
                    double *rp_0 = (rp + ri);
                    for (kk = 0; kk < hypre__nz; kk++) {
                      for (jj = 0; jj < hypre__ny; jj++) {
                        const double *bpp = ((bp_0 + (jj * hypre__sy1)) + (kk * hypre__sz1));
                        double *rpp = ((rp_0 + (jj * hypre__sy2)) + (kk * hypre__sz2));
                        for (ii = 0; ii < hypre__nx; ii++) {
                          rpp[ii] = (bpp[ii]);
                        }
                      }
                    }
/* AAA */
                  }
                }
/* hypre__num_blocks > 1 */
              }
            }
          }
        }
        break; 
      }
      case 1:
{
{
          hypre_FinalizeIndtComputations(comm_handle);
          compute_box_aa = (compute_pkg -> dept_boxes);
        }
        break; 
      }
/* switch */
    }
/*--------------------------------------------------------------------
     * Compute r -= A*x
     *--------------------------------------------------------------------*/
    for (i = 0; i < (compute_box_aa -> size); i++) {
      int dxp_s[15UL];
      compute_box_a = ((compute_box_aa -> box_arrays)[i]);
      A_data_box = ((( *(A -> data_space)).boxes) + i);
      x_data_box = ((( *(x -> data_space)).boxes) + i);
      r_data_box = ((( *(r -> data_space)).boxes) + i);
      rp = ((r -> data) + ((r -> data_indices)[i]));
      for (si = 0; si < stencil_size; si++) {
        dxp_s[si] = (((stencil_shape[si])[0]) + ((((stencil_shape[si])[1]) + (((stencil_shape[si])[2]) * (((0 < ((((x_data_box -> imax)[1]) - ((x_data_box -> imin)[1])) + 1))?((((x_data_box -> imax)[1]) - ((x_data_box -> imin)[1])) + 1) : 0)))) * (((0 < ((((x_data_box -> imax)[0]) - ((x_data_box -> imin)[0])) + 1))?((((x_data_box -> imax)[0]) - ((x_data_box -> imin)[0])) + 1) : 0))));
      }
      for (j = 0; j < (compute_box_a -> size); j++) {{
          int hypre__i1start;
          int hypre__i2start;
          int hypre__i3start;
          int hypre__sx1;
          int hypre__sy1;
          int hypre__sz1;
          int hypre__sx2;
          int hypre__sy2;
          int hypre__sz2;
          int hypre__sx3;
          int hypre__sy3;
          int hypre__sz3;
          int hypre__nx;
          int hypre__ny;
          int hypre__nz;
          int hypre__mx;
          int hypre__my;
          int hypre__mz;
          int hypre__dir;
          int hypre__max;
          int hypre__div;
          int hypre__mod;
          int hypre__block;
          int hypre__num_blocks;
          compute_box = ((compute_box_a -> boxes) + j);
          start = (compute_box -> imin);
          hypre__i1start = (((start[0]) - ((A_data_box -> imin)[0])) + ((((start[1]) - ((A_data_box -> imin)[1])) + (((start[2]) - ((A_data_box -> imin)[2])) * (((0 < ((((A_data_box -> imax)[1]) - ((A_data_box -> imin)[1])) + 1))?((((A_data_box -> imax)[1]) - ((A_data_box -> imin)[1])) + 1) : 0)))) * (((0 < ((((A_data_box -> imax)[0]) - ((A_data_box -> imin)[0])) + 1))?((((A_data_box -> imax)[0]) - ((A_data_box -> imin)[0])) + 1) : 0))));
          hypre__i2start = (((start[0]) - ((x_data_box -> imin)[0])) + ((((start[1]) - ((x_data_box -> imin)[1])) + (((start[2]) - ((x_data_box -> imin)[2])) * (((0 < ((((x_data_box -> imax)[1]) - ((x_data_box -> imin)[1])) + 1))?((((x_data_box -> imax)[1]) - ((x_data_box -> imin)[1])) + 1) : 0)))) * (((0 < ((((x_data_box -> imax)[0]) - ((x_data_box -> imin)[0])) + 1))?((((x_data_box -> imax)[0]) - ((x_data_box -> imin)[0])) + 1) : 0))));
          hypre__i3start = (((start[0]) - ((r_data_box -> imin)[0])) + ((((start[1]) - ((r_data_box -> imin)[1])) + (((start[2]) - ((r_data_box -> imin)[2])) * (((0 < ((((r_data_box -> imax)[1]) - ((r_data_box -> imin)[1])) + 1))?((((r_data_box -> imax)[1]) - ((r_data_box -> imin)[1])) + 1) : 0)))) * (((0 < ((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1))?((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1) : 0))));
          hypre_BoxGetStrideSize(compute_box,base_stride,loop_size);
          hypre__sx1 = (base_stride[0]);
          hypre__sy1 = ((base_stride[1]) * (((0 < ((((A_data_box -> imax)[0]) - ((A_data_box -> imin)[0])) + 1))?((((A_data_box -> imax)[0]) - ((A_data_box -> imin)[0])) + 1) : 0)));
          hypre__sz1 = (((base_stride[2]) * (((0 < ((((A_data_box -> imax)[0]) - ((A_data_box -> imin)[0])) + 1))?((((A_data_box -> imax)[0]) - ((A_data_box -> imin)[0])) + 1) : 0))) * (((0 < ((((A_data_box -> imax)[1]) - ((A_data_box -> imin)[1])) + 1))?((((A_data_box -> imax)[1]) - ((A_data_box -> imin)[1])) + 1) : 0)));
          hypre__sx2 = (base_stride[0]);
          hypre__sy2 = ((base_stride[1]) * (((0 < ((((x_data_box -> imax)[0]) - ((x_data_box -> imin)[0])) + 1))?((((x_data_box -> imax)[0]) - ((x_data_box -> imin)[0])) + 1) : 0)));
          hypre__sz2 = (((base_stride[2]) * (((0 < ((((x_data_box -> imax)[0]) - ((x_data_box -> imin)[0])) + 1))?((((x_data_box -> imax)[0]) - ((x_data_box -> imin)[0])) + 1) : 0))) * (((0 < ((((x_data_box -> imax)[1]) - ((x_data_box -> imin)[1])) + 1))?((((x_data_box -> imax)[1]) - ((x_data_box -> imin)[1])) + 1) : 0)));
          hypre__sx3 = (base_stride[0]);
          hypre__sy3 = ((base_stride[1]) * (((0 < ((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1))?((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1) : 0)));
          hypre__sz3 = (((base_stride[2]) * (((0 < ((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1))?((((r_data_box -> imax)[0]) - ((r_data_box -> imin)[0])) + 1) : 0))) * (((0 < ((((r_data_box -> imax)[1]) - ((r_data_box -> imin)[1])) + 1))?((((r_data_box -> imax)[1]) - ((r_data_box -> imin)[1])) + 1) : 0)));
/* Based on BG/L Milestone #46 */
          (hypre__sx1 == 1)?0 : ((__assert_fail(("hypre__sx1 == 1"),("/home/liao6/svnrepos/benchmarks/smg2000/struct_ls/smg_residual.c"),(602),("int hypre_SMGResidual(void *, struct hypre_StructMatrix_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *)")) , 0));
          (hypre__sx2 == 1)?0 : ((__assert_fail(("hypre__sx2 == 1"),("/home/liao6/svnrepos/benchmarks/smg2000/struct_ls/smg_residual.c"),(603),("int hypre_SMGResidual(void *, struct hypre_StructMatrix_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *)")) , 0));
          (hypre__sx3 == 1)?0 : ((__assert_fail(("hypre__sx3 == 1"),("/home/liao6/svnrepos/benchmarks/smg2000/struct_ls/smg_residual.c"),(604),("int hypre_SMGResidual(void *, struct hypre_StructMatrix_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *)")) , 0));
          hypre__mx = (loop_size[0]);
          hypre__my = (loop_size[1]);
          hypre__mz = (loop_size[2]);
          hypre__dir = 0;
          hypre__max = hypre__mx;
          if (hypre__my > hypre__max) {
            hypre__dir = 1;
            hypre__max = hypre__my;
          }
          if (hypre__mz > hypre__max) {
            hypre__dir = 2;
            hypre__max = hypre__mz;
          }
          hypre__num_blocks = 1;
          if (hypre__max < hypre__num_blocks) {
            hypre__num_blocks = hypre__max;
          }
          if (hypre__num_blocks > 0) {
            hypre__div = (hypre__max / hypre__num_blocks);
            hypre__mod = (hypre__max % hypre__num_blocks);
          }
          else 
            continue; 
          if (hypre__num_blocks == 1) {
            int si;
            int ii;
            int jj;
            int kk;
            const double *Ap_0 = ((A -> data) + hypre__i1start);
            const double *xp_0 = (((x -> data) + hypre__i2start) + ((x -> data_indices)[i]));
            ri = hypre__i3start;

            void *__out_argv1__1527__[21];
            *(__out_argv1__1527__ + 0) = ((void *)(&xp_0));
            *(__out_argv1__1527__ + 1) = ((void *)(&Ap_0));
            *(__out_argv1__1527__ + 2) = ((void *)(&kk));
            *(__out_argv1__1527__ + 3) = ((void *)(&jj));
            *(__out_argv1__1527__ + 4) = ((void *)(&ii));
            *(__out_argv1__1527__ + 5) = ((void *)(&si));
            *(__out_argv1__1527__ + 6) = ((void *)(&hypre__mz));
            *(__out_argv1__1527__ + 7) = ((void *)(&hypre__my));
            *(__out_argv1__1527__ + 8) = ((void *)(&hypre__mx));
            *(__out_argv1__1527__ + 9) = ((void *)(&hypre__sz3));
            *(__out_argv1__1527__ + 10) = ((void *)(&hypre__sy3));
            *(__out_argv1__1527__ + 11) = ((void *)(&hypre__sz2));
            *(__out_argv1__1527__ + 12) = ((void *)(&hypre__sy2));
            *(__out_argv1__1527__ + 13) = ((void *)(&hypre__sz1));
            *(__out_argv1__1527__ + 14) = ((void *)(&hypre__sy1));
            *(__out_argv1__1527__ + 15) = ((void *)(&dxp_s));
            *(__out_argv1__1527__ + 16) = ((void *)(&i));
            *(__out_argv1__1527__ + 17) = ((void *)(&stencil_size));
            *(__out_argv1__1527__ + 18) = ((void *)(&rp));
            *(__out_argv1__1527__ + 19) = ((void *)(&ri));
            *(__out_argv1__1527__ + 20) = ((void *)(&A));
#ifdef USE_DLOPEN
            if (g_execution_flag == 0){
              printf("Opening the .so file ...\n");
              FunctionLib = dlopen("/home/liao6/svnrepos/benchmarks/smg2000/struct_ls/OUT__1__6119__.so",RTLD_LAZY);
              dlError = dlerror();
              if( dlError ) {
                printf("cannot open .so file!\n");
                exit(1);
              }

              /* Find the first loaded function */
              OUT__1__6119__ = dlsym( FunctionLib, "OUT__1__6119__");
              dlError = dlerror();
              if( dlError )
              {
                printf("cannot find OUT__1__6755__() !\n");
                exit(1);
              }
              //remove("/tmp/peri.result");
              //time1=time_stamp();
            } // end if (flag ==0)
             g_execution_flag ++;

            (*OUT__1__6119__)(__out_argv1__1527__);
#else            
            OUT__1__6119__(__out_argv1__1527__);
#endif
/* hypre__num_blocks > 1 */
          }
          else {
            for (si = 0; si < stencil_size; si++) {
              Ap = ((A -> data) + (((A -> data_indices)[i])[si]));
              xp = (((x -> data) + ((x -> data_indices)[i])) + (dxp_s[si]));
              for (hypre__block = 0; hypre__block < hypre__num_blocks; hypre__block++) {
                loopi = 0;
                loopj = 0;
                loopk = 0;
                hypre__nx = hypre__mx;
                hypre__ny = hypre__my;
                hypre__nz = hypre__mz;
                if (hypre__dir == 0) {
                  loopi = ((hypre__block * hypre__div) + (((hypre__mod < hypre__block)?hypre__mod : hypre__block)));
                  hypre__nx = (hypre__div + (((hypre__mod > hypre__block)?1 : 0)));
                }
                else if (hypre__dir == 1) {
                  loopj = ((hypre__block * hypre__div) + (((hypre__mod < hypre__block)?hypre__mod : hypre__block)));
                  hypre__ny = (hypre__div + (((hypre__mod > hypre__block)?1 : 0)));
                }
                else if (hypre__dir == 2) {
                  loopk = ((hypre__block * hypre__div) + (((hypre__mod < hypre__block)?hypre__mod : hypre__block)));
                  hypre__nz = (hypre__div + (((hypre__mod > hypre__block)?1 : 0)));
                }
                Ai = (((hypre__i1start + (loopi * hypre__sx1)) + (loopj * hypre__sy1)) + (loopk * hypre__sz1));
                xi = (((hypre__i2start + (loopi * hypre__sx2)) + (loopj * hypre__sy2)) + (loopk * hypre__sz2));
                ri = (((hypre__i3start + (loopi * hypre__sx3)) + (loopj * hypre__sy3)) + (loopk * hypre__sz3));
/* CORE LOOP BEGIN */
                (hypre__sx1 == 1)?0 : ((__assert_fail(("hypre__sx1 == 1"),("/home/liao6/svnrepos/benchmarks/smg2000/struct_ls/smg_residual.c"),(689),("int hypre_SMGResidual(void *, struct hypre_StructMatrix_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *)")) , 0));
                (hypre__sx2 == 1)?0 : ((__assert_fail(("hypre__sx2 == 1"),("/home/liao6/svnrepos/benchmarks/smg2000/struct_ls/smg_residual.c"),(690),("int hypre_SMGResidual(void *, struct hypre_StructMatrix_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *)")) , 0));
                (hypre__sx3 == 1)?0 : ((__assert_fail(("hypre__sx3 == 1"),("/home/liao6/svnrepos/benchmarks/smg2000/struct_ls/smg_residual.c"),(691),("int hypre_SMGResidual(void *, struct hypre_StructMatrix_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *, struct hypre_StructVector_struct *)")) , 0));
{
/* In essence, this loop computes:
                   *
                   FOR_ALL i, j, k DO
                   rp[ri + i + j*DJ_R + k*DK_R]
                   -= Ap[Ai + i + j*DJ_A + k*DK_A]
                   * xp[xi + i + j*DJ_X + k*DK_X];
                   */
// 1. promoting loop invariant expressions
// j loop increment for Ai, xi, and ri
                  int DJA0 = (hypre__sy1 - (hypre__nx * hypre__sx1));
                  int DJX0 = (hypre__sy2 - (hypre__nx * hypre__sx2));
                  int DJR0 = (hypre__sy3 - (hypre__nx * hypre__sx3));
// k loop increment for Ai, xi, and ri
                  int DKA0 = (hypre__sz1 - (hypre__ny * hypre__sy1));
                  int DKX0 = (hypre__sz2 - (hypre__ny * hypre__sy2));
                  int DKR0 = (hypre__sz3 - (hypre__ny * hypre__sy3));
// pre-compute array index offset changes for one iteration within each level of loop
// one iteration of j loop on ri
                  int DJR1 = (DJR0 + (hypre__nx * hypre__sx3));
// one iteration of k loop on ri
                  int DKR1 = (DKR0 + (hypre__ny * DJR1));
// one iteration of j loop on Ai
                  int DJA1 = (DJA0 + (hypre__nx * hypre__sx1));
// one iteration of k loop on Ai
                  int DKA1 = (DKA0 + (hypre__ny * DJA1));
// one iteration of j loop on xi
                  int DJX1 = (DJX0 + (hypre__nx * hypre__sx2));
// one iteration of k loop on xi
                  int DKX1 = (DKX0 + (hypre__ny * DJX1));
                  for (loopk = 0; loopk < hypre__nz; loopk++) {
                    for (loopj = 0; loopj < hypre__ny; loopj++) {
                      for (loopi = 0; loopi < hypre__nx; loopi++) {{
                          rp[((ri + (loopi * hypre__sx1)) + (loopj * DJR1)) + (loopk * DKR1)] -= ((Ap[((Ai + (loopi * hypre__sx1)) + (loopj * DJA1)) + (loopk * DKA1)]) * (xp[((xi + (loopi * hypre__sx2)) + (loopj * DJX1)) + (loopk * DKX1)]));
//rp[ri] -= Ap[Ai] * xp[xi];
                        }
//Ai += hypre__sx1; // 2. merging loop index changes
//xi += hypre__sx2;
//ri += hypre__sx3;
                      }
//Ai += DJA0;//(hypre__sy1 - (hypre__nx * hypre__sx1));
//xi += DJX0;//(hypre__sy2 - (hypre__nx * hypre__sx2));
//ri += DJR0;//(hypre__sy3 - (hypre__nx * hypre__sx3));
                    }
//Ai += DKA0;//(hypre__sz1 - (hypre__ny * hypre__sy1));
//xi += DKX0; //(hypre__sz2 - (hypre__ny * hypre__sy2));
//ri += DKR0;//(hypre__sz3 - (hypre__ny * hypre__sy3));
                  }
                }
/* CORE LOOP END */
/* hypre__block */
              }
/* si */
            }
/* else hypre__num_blocks > 1 */
          }
/* j */
        }
      }
/* i */
    }
/* compute_i */
  }
  hypre_IncFLOPCount((residual_data -> flops));
  hypre_EndTiming((residual_data -> time_index));
  return ierr;
}
Ejemplo n.º 9
0
HYPRE_Int
hypre_SMGResidual( void               *residual_vdata,
                   hypre_StructMatrix *A,
                   hypre_StructVector *x,
                   hypre_StructVector *b,
                   hypre_StructVector *r              )
{
   HYPRE_Int ierr;

   hypre_SMGResidualData  *residual_data = residual_vdata;

   hypre_IndexRef          base_stride = (residual_data -> base_stride);
   hypre_BoxArray         *base_points = (residual_data -> base_points);
   hypre_ComputePkg       *compute_pkg = (residual_data -> compute_pkg);

   hypre_CommHandle       *comm_handle;
                       
   hypre_BoxArrayArray    *compute_box_aa;
   hypre_BoxArray         *compute_box_a;
   hypre_Box              *compute_box;
                       
   hypre_Box              *A_data_box;
   hypre_Box              *x_data_box;
   hypre_Box              *b_data_box;
   hypre_Box              *r_data_box;
                       
   HYPRE_Int               Ai;
   HYPRE_Int               xi;
   HYPRE_Int               bi;
   HYPRE_Int               ri;
                         
   double                 *Ap0;
   double                 *xp0;
   double                 *bp;
   double                 *rp;
                       
   hypre_Index             loop_size;
   hypre_IndexRef          start;
                       
   hypre_StructStencil    *stencil;
   hypre_Index            *stencil_shape;
   HYPRE_Int               stencil_size;

   HYPRE_Int               compute_i, i, j, si;
   HYPRE_Int               loopi, loopj, loopk;

   double            *Ap1, *Ap2;
   double            *Ap3, *Ap4;
   double            *Ap5, *Ap6;
   double            *Ap7, *Ap8, *Ap9;
   double            *Ap10, *Ap11, *Ap12, *Ap13, *Ap14;
   double            *Ap15, *Ap16, *Ap17, *Ap18;
   double            *Ap19, *Ap20, *Ap21, *Ap22, *Ap23, *Ap24, *Ap25, *Ap26;
   double            *xp1, *xp2;
   double            *xp3, *xp4;
   double            *xp5, *xp6;
   double            *xp7, *xp8, *xp9;
   double            *xp10, *xp11, *xp12, *xp13, *xp14;
   double            *xp15, *xp16, *xp17, *xp18;
   double            *xp19, *xp20, *xp21, *xp22, *xp23, *xp24, *xp25, *xp26;

   hypre_BeginTiming(residual_data -> time_index);

   /*-----------------------------------------------------------------------
    * Compute residual r = b - Ax
    *-----------------------------------------------------------------------*/

   stencil       = hypre_StructMatrixStencil(A);
   stencil_shape = hypre_StructStencilShape(stencil);
   stencil_size  = hypre_StructStencilSize(stencil);

   for (compute_i = 0; compute_i < 2; compute_i++)
   {
      switch(compute_i)
      {
         case 0:
         {
            xp0 = hypre_StructVectorData(x);
            hypre_InitializeIndtComputations(compute_pkg, xp0, &comm_handle);
            compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg);

            /*----------------------------------------
             * Copy b into r
             *----------------------------------------*/

            compute_box_a = base_points;
            hypre_ForBoxI(i, compute_box_a)
               {
                  compute_box = hypre_BoxArrayBox(compute_box_a, i);
                  start = hypre_BoxIMin(compute_box);

                  b_data_box =
                     hypre_BoxArrayBox(hypre_StructVectorDataSpace(b), i);
                  r_data_box =
                     hypre_BoxArrayBox(hypre_StructVectorDataSpace(r), i);

                  bp = hypre_StructVectorBoxData(b, i);
                  rp = hypre_StructVectorBoxData(r, i);

                  hypre_BoxGetStrideSize(compute_box, base_stride, loop_size);
                  hypre_BoxLoop2Begin(loop_size,
                                      b_data_box, start, base_stride, bi,
                                      r_data_box, start, base_stride, ri);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,bi,ri
#include "hypre_box_smp_forloop.h"
                  hypre_BoxLoop2For(loopi, loopj, loopk, bi, ri)
                     {
                        rp[ri] = bp[bi];
                     }
                  hypre_BoxLoop2End(bi, ri);
               }
         }
         break;

         case 1:
         {
            hypre_FinalizeIndtComputations(comm_handle);
            compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg);
         }
         break;
      }

      /*--------------------------------------------------------------------
       * Compute r -= A*x
       *--------------------------------------------------------------------*/

      hypre_ForBoxArrayI(i, compute_box_aa)
         {
            compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, i);

            A_data_box = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(A), i);
            x_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(x), i);
            r_data_box = hypre_BoxArrayBox(hypre_StructVectorDataSpace(r), i);

            rp = hypre_StructVectorBoxData(r, i);

            /*--------------------------------------------------------------
             * Switch statement to direct control (based on stencil size) to
             * code to get pointers and offsets fo A and x.
             *--------------------------------------------------------------*/

            switch (stencil_size)
            {
               case 1:

               Ap0 = hypre_StructMatrixBoxData(A, i, 0);
               xp0 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);

               break;

               case 3:

               Ap0 = hypre_StructMatrixBoxData(A, i, 0);
               Ap1 = hypre_StructMatrixBoxData(A, i, 1);
               Ap2 = hypre_StructMatrixBoxData(A, i, 2);

               xp0 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);
               xp1 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]);
               xp2 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]);

               break;

               case 5:

               Ap0 = hypre_StructMatrixBoxData(A, i, 0);
               Ap1 = hypre_StructMatrixBoxData(A, i, 1);
               Ap2 = hypre_StructMatrixBoxData(A, i, 2);
               Ap3 = hypre_StructMatrixBoxData(A, i, 3);
               Ap4 = hypre_StructMatrixBoxData(A, i, 4);

               xp0 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);
               xp1 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]);
               xp2 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]);
               xp3 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[3]);
               xp4 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[4]);

               break;

               case 7:

               Ap0 = hypre_StructMatrixBoxData(A, i, 0);
               Ap1 = hypre_StructMatrixBoxData(A, i, 1);
               Ap2 = hypre_StructMatrixBoxData(A, i, 2);
               Ap3 = hypre_StructMatrixBoxData(A, i, 3);
               Ap4 = hypre_StructMatrixBoxData(A, i, 4);
               Ap5 = hypre_StructMatrixBoxData(A, i, 5);
               Ap6 = hypre_StructMatrixBoxData(A, i, 6);

               xp0 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);
               xp1 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]);
               xp2 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]);
               xp3 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[3]);
               xp4 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[4]);
               xp5 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[5]);
               xp6 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[6]);

               break;

               case 9:

               Ap0 = hypre_StructMatrixBoxData(A, i, 0);
               Ap1 = hypre_StructMatrixBoxData(A, i, 1);
               Ap2 = hypre_StructMatrixBoxData(A, i, 2);
               Ap3 = hypre_StructMatrixBoxData(A, i, 3);
               Ap4 = hypre_StructMatrixBoxData(A, i, 4);
               Ap5 = hypre_StructMatrixBoxData(A, i, 5);
               Ap6 = hypre_StructMatrixBoxData(A, i, 6);
               Ap7 = hypre_StructMatrixBoxData(A, i, 7);
               Ap8 = hypre_StructMatrixBoxData(A, i, 8);

               xp0 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);
               xp1 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]);
               xp2 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]);
               xp3 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[3]);
               xp4 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[4]);
               xp5 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[5]);
               xp6 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[6]);
               xp7 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[7]);
               xp8 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[8]);

               break;

               case 15:

               Ap0 = hypre_StructMatrixBoxData(A, i, 0);
               Ap1 = hypre_StructMatrixBoxData(A, i, 1);
               Ap2 = hypre_StructMatrixBoxData(A, i, 2);
               Ap3 = hypre_StructMatrixBoxData(A, i, 3);
               Ap4 = hypre_StructMatrixBoxData(A, i, 4);
               Ap5 = hypre_StructMatrixBoxData(A, i, 5);
               Ap6 = hypre_StructMatrixBoxData(A, i, 6);
               Ap7 = hypre_StructMatrixBoxData(A, i, 7);
               Ap8 = hypre_StructMatrixBoxData(A, i, 8);
               Ap9 = hypre_StructMatrixBoxData(A, i, 9);
               Ap10 = hypre_StructMatrixBoxData(A, i, 10);
               Ap11 = hypre_StructMatrixBoxData(A, i, 11);
               Ap12 = hypre_StructMatrixBoxData(A, i, 12);
               Ap13 = hypre_StructMatrixBoxData(A, i, 13);
               Ap14 = hypre_StructMatrixBoxData(A, i, 14);

               xp0 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);
               xp1 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]);
               xp2 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]);
               xp3 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[3]);
               xp4 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[4]);
               xp5 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[5]);
               xp6 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[6]);
               xp7 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[7]);
               xp8 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[8]);
               xp9 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[9]);
               xp10 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[10]);
               xp11 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[11]);
               xp12 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[12]);
               xp13 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[13]);
               xp14 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[14]);

               break;

               case 19:

               Ap0 = hypre_StructMatrixBoxData(A, i, 0);
               Ap1 = hypre_StructMatrixBoxData(A, i, 1);
               Ap2 = hypre_StructMatrixBoxData(A, i, 2);
               Ap3 = hypre_StructMatrixBoxData(A, i, 3);
               Ap4 = hypre_StructMatrixBoxData(A, i, 4);
               Ap5 = hypre_StructMatrixBoxData(A, i, 5);
               Ap6 = hypre_StructMatrixBoxData(A, i, 6);
               Ap7 = hypre_StructMatrixBoxData(A, i, 7);
               Ap8 = hypre_StructMatrixBoxData(A, i, 8);
               Ap9 = hypre_StructMatrixBoxData(A, i, 9);
               Ap10 = hypre_StructMatrixBoxData(A, i, 10);
               Ap11 = hypre_StructMatrixBoxData(A, i, 11);
               Ap12 = hypre_StructMatrixBoxData(A, i, 12);
               Ap13 = hypre_StructMatrixBoxData(A, i, 13);
               Ap14 = hypre_StructMatrixBoxData(A, i, 14);
               Ap15 = hypre_StructMatrixBoxData(A, i, 15);
               Ap16 = hypre_StructMatrixBoxData(A, i, 16);
               Ap17 = hypre_StructMatrixBoxData(A, i, 17);
               Ap18 = hypre_StructMatrixBoxData(A, i, 18);

               xp0 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);
               xp1 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]);
               xp2 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]);
               xp3 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[3]);
               xp4 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[4]);
               xp5 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[5]);
               xp6 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[6]);
               xp7 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[7]);
               xp8 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[8]);
               xp9 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[9]);
               xp10 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[10]);
               xp11 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[11]);
               xp12 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[12]);
               xp13 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[13]);
               xp14 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[14]);
               xp15 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[15]);
               xp16 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[16]);
               xp17 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[17]);
               xp18 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[18]);

               break;

               case 27:

               Ap0 = hypre_StructMatrixBoxData(A, i, 0);
               Ap1 = hypre_StructMatrixBoxData(A, i, 1);
               Ap2 = hypre_StructMatrixBoxData(A, i, 2);
               Ap3 = hypre_StructMatrixBoxData(A, i, 3);
               Ap4 = hypre_StructMatrixBoxData(A, i, 4);
               Ap5 = hypre_StructMatrixBoxData(A, i, 5);
               Ap6 = hypre_StructMatrixBoxData(A, i, 6);
               Ap7 = hypre_StructMatrixBoxData(A, i, 7);
               Ap8 = hypre_StructMatrixBoxData(A, i, 8);
               Ap9 = hypre_StructMatrixBoxData(A, i, 9);
               Ap10 = hypre_StructMatrixBoxData(A, i, 10);
               Ap11 = hypre_StructMatrixBoxData(A, i, 11);
               Ap12 = hypre_StructMatrixBoxData(A, i, 12);
               Ap13 = hypre_StructMatrixBoxData(A, i, 13);
               Ap14 = hypre_StructMatrixBoxData(A, i, 14);
               Ap15 = hypre_StructMatrixBoxData(A, i, 15);
               Ap16 = hypre_StructMatrixBoxData(A, i, 16);
               Ap17 = hypre_StructMatrixBoxData(A, i, 17);
               Ap18 = hypre_StructMatrixBoxData(A, i, 18);
               Ap19 = hypre_StructMatrixBoxData(A, i, 19);
               Ap20 = hypre_StructMatrixBoxData(A, i, 20);
               Ap21 = hypre_StructMatrixBoxData(A, i, 21);
               Ap22 = hypre_StructMatrixBoxData(A, i, 22);
               Ap23 = hypre_StructMatrixBoxData(A, i, 23);
               Ap24 = hypre_StructMatrixBoxData(A, i, 24);
               Ap25 = hypre_StructMatrixBoxData(A, i, 25);
               Ap26 = hypre_StructMatrixBoxData(A, i, 26);

               xp0 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[0]);
               xp1 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[1]);
               xp2 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[2]);
               xp3 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[3]);
               xp4 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[4]);
               xp5 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[5]);
               xp6 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[6]);
               xp7 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[7]);
               xp8 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[8]);
               xp9 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[9]);
               xp10 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[10]);
               xp11 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[11]);
               xp12 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[12]);
               xp13 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[13]);
               xp14 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[14]);
               xp15 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[15]);
               xp16 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[16]);
               xp17 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[17]);
               xp18 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[18]);
               xp19 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[19]);
               xp20 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[20]);
               xp21 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[21]);
               xp22 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[22]);
               xp23 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[23]);
               xp24 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[24]);
               xp25 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[25]);
               xp26 = hypre_StructVectorBoxData(x, i) +
                  hypre_BoxOffsetDistance(x_data_box, stencil_shape[26]);

               break;

               default:
               ;
            }

            hypre_ForBoxI(j, compute_box_a)
               {
                  compute_box = hypre_BoxArrayBox(compute_box_a, j);

                  start  = hypre_BoxIMin(compute_box);

                  /*------------------------------------------------------
                   * Switch statement to direct control to appropriate
                   * box loop depending on stencil size
                   *------------------------------------------------------*/

                  switch (stencil_size)
                  {

                     case 1:
   
                     hypre_BoxGetStrideSize(compute_box, base_stride, loop_size);
                     hypre_BoxLoop3Begin(loop_size,
                                         A_data_box, start, base_stride, Ai,
                                         x_data_box, start, base_stride, xi,
                                         r_data_box, start, base_stride, ri);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,Ai,xi,ri
#include "hypre_box_smp_forloop.h"
                     hypre_BoxLoop3For(loopi, loopj, loopk, Ai, xi, ri)
                        {

                           rp[ri] = rp[ri]
                              - Ap0[Ai] * xp0[xi];

                        }
                     hypre_BoxLoop3End(Ai, xi, ri);

                     break;

                     case 3:

                     hypre_BoxGetStrideSize(compute_box, base_stride, loop_size);
                     hypre_BoxLoop3Begin(loop_size,
                                         A_data_box, start, base_stride, Ai,
                                         x_data_box, start, base_stride, xi,
                                         r_data_box, start, base_stride, ri);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,Ai,xi,ri
#include "hypre_box_smp_forloop.h"
                     hypre_BoxLoop3For(loopi, loopj, loopk, Ai, xi, ri)
                        {
 
                           rp[ri] = rp[ri]
                              - Ap0[Ai] * xp0[xi]
                              - Ap1[Ai] * xp1[xi]
                              - Ap2[Ai] * xp2[xi];

                        }
Ejemplo n.º 10
0
int
hypre_SemiRestrict( void               *restrict_vdata,
                    hypre_StructMatrix *R,
                    hypre_StructVector *r,
                    hypre_StructVector *rc             )
{
   int ierr = 0;

   hypre_SemiRestrictData *restrict_data = (hypre_SemiRestrictData *)restrict_vdata;

   int                     R_stored_as_transpose;
   hypre_ComputePkg       *compute_pkg;
   hypre_IndexRef          cindex;
   hypre_IndexRef          stride;

   hypre_StructGrid       *fgrid;
   int                    *fgrid_ids;
   hypre_StructGrid       *cgrid;
   hypre_BoxArray         *cgrid_boxes;
   int                    *cgrid_ids;

   hypre_CommHandle       *comm_handle;
                       
   hypre_BoxArrayArray    *compute_box_aa;
   hypre_BoxArray         *compute_box_a;
   hypre_Box              *compute_box;
                       
   hypre_Box              *R_dbox;
   hypre_Box              *r_dbox;
   hypre_Box              *rc_dbox;
                       
   int                     Ri;
   int                     ri;
   int                     rci;
                         
   double                 *Rp0, *Rp1;
   double                 *rp, *rp0, *rp1;
   double                 *rcp;
                       
   hypre_Index             loop_size;
   hypre_IndexRef          start;
   hypre_Index             startc;
   hypre_Index             stridec;
                       
   hypre_StructStencil    *stencil;
   hypre_Index            *stencil_shape;

   int                     compute_i, fi, ci, j;
   int                     loopi, loopj, loopk;

   /*-----------------------------------------------------------------------
    * Initialize some things.
    *-----------------------------------------------------------------------*/

   hypre_BeginTiming(restrict_data -> time_index);

   R_stored_as_transpose = (restrict_data -> R_stored_as_transpose);
   compute_pkg   = (restrict_data -> compute_pkg);
   cindex        = (restrict_data -> cindex);
   stride        = (restrict_data -> stride);

   stencil       = hypre_StructMatrixStencil(R);
   stencil_shape = hypre_StructStencilShape(stencil);

   hypre_SetIndex(stridec, 1, 1, 1);

   /*--------------------------------------------------------------------
    * Restrict the residual.
    *--------------------------------------------------------------------*/

   fgrid = hypre_StructVectorGrid(r);
   fgrid_ids = hypre_StructGridIDs(fgrid);
   cgrid = hypre_StructVectorGrid(rc);
   cgrid_boxes = hypre_StructGridBoxes(cgrid);
   cgrid_ids = hypre_StructGridIDs(cgrid);

   for (compute_i = 0; compute_i < 2; compute_i++)
   {
      switch(compute_i)
      {
         case 0:
         {
            rp = hypre_StructVectorData(r);
            hypre_InitializeIndtComputations(compute_pkg, rp, &comm_handle);
            compute_box_aa = hypre_ComputePkgIndtBoxes(compute_pkg);
         }
         break;

         case 1:
         {
            hypre_FinalizeIndtComputations(comm_handle);
            compute_box_aa = hypre_ComputePkgDeptBoxes(compute_pkg);
         }
         break;
      }

      fi = 0;
      hypre_ForBoxArrayI(ci, cgrid_boxes)
         {
            while (fgrid_ids[fi] != cgrid_ids[ci])
            {
               fi++;
            }

            compute_box_a = hypre_BoxArrayArrayBoxArray(compute_box_aa, fi);

            R_dbox  = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(R),  fi);
            r_dbox  = hypre_BoxArrayBox(hypre_StructVectorDataSpace(r),  fi);
            rc_dbox = hypre_BoxArrayBox(hypre_StructVectorDataSpace(rc), ci);

            if (R_stored_as_transpose)
            {
               Rp0 = hypre_StructMatrixBoxData(R, fi, 1) -
                  hypre_BoxOffsetDistance(R_dbox, stencil_shape[1]);
               Rp1 = hypre_StructMatrixBoxData(R, fi, 0);
            }
            else
            {
               Rp0 = hypre_StructMatrixBoxData(R, fi, 0);
               Rp1 = hypre_StructMatrixBoxData(R, fi, 1);
            }
            rp  = hypre_StructVectorBoxData(r, fi);
            rp0 = rp + hypre_BoxOffsetDistance(r_dbox, stencil_shape[0]);
            rp1 = rp + hypre_BoxOffsetDistance(r_dbox, stencil_shape[1]);
            rcp = hypre_StructVectorBoxData(rc, ci);

            hypre_ForBoxI(j, compute_box_a)
               {
                  compute_box = hypre_BoxArrayBox(compute_box_a, j);

                  start  = hypre_BoxIMin(compute_box);
                  hypre_StructMapFineToCoarse(start, cindex, stride, startc);

                  hypre_BoxGetStrideSize(compute_box, stride, loop_size);
                  hypre_BoxLoop3Begin(loop_size,
                                      R_dbox,  startc, stridec, Ri,
                                      r_dbox,  start,  stride,  ri,
                                      rc_dbox, startc, stridec, rci);
#define HYPRE_BOX_SMP_PRIVATE loopk,loopi,loopj,Ri,ri,rci
#include "hypre_box_smp_forloop.h"
                  hypre_BoxLoop3For(loopi, loopj, loopk, Ri, ri, rci)
                     {
                        rcp[rci] = rp[ri] + (Rp0[Ri] * rp0[ri] +
                                             Rp1[Ri] * rp1[ri]);
                     }
                  hypre_BoxLoop3End(Ri, ri, rci);
               }
         }