/*
 * F90 interface wrapper for HYPRE_StructStencilCreate.
 *
 * dim, size and stencil are forwarded through the hypre_F90_Pass* macros;
 * the value returned by HYPRE_StructStencilCreate is converted to
 * hypre_F90_Int and stored in *ierr.
 */
void
hypre_F90_IFACE(hypre_structstencilcreate, HYPRE_STRUCTSTENCILCREATE)
   ( hypre_F90_Int *dim,
     hypre_F90_Int *size,
     hypre_F90_Obj *stencil,
     hypre_F90_Int *ierr    )
{
   hypre_F90_Int status;

   status = (hypre_F90_Int) HYPRE_StructStencilCreate(
               hypre_F90_PassInt (dim),
               hypre_F90_PassInt (size),
               hypre_F90_PassObjRef (HYPRE_StructStencil, stencil) );

   *ierr = status;
}
Beispiel #2
0
double *solve(double *Ab, int solver_id, struct parms parms)
{
    int i, j;
    double final_res_norm;
    int time_index, n_pre, n_post, num_iterations;
    n_pre  = 1; n_post = 1;
    double *A_val, *b_val;
    A_val = (double *) calloc(parms.N*parms.nsten, sizeof(double));
    b_val = (double *) calloc(parms.N, sizeof(double));
                    
    for (i = 0; i < (parms.N*parms.nsten); i++){
        A_val[i] = Ab[i];
    }
    for (i = 0; i < parms.N; i++){
        b_val[i] = Ab[i+parms.N*parms.nsten];
    }

    // HYPRE //
    HYPRE_StructGrid     grid;
    HYPRE_StructStencil  stencil;
    HYPRE_StructMatrix   A;
    HYPRE_StructVector   b;
    HYPRE_StructVector   x;
    HYPRE_StructSolver   solver;
    HYPRE_StructSolver   precond;

#if Dim == 2
    HYPRE_Int ilower[2] = {parms.x0, parms.y0};
    HYPRE_Int iupper[2] = {parms.x1, parms.y1};
#endif

#if Dim == 3
    HYPRE_Int ilower[3] = {parms.x0, parms.y0, 0};
    HYPRE_Int iupper[3] = {parms.x1, parms.y1, parms.Nz-1};
#endif
    {
    // Create an empty 2D grid object
        HYPRE_StructGridCreate(MPI_COMM_WORLD, Dim, &grid);

    // Add a new box to the grid
        HYPRE_StructGridSetExtents(grid, ilower, iupper);

    // 1. Set up periodic boundary condition in y-direction and create the grid 
        int pset[3]; 
        pset[0] = 0; pset[1] = parms.Ny; pset[2] = 0;
#if Dim == 3
        pset[2] = parms.Nz;
#endif
    //HYPRE_StructGridSetNumGhost(grid,pset)
        HYPRE_StructGridSetPeriodic(grid, pset);
        HYPRE_StructGridAssemble(grid);
    }

    // 2. Define the discretization stencil
    {
        if (Dim == 2){

        // Create an empty 2D, 5-pt stencil object
            HYPRE_StructStencilCreate(2, parms.nsten, &stencil);

        // Define the geometry of the stencil
            {
                int offsets[5][2] = {{0,0}, {-1,0}, {0,-1}, {0,1}, {1,0}};
                for (i = 0; i < parms.nsten; i++)
                    HYPRE_StructStencilSetElement(stencil, i, offsets[i]);
            }
        }
        else
        {
            HYPRE_StructStencilCreate(3, parms.nsten, &stencil);

            // Define the geometry of the 3D stencil
            {
                int offsets[7][3] = {{0,0,0}, {-1,0,0}, {0,-1,0}, {0,1,0}, {1,0,0}, {0,0,-1}, {0,0,1}};

                for (i = 0; i < parms.nsten; i++)
                    HYPRE_StructStencilSetElement(stencil, i, offsets[i]);
            }
        }
    }
    // 3. Set up a Struct Matrix A from Aval
    {
        HYPRE_Int stencil_indices[parms.nsten];

        // Create an empty matrix object
        HYPRE_StructMatrixCreate(MPI_COMM_WORLD, grid, stencil, &A);

        // Indicate that the matrix coefficients are ready to be set
        HYPRE_StructMatrixInitialize(A);

        for (j = 0; j < parms.nsten; j++)
            stencil_indices[j] = j;

        HYPRE_StructMatrixSetBoxValues(A, ilower, iupper, parms.nsten, stencil_indices, A_val);

        free(A_val);
    }

    // 4. Set up Struct Vectors for b from b_val and set x = 0
    {
        double *values;

        HYPRE_StructVectorCreate(MPI_COMM_WORLD, grid, &b);
        HYPRE_StructVectorCreate(MPI_COMM_WORLD, grid, &x);

        HYPRE_StructVectorInitialize(b);
        HYPRE_StructVectorInitialize(x);

        values = calloc((parms.N), sizeof(double));

        for (i = 0; i < (parms.N); i++)
            values[i] = 0.0;
        HYPRE_StructVectorSetBoxValues(x, ilower, iupper, values);
        HYPRE_StructVectorSetBoxValues(b, ilower, iupper, b_val);

        free(b_val);
        free(values);
    }

    //Finalize the vector and matrix assembly

    HYPRE_StructMatrixAssemble(A);
    HYPRE_StructVectorAssemble(b);
    HYPRE_StructVectorAssemble(x);
#if DEBUG == 3
    HYPRE_StructMatrixPrint("./poisson.matrix", A, 0);
    HYPRE_StructVectorPrint("./poisson.rhs", b, 0);
    /*char fname[64];
    char Aname[64], bname[64];
    sprintf(Aname,"data/A%d.",parms.cyc);
    sprintf(bname,"data/b%d.",parms.cyc);
    filename(fname, Aname, parms.wkdir, parms);
    HYPRE_StructMatrixPrint(fname, A, 0);
    filename(fname, bname, parms.wkdir, parms);
    HYPRE_StructVectorPrint(fname, b, 0);*/
#endif

    // 6. Set up and use a solver (SMG)
    if (solver_id == 0)
    {
        time_index = hypre_InitializeTiming("SMG Setup");
        hypre_BeginTiming(time_index);
        HYPRE_StructSMGCreate(MPI_COMM_WORLD, &solver);
        HYPRE_StructSMGSetMemoryUse(solver, 0);
        HYPRE_StructSMGSetMaxIter(solver, 100);
        HYPRE_StructSMGSetTol(solver, 1.0e-12);
        HYPRE_StructSMGSetRelChange(solver, 0);
        HYPRE_StructSMGSetNumPreRelax(solver, n_pre);
        HYPRE_StructSMGSetNumPostRelax(solver, n_post);
        // Logging must be on to get iterations and residual norm info below
        HYPRE_StructSMGSetLogging(solver, 1);

        // Setup and print setup timings
        HYPRE_StructSMGSetup(solver, A, b, x);
        hypre_EndTiming(time_index);
#if DEBUG == 3
        hypre_PrintTiming("Setup phase times", MPI_COMM_WORLD);
#endif
        hypre_FinalizeTiming(time_index);
        hypre_ClearTiming();

        // Solve and print solve timings
        time_index = hypre_InitializeTiming("SMG Solve");
        hypre_BeginTiming(time_index);
        HYPRE_StructSMGSolve(solver, A, b, x);
        hypre_EndTiming(time_index);
#if DEBUG == 3
        hypre_PrintTiming("Solve phase times", MPI_COMM_WORLD);
#endif
        hypre_FinalizeTiming(time_index);
        hypre_ClearTiming();

        // Get some info on the run
        HYPRE_StructSMGGetNumIterations(solver, &num_iterations);
        HYPRE_StructSMGGetFinalRelativeResidualNorm(solver, &final_res_norm);
#if DEBUG == 2
        if (parms.rank == 0){
            fprintf(stdout, "Number of Iterations = %4d ; Final Relative Residual Norm = %e\n\n", num_iterations, final_res_norm);
        }
#endif
        // Clean up 
        HYPRE_StructSMGDestroy(solver);
    }

    // 6. Set up and use a solver (PCG) with SMG Preconditioner
    if (solver_id == 1)
    {
        HYPRE_StructPCGCreate(MPI_COMM_WORLD, &solver);
        //HYPRE_StructPCGSetMemoryUse(solver, 0);
        HYPRE_StructPCGSetMaxIter(solver, 100);
        HYPRE_StructPCGSetTol(solver, 1.0e-12);
        HYPRE_StructPCGSetTwoNorm(solver, 1);
        HYPRE_StructPCGSetRelChange(solver, 0);
        //HYPRE_StructPCGSetPrintLevel(solver, 2 ); /* print each CG iteration */
        HYPRE_StructPCGSetLogging(solver, 1);
       
        /* Use symmetric SMG as preconditioner */
        HYPRE_StructSMGCreate(MPI_COMM_WORLD, &precond);
        HYPRE_StructSMGSetMemoryUse(precond, 0);
        HYPRE_StructSMGSetMaxIter(precond, 32);
        HYPRE_StructSMGSetTol(precond, 0.0);
        HYPRE_StructSMGSetZeroGuess(precond);
        HYPRE_StructSMGSetNumPreRelax(precond, 1);
        HYPRE_StructSMGSetNumPostRelax(precond, 1);
 
        /* Set the preconditioner and solve */
        HYPRE_StructPCGSetPrecond(solver, HYPRE_StructSMGSolve, HYPRE_StructSMGSetup, precond);
        HYPRE_StructPCGSetup(solver, A, b, x);
        HYPRE_StructPCGSolve(solver, A, b, x);
 
        /* Get some info on the run */
        HYPRE_StructPCGGetNumIterations(solver, &num_iterations);
        HYPRE_StructPCGGetFinalRelativeResidualNorm(solver, &final_res_norm);
#if DEBUG == 2
        if (parms.rank == 0){
            fprintf(stdout, "Number of Iterations = %4d ; Final Relative Residual Norm = %e\n\n", num_iterations, final_res_norm);
        }
#endif

        /* Clean up */
        HYPRE_StructSMGDestroy(precond);
        HYPRE_StructPCGDestroy(solver);
    }

    // get the local solution
    double *values = calloc(parms.N, sizeof(double));
    HYPRE_StructVectorGetBoxValues(x, ilower, iupper, values);

    // Free memory
    HYPRE_StructGridDestroy(grid);
    HYPRE_StructStencilDestroy(stencil);
    HYPRE_StructMatrixDestroy(A);
    HYPRE_StructVectorDestroy(b);
    HYPRE_StructVectorDestroy(x);
    free(Ab);
    return(values);
}
Beispiel #3
0
/*
 * Two-process HYPRE Struct example.
 *
 * Builds a 2D grid from two boxes (one per process), a 5-point stencil with
 * 4 on the diagonal and -1 on the neighbours, zeroes the stencil entries
 * that reach outside the grid, sets b = 1 and x = 0, and solves with PCG.
 *
 * Options:
 *   -vis   save the grid and solution through the GLVis_Print* helpers
 *   -help  print usage and exit
 *
 * Always returns 0, including when not run on exactly two processes.
 */
int main (int argc, char *argv[])
{
   int rank, nranks;
   int save_vis = 0;

   HYPRE_StructGrid     grid;
   HYPRE_StructStencil  stencil;
   HYPRE_StructMatrix   A;
   HYPRE_StructVector   b;
   HYPRE_StructVector   x;
   HYPRE_StructSolver   solver;

   /* Start MPI and query rank / communicator size */
   MPI_Init(&argc, &argv);
   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   MPI_Comm_size(MPI_COMM_WORLD, &nranks);

   /* The box layout below is written for exactly two processes */
   if (nranks != 2)
   {
      if (rank == 0)
      {
         printf("Must run with 2 processors!\n");
      }
      MPI_Finalize();
      return 0;
   }

   /* Scan the command line */
   {
      int a;
      int want_help = 0;

      for (a = 0; a < argc; a++)
      {
         if (strcmp(argv[a], "-vis") == 0)
         {
            save_vis = 1;
         }
         else if (strcmp(argv[a], "-help") == 0)
         {
            want_help = 1;
            break;
         }
      }

      if (want_help)
      {
         /* Only rank 0 prints, but every rank shuts down */
         if (rank == 0)
         {
            printf("\n");
            printf("Usage: %s [<options>]\n", argv[0]);
            printf("\n");
            printf("  -vis : save the solution for GLVis visualization\n");
            printf("\n");
         }
         MPI_Finalize();
         return 0;
      }
   }

   /* 1. Grid: each process registers the box it owns */
   {
      HYPRE_StructGridCreate(MPI_COMM_WORLD, 2, &grid);

      if (rank == 0)
      {
         int lo[2] = {-3, 1};
         int hi[2] = {-1, 2};
         HYPRE_StructGridSetExtents(grid, lo, hi);
      }
      else if (rank == 1)
      {
         int lo[2] = {0, 1};
         int hi[2] = {2, 4};
         HYPRE_StructGridSetExtents(grid, lo, hi);
      }

      /* Collective: completes the grid */
      HYPRE_StructGridAssemble(grid);
   }

   /* 2. Stencil: 2D, 5 entries, given as index-space offsets */
   {
      int shifts[5][2] = {{0,0}, {-1,0}, {1,0}, {0,-1}, {0,1}};
      int e;

      HYPRE_StructStencilCreate(2, 5, &stencil);
      for (e = 0; e < 5; e++)
      {
         HYPRE_StructStencilSetElement(stencil, e, shifts[e]);
      }
   }

   /* 3. Matrix */
   {
      HYPRE_StructMatrixCreate(MPI_COMM_WORLD, grid, stencil, &A);
      HYPRE_StructMatrixInitialize(A);

      /* First give every grid point the same 5 coefficients: 4 for
         stencil entry 0 and -1 for the other four. Values are laid out
         point by point, 5 per point. */
      if (rank == 0)
      {
         int lo[2] = {-3, 1};
         int hi[2] = {-1, 2};
         int entries[5] = {0, 1, 2, 3, 4};
         double coeffs[30];   /* 6 grid points x 5 stencil entries */
         int k;

         for (k = 0; k < 30; k++)
         {
            coeffs[k] = (k % 5 == 0) ? 4.0 : -1.0;
         }

         HYPRE_StructMatrixSetBoxValues(A, lo, hi, 5, entries, coeffs);
      }
      else if (rank == 1)
      {
         int lo[2] = {0, 1};
         int hi[2] = {2, 4};
         int entries[5] = {0, 1, 2, 3, 4};
         double coeffs[60];   /* 12 grid points x 5 stencil entries */
         int k;

         for (k = 0; k < 60; k++)
         {
            coeffs[k] = (k % 5 == 0) ? 4.0 : -1.0;
         }

         HYPRE_StructMatrixSetBoxValues(A, lo, hi, 5, entries, coeffs);
      }

      /* Then zero the coefficients that reach outside the grid.
         Each table row is {lo_x, lo_y, hi_x, hi_y, stencil entry}. */
      if (rank == 0)
      {
         static const int edges[3][5] = {
            {-3, 1, -1, 1, 3},   /* bottom row,  entry pointing down  */
            {-3, 1, -3, 2, 1},   /* left column, entry pointing left  */
            {-3, 2, -1, 2, 4}    /* top row,     entry pointing up    */
         };
         double zeros[3] = {0.0, 0.0, 0.0};
         int e;

         for (e = 0; e < 3; e++)
         {
            int lo[2], hi[2], entry[1];

            lo[0] = edges[e][0];
            lo[1] = edges[e][1];
            hi[0] = edges[e][2];
            hi[1] = edges[e][3];
            entry[0] = edges[e][4];
            HYPRE_StructMatrixSetBoxValues(A, lo, hi, 1, entry, zeros);
         }
      }
      else if (rank == 1)
      {
         static const int edges[4][5] = {
            {0, 1, 2, 1, 3},   /* bottom row,   entry pointing down  */
            {2, 1, 2, 4, 2},   /* right column, entry pointing right */
            {0, 4, 2, 4, 4},   /* top row,      entry pointing up    */
            {0, 3, 0, 4, 1}    /* part of the left column that does not
                                  border the box on process 0         */
         };
         double zeros[4] = {0.0, 0.0, 0.0, 0.0};
         int e;

         for (e = 0; e < 4; e++)
         {
            int lo[2], hi[2], entry[1];

            lo[0] = edges[e][0];
            lo[1] = edges[e][1];
            hi[0] = edges[e][2];
            hi[1] = edges[e][3];
            entry[0] = edges[e][4];
            HYPRE_StructMatrixSetBoxValues(A, lo, hi, 1, entry, zeros);
         }
      }

      /* Collective: completes the matrix */
      HYPRE_StructMatrixAssemble(A);
   }

   /* 4. Vectors: b = 1 and x = 0 on every locally owned grid point */
   {
      HYPRE_StructVectorCreate(MPI_COMM_WORLD, grid, &b);
      HYPRE_StructVectorCreate(MPI_COMM_WORLD, grid, &x);

      HYPRE_StructVectorInitialize(b);
      HYPRE_StructVectorInitialize(x);

      if (rank == 0)
      {
         int lo[2] = {-3, 1};
         int hi[2] = {-1, 2};
         double ones[6], zeros[6];   /* 6 grid points */
         int k;

         for (k = 0; k < 6; k++)
         {
            ones[k] = 1.0;
            zeros[k] = 0.0;
         }
         HYPRE_StructVectorSetBoxValues(b, lo, hi, ones);
         HYPRE_StructVectorSetBoxValues(x, lo, hi, zeros);
      }
      else if (rank == 1)
      {
         int lo[2] = {0, 1};
         int hi[2] = {2, 4};
         double ones[12], zeros[12];   /* 12 grid points */
         int k;

         for (k = 0; k < 12; k++)
         {
            ones[k] = 1.0;
            zeros[k] = 0.0;
         }
         HYPRE_StructVectorSetBoxValues(b, lo, hi, ones);
         HYPRE_StructVectorSetBoxValues(x, lo, hi, zeros);
      }

      /* Collective: completes the vectors */
      HYPRE_StructVectorAssemble(b);
      HYPRE_StructVectorAssemble(x);
   }

   /* 5. Solver: PCG with tolerance 1e-6 and print level 2 */
   {
      HYPRE_StructPCGCreate(MPI_COMM_WORLD, &solver);

      HYPRE_StructPCGSetTol(solver, 1.0e-06);
      HYPRE_StructPCGSetPrintLevel(solver, 2);

      HYPRE_StructPCGSetup(solver, A, b, x);
      HYPRE_StructPCGSolve(solver, A, b, x);
   }

   /* Optionally dump grid and solution for GLVis */
   if (save_vis)
   {
      GLVis_PrintStructGrid(grid, "vis/ex1.mesh", rank, NULL, NULL);
      GLVis_PrintStructVector(x, "vis/ex1.sol", rank);
      GLVis_PrintData("vis/ex1.data", rank, nranks);
   }

   /* Release HYPRE objects */
   HYPRE_StructGridDestroy(grid);
   HYPRE_StructStencilDestroy(stencil);
   HYPRE_StructMatrixDestroy(A);
   HYPRE_StructVectorDestroy(b);
   HYPRE_StructVectorDestroy(x);
   HYPRE_StructPCGDestroy(solver);

   /* Shut MPI down */
   MPI_Finalize();

   return 0;
}
// Create the hypre grid, stencil, matrix and solution/right-hand-side vectors
// for patch level d_level_num of d_hierarchy.
//
// The results are stored in d_grid, d_stencil, d_matrix, d_sol_vec and
// d_rhs_vec.  The matrix and both vectors are left in the "initialized"
// state; no values are set and nothing is assembled here.
void
CCDivGradHypreLevelSolver::allocateHypreData()
{
    // Get the MPI communicator.
#ifdef HAVE_MPI
    MPI_Comm communicator = SAMRAI_MPI::getCommunicator();
#else
    // Value-initialize the handle: it is passed by value to the hypre
    // constructors below, and reading an uninitialized object is undefined.
    MPI_Comm communicator = MPI_Comm();
#endif

    // Setup the hypre grid.
    Pointer<PatchLevel<NDIM> > level = d_hierarchy->getPatchLevel(d_level_num);
    Pointer<CartesianGridGeometry<NDIM> > grid_geometry = d_hierarchy->getGridGeometry();
    const IntVector<NDIM>& ratio = level->getRatio();
    const IntVector<NDIM>& periodic_shift = grid_geometry->getPeriodicShift(ratio);
#ifdef DEBUG_CHECK_ASSERTIONS
    // Every component of the periodic shift must be positive.
    TBOX_ASSERT(periodic_shift.min() > 0);
#endif

    // Each patch contributes its box coarsened by a factor of 2.
    HYPRE_StructGridCreate(communicator, NDIM, &d_grid);
    for (PatchLevel<NDIM>::Iterator p(level); p; p++)
    {
        const Box<NDIM> patch_box = Box<NDIM>::coarsen(level->getPatch(p())->getBox(),2);
        Index<NDIM> lower = patch_box.lower();
        Index<NDIM> upper = patch_box.upper();
        HYPRE_StructGridSetExtents(d_grid, lower, upper);
    }

    // The periodic shift is halved to match the coarsened boxes; entries
    // beyond NDIM are set to zero.
    int hypre_periodic_shift[3];
    for (unsigned int d = 0; d < NDIM; ++d)
    {
        hypre_periodic_shift[d] = periodic_shift(d)/2;
    }
    for (int d = NDIM; d < 3; ++d)
    {
        hypre_periodic_shift[d] = 0;
    }
    HYPRE_StructGridSetPeriodic(d_grid, hypre_periodic_shift);
    HYPRE_StructGridAssemble(d_grid);

    // Allocate stencil data and set stencil offsets: the 2*NDIM axis
    // neighbours first, the centre point last.
    static const int stencil_sz = 2*NDIM+1;
#if (NDIM == 2)
    int stencil_offsets[stencil_sz][2] = {
        { -1, 0 }, { 0, -1}, { +1, 0}, { 0, +1 }, { 0, 0 }
    };
#endif
#if (NDIM == 3)
    int stencil_offsets[stencil_sz][3] = {
        { -1,  0,  0}, { 0,  -1,  0}, { 0,  0,  -1},
        { +1,  0,  0}, { 0,  +1,  0}, { 0,  0,  +1},
        { 0,  0,  0}
    };
#endif
    HYPRE_StructStencilCreate(NDIM, stencil_sz, &d_stencil);
    for (int s = 0; s < stencil_sz; ++s)
    {
        HYPRE_StructStencilSetElement(d_stencil, s, stencil_offsets[s]);
    }

    // Allocate the hypre matrix.
    //
    // NOTE(review): the ghost arrays presumably hold {lower, upper} widths per
    // dimension -- confirm against the hypre reference manual.
#if (NDIM == 2)
    int full_ghosts[2*3] = { 1, 1, 1, 1, 0, 0 };
#endif
#if (NDIM == 3)
    int full_ghosts[2*3] = { 1, 1, 1, 1, 1, 1 };
#endif
    int   no_ghosts[2*3] = { 0, 0, 0, 0, 0, 0 };

    HYPRE_StructMatrixCreate(communicator, d_grid, d_stencil, &d_matrix);
    HYPRE_StructMatrixSetNumGhost(d_matrix, full_ghosts);
    HYPRE_StructMatrixSetSymmetric(d_matrix, 0);
    HYPRE_StructMatrixInitialize(d_matrix);

    // Allocate the hypre vectors: the solution vector uses full_ghosts, the
    // right-hand side uses no_ghosts.
    HYPRE_StructVectorCreate(communicator, d_grid, &d_sol_vec);
    HYPRE_StructVectorSetNumGhost(d_sol_vec, full_ghosts);
    HYPRE_StructVectorInitialize(d_sol_vec);

    HYPRE_StructVectorCreate(communicator, d_grid, &d_rhs_vec);
    HYPRE_StructVectorSetNumGhost(d_rhs_vec, no_ghosts);
    HYPRE_StructVectorInitialize(d_rhs_vec);
}// allocateHypreData
Beispiel #5
0
int main(int argc, char **argv) {
  int nmax, nprocs, me, me_plus;
  int g_a_data, g_a_i, g_a_j, isize;
  int gt_a_data, gt_a_i, gt_a_j;
  int g_b, g_c;
  int i, j, jj, k, one, jcnt;
  int chunk, kp1, ld;
  int *p_i, *p_j;
  double *p_data, *p_b, *p_c;
  double t_beg, t_beg2, t_ga_tot, t_get, t_mult, t_cnstrct, t_mpi_in, t_ga_in;
  double t_hypre_strct, t_ga_trans, t_gp_get;
  double t_get_blk_csr, t_trans_blk_csr, t_trans_blk, t_create_csr_ga, t_beg3;
  double t_gp_tget, t_gp_malloc, t_gp_assign, t_beg4;
  double prdot, dotga, dothypre, tempc;
  double prtot, gatot, hypretot, gatot2, hypretot2;
  double prdot2, prtot2;
  int status;
  int idim, jdim, kdim, idum, memsize;
  int lsize, ntot;
  int heap=200000, fudge=100, stack=200000, ma_heap;
  double *cbuf, *vector;
  int pdi, pdj, pdk, ip, jp, kp, ncells;
  int lo[3],hi[3];
  int blo[3], bhi[3];
  int ld_a, ld_b, ld_c, ld_i, ld_j, irows, ioff, joff, total_procs;
  int iproc, iblock, btot;
  double *amat, *bvec;
  int *ivec, *jvec;
  int *proclist, *proc_inv, *icnt;
  int *voffset, *offset, *mapc;
  int iloop, lo_bl, hi_bl;
  char *buf, **buf_ptr;
  int *iparams, *jval, *ival;
  double *rval, *rvalt;
  int imin, imax, jmin, jmax, irow, icol, nnz;
  int nrows, kmin, kmax, lmin, lmax, jdx;
  int LOOPNUM = 100;
  void **blk_ptr;
  void *blk;
  int blk_size, tsize, zero;
  int *iblk, *jblk, *blkidx;
  int *tblk_ptr;
  int *ivalt, *jvalt, *iparamst;
  int *iblk_t, *jblk_t, *blkidx_t;
/*
   Hypre declarations
*/
  int ierr;
#if USE_HYPRE
  HYPRE_StructGrid grid;
  HYPRE_StructStencil stencil;
  HYPRE_StructMatrix matrix;
  HYPRE_StructVector vec_x, vec_y;
  int i4, j4, ndim, nelems, offsets[7][3];
  int stencil_indices[7], hlo[3], hhi[3];
  double weights[7];
  double *values;
  double alpha, beta;
  int *rows, *cols;
#endif
/*
  ***  Intitialize a message passing library
*/
  zero = 0;
  one = 1;
  ierr = MPI_Init(&argc, &argv);
/*
 ***  Initialize GA
 
      There are 2 choices: ga_initialize or ga_initialize_ltd.
      In the first case, there is no explicit limit on memory usage.
      In the second, user can set limit (per processor) in bytes.
*/
  t_beg = GA_Wtime();
  NGA_Initialize();
  GP_Initialize();
  t_ga_in = GA_Wtime() - t_beg;
  NGA_Dgop(&t_ga_in,one,"+");

  t_ga_tot = 0.0;
  t_ga_trans = 0.0;
  t_get_blk_csr = 0.0;
  t_create_csr_ga = 0.0;
  t_trans_blk_csr = 0.0;
  t_trans_blk = 0.0;
  t_gp_get = 0.0;
  t_gp_malloc = 0.0;
  t_gp_assign = 0.0;
  t_mult = 0.0;
  t_get = 0.0;
  t_gp_tget = 0.0;
  t_hypre_strct = 0.0;
  prtot = 0.0;
  prtot2 = 0.0;
  gatot = 0.0;
  hypretot = 0.0;

  me = NGA_Nodeid();
  me_plus = me + 1;
  nprocs = NGA_Nnodes();
  if (me == 0) {
   printf("Time to initialize GA:                                 %12.4f\n",
          t_ga_in/((double)nprocs));
  }
/*
     we can also use GA_set_memory_limit BEFORE first ga_create call
*/
  ma_heap = heap + fudge;
/*      call GA_set_memory_limit(util_mdtob(ma_heap)) */
 
  if (me == 0) {
    printf("\nNumber of cores used: %d\n\nGA initialized\n\n",nprocs);
  }
/*
 ***  Initialize the MA package
      MA must be initialized before any global array is allocated
*/
  if (!MA_init(MT_DBL, stack, ma_heap)) NGA_Error("ma_init failed",-1);
/*
     create a sparse LMAX x LMAX matrix and two vectors of length
     LMAX. The matrix is stored in compressed row format.
     One of the vectors is filled with random data and the other
     is filled with zeros.
*/
  idim = IMAX;
  jdim = JMAX;
  kdim = KMAX;
  ntot = idim*jdim*kdim;
  if (me == 0) {
    printf("\nDimension of matrix: %d\n\n",ntot);
  }
  t_beg = GA_Wtime();
  grid_factor(nprocs,idim,jdim,kdim,&pdi,&pdj,&pdk);
  if (me == 0) {
    printf("\nProcessor grid configuration\n");
    printf("  PDX: %d\n",pdi);
    printf("  PDY: %d\n",pdj);
    printf("  PDZ: %d\n\n",pdk);
    printf(" Number of Loops: %d\n",LOOPNUM);
  }

  create_laplace_mat(idim,jdim,kdim,pdi,pdj,pdk,&g_a_data,&g_a_j,&g_a_i,&mapc);
  t_cnstrct = GA_Wtime() - t_beg;

  g_b = NGA_Create_handle();
  NGA_Set_data(g_b,one,&ntot,MT_DBL);
  NGA_Set_irreg_distr(g_b,mapc,&nprocs);
  status = NGA_Allocate(g_b);
/*
    fill g_b with random values
*/
  NGA_Distribution(g_b,me,blo,bhi);
  NGA_Access(g_b,blo,bhi,&p_b,&ld);
  ld = bhi[0]-blo[0]+1;
  btot = ld;
  vector = (double*)malloc(ld*sizeof(double));
  for (i=0; i<ld; i++) {
    idum  = 0;
    p_b[i] = ran3(&idum);
    vector[i] = p_b[i];
  }
  NGA_Release(g_b,blo,bhi);
  NGA_Sync();

  g_c = NGA_Create_handle();
  NGA_Set_data(g_c,one,&ntot,MT_DBL);
  NGA_Set_irreg_distr(g_c,mapc,&nprocs);
  status = NGA_Allocate(g_c);
  NGA_Zero(g_c);
#if USE_HYPRE
/*
    Assemble HYPRE grid and use that to create matrix. Start by creating
    grid partition
*/
  ndim = 3;
  i = me;
  ip = i%pdi;
  i = (i-ip)/pdi;
  jp = i%pdj;
  kp = (i-jp)/pdj;
  lo[0] = (int)(((double)idim)*((double)ip)/((double)pdi));
  if (ip < pdi-1) {
    hi[0] = (int)(((double)idim)*((double)(ip+1))/((double)pdi)) - 1;
  } else {
    hi[0] = idim - 1;
  }
  lo[1] = (int)(((double)jdim)*((double)jp)/((double)pdj));
  if (jp < pdj-1) {
    hi[1] = (int)(((double)jdim)*((double)(jp+1))/((double)pdj)) - 1;
  } else {
    hi[1] = jdim - 1;
  }
  lo[2] = (int)(((double)kdim)*((double)kp)/((double)pdk));
  if (kp < pdk-1) {
    hi[2] = (int)(((double)kdim)*((double)(kp+1))/((double)pdk)) - 1;
  } else {
    hi[2] = kdim - 1;
  }
/*
   Create grid
*/
  hlo[0] = lo[0];
  hlo[1] = lo[1];
  hlo[2] = lo[2];
  hhi[0] = hi[0];
  hhi[1] = hi[1];
  hhi[2] = hi[2];
  ierr = HYPRE_StructGridCreate(MPI_COMM_WORLD, ndim, &grid);
  ierr = HYPRE_StructGridSetExtents(grid, hlo, hhi);
  ierr = HYPRE_StructGridAssemble(grid);
/*
   Create stencil
*/
  offsets[0][0] = 0;
  offsets[0][1] = 0;
  offsets[0][2] = 0;

  offsets[1][0] = 1;
  offsets[1][1] = 0;
  offsets[1][2] = 0;

  offsets[2][0] = 0;
  offsets[2][1] = 1;
  offsets[2][2] = 0;

  offsets[3][0] = 0;
  offsets[3][1] = 0;
  offsets[3][2] = 1;

  offsets[4][0] = -1;
  offsets[4][1] = 0;
  offsets[4][2] = 0;

  offsets[5][0] = 0;
  offsets[5][1] = -1;
  offsets[5][2] = 0;

  offsets[6][0] = 0;
  offsets[6][1] = 0;
  offsets[6][2] = -1;

  nelems = 7;
  ierr = HYPRE_StructStencilCreate(ndim, nelems, &stencil);
  for (i=0; i<nelems; i++) {
    ierr = HYPRE_StructStencilSetElement(stencil, i, offsets[i]);
  }

  ncells = (hi[0]-lo[0]+1)*(hi[1]-lo[1]+1)*(hi[2]-lo[2]+1);
  jcnt = 7*ncells;
  values = (double*)malloc(jcnt*sizeof(double));
  jcnt = 0;
  weights[0] = 6.0;
  weights[1] = -1.0;
  weights[2] = -1.0;
  weights[3] = -1.0;
  weights[4] = -1.0;
  weights[5] = -1.0;
  weights[6] = -1.0;
  for (i=0; i<ncells; i++) {
    for (j=0; j<7; j++) {
      values[jcnt] = weights[j];
      jcnt++;
    }
  }

  ierr = HYPRE_StructMatrixCreate(MPI_COMM_WORLD, grid, stencil, &matrix);
  ierr = HYPRE_StructMatrixInitialize(matrix);
  for (i=0; i<7; i++) {
    stencil_indices[i] = i;
  }
  ierr = HYPRE_StructMatrixSetBoxValues(matrix, hlo, hhi, 7, stencil_indices, values);
  free(values);
/*
   Check all six sides of current box to see if any are boundaries.
   Set values to zero if they are.
*/
  if (hi[0] == idim-1) {
    ncells = (hi[1]-lo[1]+1)*(hi[2]-lo[2]+1);
    hlo[0] = idim-1;
    hhi[0] = idim-1;
    hlo[1] = lo[1];
    hhi[1] = hi[1];
    hlo[2] = lo[2];
    hhi[2] = hi[2];
    values = (double*)malloc(ncells*sizeof(double));
    for (i=0; i<ncells; i++) values[i] = 0.0;
    i4 = 1;
    j4 = 1;
    ierr = HYPRE_StructMatrixSetBoxValues(matrix, hlo, hhi, i4, &j4, values);
    free(values);
  }
  if (hi[1] == jdim-1) {
    ncells = (hi[0]-lo[0]+1)*(hi[2]-lo[2]+1);
    hlo[0] = lo[0];
    hhi[0] = hi[0];
    hlo[1] = jdim-1;
    hhi[1] = jdim-1;
    hlo[2] = lo[2];
    hhi[2] = hi[2];
    values = (double*)malloc(ncells*sizeof(double));
    for (i=0; i<ncells; i++) values[i] = 0.0;
    i4 = 1;
    j4 = 2;
    ierr = HYPRE_StructMatrixSetBoxValues(matrix, hlo, hhi, i4, &j4, values);
    free(values);
  } 
  if (hi[2] == kdim-1) {
    ncells = (hi[0]-lo[0]+1)*(hi[1]-lo[1]+1);
    hlo[0] = lo[0];
    hhi[0] = hi[0];
    hlo[1] = lo[1];
    hhi[1] = hi[1];
    hlo[2] = kdim-1;
    hhi[2] = kdim-1;
    values = (double*)malloc(ncells*sizeof(double));
    for (i=0; i<ncells; i++) values[i] = 0.0;
    i4 = 1;
    j4 = 3;
    ierr = HYPRE_StructMatrixSetBoxValues(matrix, hlo, hhi, i4, &j4, values);
    free(values);
  }
  if (lo[0] == 0) {
    ncells = (hi[1]-lo[1]+1)*(hi[2]-lo[2]+1);
    hlo[0] = 0;
    hhi[0] = 0;
    hlo[1] = lo[1];
    hhi[1] = hi[1];
    hlo[2] = lo[2];
    hhi[2] = hi[2];
    values = (double*)malloc(ncells*sizeof(double));
    for (i=0; i<ncells; i++) values[i] = 0.0;
    i4 = 1;
    j4 = 4;
    ierr = HYPRE_StructMatrixSetBoxValues(matrix, hlo, hhi, i4, &j4, values);
    free(values);
  }
  if (lo[1] == 0) {
    ncells = (hi[0]-lo[0]+1)*(hi[2]-lo[2]+1);
    hlo[0] = lo[0];
    hhi[0] = hi[0];
    hlo[1] = 0;
    hhi[1] = 0;
    hlo[2] = lo[2];
    hhi[2] = hi[2];
    values = (double*)malloc(ncells*sizeof(double));
    for (i=0; i<ncells; i++) values[i] = 0.0;
    i4 = 1;
    j4 = 5;
    ierr = HYPRE_StructMatrixSetBoxValues(matrix, hlo, hhi, i4, &j4, values);
    free(values);
  }
  if (lo[2] == 1) {
    ncells = (hi[1]-lo[1]+1)*(hi[2]-lo[2]+1);
    hlo[0] = lo[0];
    hhi[0] = hi[0];
    hlo[1] = lo[1];
    hhi[1] = hi[1];
    hlo[2] = 0;
    hhi[2] = 0;
    values = (double*)malloc(ncells*sizeof(double));
    for (i=0; i<ncells; i++) values[i] = 0.0;
    i4 = 1;
    j4 = 6;
    ierr = HYPRE_StructMatrixSetBoxValues(matrix, hlo, hhi, i4, &j4, values);
    free(values);
  }
  ierr = HYPRE_StructMatrixAssemble(matrix);
/*
    Create vectors for matrix-vector multiply
*/
  ierr = HYPRE_StructVectorCreate(MPI_COMM_WORLD, grid, &vec_x);
  ierr = HYPRE_StructVectorInitialize(vec_x);
  hlo[0] = lo[0];
  hlo[1] = lo[1];
  hlo[2] = lo[2];
  hhi[0] = hi[0];
  hhi[1] = hi[1];
  hhi[2] = hi[2];
  ierr = HYPRE_StructVectorSetBoxValues(vec_x, hlo, hhi, vector);
  ierr = HYPRE_StructVectorAssemble(vec_x);
  NGA_Distribution(g_a_i,me,blo,bhi);

  if (bhi[1] > ntot-1) {
    bhi[1] = ntot-1;
  }

  btot = (hi[0]-lo[0]+1)*(hi[1]-lo[1]+1)*(hi[2]-lo[2]+1);

  for (i=0; i<btot; i++) vector[i] = 0.0;
  hlo[0] = lo[0];
  hlo[1] = lo[1];
  hlo[2] = lo[2];
  hhi[0] = hi[0];
  hhi[1] = hi[1];
  hhi[2] = hi[2];
  ierr = HYPRE_StructVectorGetBoxValues(vec_x, hlo, hhi, vector);

  for (i=0; i<btot; i++) vector[i] = 0.0;
  ierr = HYPRE_StructVectorCreate(MPI_COMM_WORLD, grid, &vec_y);
  ierr = HYPRE_StructVectorInitialize(vec_y);
  ierr = HYPRE_StructVectorSetBoxValues(vec_y, hlo, hhi, vector);
  ierr = HYPRE_StructVectorAssemble(vec_y);
#endif
/* Multiply sparse matrix. Start by accessing pointers to local portions of
   g_a_data, g_a_j, g_a_i */

  NGA_Sync();
  for (iloop=0; iloop<LOOPNUM; iloop++) {
    t_beg2 = GA_Wtime();

    NGA_Distribution(g_c,me,blo,bhi);
    NGA_Access(g_c,blo,bhi,&p_c,&ld_c);
    for (i = 0; i<bhi[0]-blo[0]+1; i++) {
      p_c[i] = 0.0;
    }

/* get number of matrix blocks coupled to this process */
    NGA_Get(g_a_i,&me,&me,&lo_bl,&one);
#if 1
    NGA_Get(g_a_i,&me_plus,&me_plus,&hi_bl,&one);
    hi_bl--;
    total_procs = hi_bl - lo_bl + 1;
    blk_ptr = (void**)malloc(sizeof(void*));
/* Loop through matrix blocks */
    ioff = 0;
    for (iblock = 0; iblock<total_procs; iblock++) {
      t_beg = GA_Wtime();
      jdx = lo_bl+iblock;
#if 0
      GP_Access_element(g_a_data, &jdx, &blk_ptr[0], &isize);
#endif
#if 1
      GP_Get_size(g_a_data, &jdx, &jdx, &isize);
#endif
      blk = (void*)malloc(isize);
#if 1
      GP_Get(g_a_data, &jdx, &jdx, blk, blk_ptr, &one, &blk_size, &one, &tsize, 0); 
#endif
      t_gp_get = t_gp_get + GA_Wtime() - t_beg;
      iparams = (int*)blk_ptr[0];
      rval = (double*)(iparams+7);
      imin = iparams[0];
      imax = iparams[1];
      jmin = iparams[2];
      jmax = iparams[3];
      irow = iparams[4];
      icol = iparams[5];
      nnz = iparams[6];
      jval = (int*)(rval+nnz);
      ival = (int*)(jval+nnz);
      nrows = imax - imin + 1;
      bvec = (double*)malloc((jmax-jmin+1)*sizeof(double));
      j = 0;
      t_beg = GA_Wtime();
      NGA_Get(g_b,&jmin,&jmax,bvec,&j);
      t_get = t_get + GA_Wtime() - t_beg;
      t_beg = GA_Wtime();
      for (i=0; i<nrows; i++) {
        kmin = ival[i];
        kmax = ival[i+1]-1;
        tempc = 0.0;
        for (j = kmin; j<=kmax; j++) {
          jj = jval[j];
          tempc = tempc + rval[j]*bvec[jj];
        }
        p_c[i] = p_c[i] + tempc;
      }
      t_mult = t_mult + GA_Wtime() - t_beg;
      free(bvec);
      free(blk);
    }
    NGA_Sync();
    t_ga_tot = t_ga_tot + GA_Wtime() - t_beg2;

    NGA_Distribution(g_c,me,blo,bhi);
    NGA_Release(g_c,blo,bhi);

#if USE_HYPRE
    alpha = 1.0;
    beta = 0.0;
    t_beg = GA_Wtime();
    ierr = HYPRE_StructMatrixMatvec(alpha, matrix, vec_x, beta, vec_y);
    t_hypre_strct = t_hypre_strct + GA_Wtime() - t_beg;
    hlo[0] = lo[0];
    hlo[1] = lo[1];
    hlo[2] = lo[2];
    hhi[0] = hi[0];
    hhi[1] = hi[1];
    hhi[2] = hi[2];
    ierr = HYPRE_StructVectorGetBoxValues(vec_y, hlo, hhi, vector);
    NGA_Distribution(g_c,me,hlo,hhi);
    cbuf = (double*)malloc((hhi[0]-hlo[0]+1)*sizeof(double));
    NGA_Get(g_c,hlo,hhi,cbuf,&one);
    prdot = 0.0;
    dotga = 0.0;
    dothypre = 0.0;
    for (i=0; i<(hhi[0]-hlo[0]+1); i++) {
      dothypre = dothypre + vector[i]*vector[i];
      dotga = dotga + cbuf[i]*cbuf[i];
      prdot = prdot + (vector[i]-cbuf[i])*(vector[i]-cbuf[i]);
    }
    NGA_Dgop(&dotga,1,"+");
    NGA_Dgop(&dothypre,1,"+");
    NGA_Dgop(&prdot,1,"+");
    gatot += sqrt(dotga);
    hypretot += sqrt(dothypre);
    prtot += sqrt(prdot);
    free(cbuf);
#endif

/* Transpose matrix. Start by making local copies of ival and jval arrays for
   the sparse matrix of blocks stored in the GP array */
#if 1
    t_beg2 = GA_Wtime();
    t_beg3 = GA_Wtime();
    iblk = (int*)malloc((nprocs+1)*sizeof(int));
    iblk_t = (int*)malloc((nprocs+1)*sizeof(int));
#if 0
    NGA_Get(g_a_i,&zero,&nprocs,iblk,&one);
#else
    if (me == 0) {
      NGA_Get(g_a_i,&zero,&nprocs,iblk,&one);
    } else {
      for (i=0; i<nprocs+1; i++) {
        iblk[i] = 0;
      }
    }
    GA_Igop(iblk,nprocs+1,"+");
#endif
    jblk = (int*)malloc(iblk[nprocs]*sizeof(int));
    jblk_t = (int*)malloc(iblk[nprocs]*sizeof(int));
    iblock = iblk[nprocs]-1;
#if 0
    NGA_Get(g_a_j,&zero,&iblock,jblk,&one);
#else
    if (me == 0) {
      NGA_Get(g_a_j,&zero,&iblock,jblk,&one);
    } else {
      for (i=0; i<iblock+1; i++) {
        jblk[i] = 0;
      }
    }
    GA_Igop(jblk,iblock+1,"+");
#endif
    iblock++;
    blkidx = (int*)malloc(iblk[nprocs]*sizeof(int));
    blkidx_t = (int*)malloc(iblk[nprocs]*sizeof(int));
    for (i=0; i<iblock; i++) {
      blkidx[i] = i;
    }
    iblock = nprocs;
    t_get_blk_csr = t_get_blk_csr + GA_Wtime() - t_beg3;
    t_beg3 = GA_Wtime();
    stran(iblock, iblock, iblk, jblk, blkidx, iblk_t, jblk_t, blkidx_t);
    t_trans_blk_csr = t_trans_blk_csr + GA_Wtime() - t_beg3;
    t_beg3 = GA_Wtime();
    gt_a_data = GP_Create_handle();
    i = iblk_t[nprocs];
    GP_Set_dimensions(gt_a_data, one, &i);
    GP_Set_irreg_distr(gt_a_data, iblk_t, &nprocs);
    GP_Allocate(gt_a_data);

    gt_a_j = NGA_Create_handle();
    i = iblk_t[nprocs];
    NGA_Set_data(gt_a_j, one, &i, C_INT);
    NGA_Set_irreg_distr(gt_a_j, iblk_t, &nprocs);
    NGA_Allocate(gt_a_j);

    gt_a_i = NGA_Create_handle();
    i = nprocs+1;
    NGA_Set_data(gt_a_i,one,&i,C_INT);
    for (i=0; i<nprocs; i++) mapc[i] = i;
    NGA_Set_irreg_distr(gt_a_i, mapc, &nprocs);
    NGA_Allocate(gt_a_i);

    /* copy i and j arrays of transposed matrix into distributed arrays */
    if (me==0) {
      lo_bl = 0;
      hi_bl = nprocs;
      NGA_Put(gt_a_i,&lo_bl,&hi_bl,iblk_t,&one);
      lo_bl = 0;
      hi_bl = iblk_t[nprocs]-1;
      NGA_Put(gt_a_j,&lo_bl,&hi_bl,jblk_t,&one);
    }
    NGA_Sync();
    lo_bl = iblk[me];
    hi_bl = iblk[me+1];
    total_procs = hi_bl - lo_bl + 1;
    total_procs = hi_bl - lo_bl;
    t_create_csr_ga = t_create_csr_ga + GA_Wtime() - t_beg3;
    for (iblock = lo_bl; iblock < hi_bl; iblock++) {
      t_beg4 = GA_Wtime();
      jdx = blkidx_t[iblock];
      GP_Get_size(g_a_data, &jdx, &jdx, &isize);
      blk = (void*)malloc(isize);
      GP_Get(g_a_data, &jdx, &jdx, blk, blk_ptr, &one, &blk_size, &one, &tsize, 0); 
      /* Parameters for original block */
      iparams = (int*)blk_ptr[0];
      rval = (double*)(iparams+7);
      imin = iparams[0];
      imax = iparams[1];
      jmin = iparams[2];
      jmax = iparams[3];
      irow = iparams[4];
      icol = iparams[5];
      nnz = iparams[6];
      jval = (int*)(rval+nnz);
      ival = (int*)(jval+nnz);

      /* Create transposed block */
      isize = 7*sizeof(int) + nnz*(sizeof(double)+sizeof(int))
            + (jmax-jmin+2)*sizeof(int);
      t_gp_tget = t_gp_tget + GA_Wtime() - t_beg4;
      t_beg4 = GA_Wtime();
      tblk_ptr = (int*)GP_Malloc(isize);
      t_gp_malloc = t_gp_malloc + GA_Wtime() - t_beg4;
      t_beg3 = GA_Wtime();
      iparamst = (int*)tblk_ptr;
      rvalt = (double*)(iparamst+7);
      jvalt = (int*)(rvalt+nnz);
      ivalt = (int*)(jvalt+nnz);
      iparamst[0] = jmin;
      iparamst[1] = jmax;
      iparamst[2] = imin;
      iparamst[3] = imax;
      iparamst[4] = icol;
      iparamst[5] = irow;
      iparamst[6] = nnz;
      i = imax-imin+1;
      j = jmax-jmin+1;
      stranr(i, j, ival, jval, rval, ivalt, jvalt, rvalt);
      t_trans_blk = t_trans_blk + GA_Wtime() - t_beg3;
      t_beg4 = GA_Wtime();
      GP_Assign_local_element(gt_a_data, &iblock, (void*)tblk_ptr, isize);
      t_gp_assign = t_gp_assign + GA_Wtime() - t_beg4;
#if 1
      free(blk);
#endif
    }

    /* Clean up after transpose */
#if 1
    free(iblk);
    free(iblk_t);
    free(jblk);
    free(jblk_t);
    free(blkidx);
    free(blkidx_t);
#endif
    NGA_Sync();
    t_ga_trans = t_ga_trans + GA_Wtime() - t_beg2;
#if USE_HYPRE
    alpha = 1.0;
    beta = 0.0;
    ierr = HYPRE_StructMatrixMatvec(alpha, matrix, vec_x, beta, vec_y);
    hlo[0] = lo[0];
    hlo[1] = lo[1];
    hlo[2] = lo[2];
    hhi[0] = hi[0];
    hhi[1] = hi[1];
    hhi[2] = hi[2];
    ierr = HYPRE_StructVectorGetBoxValues(vec_y, hlo, hhi, vector);
    NGA_Distribution(g_c,me,hlo,hhi);
    cbuf = (double*)malloc((hhi[0]-hlo[0]+1)*sizeof(double));
    NGA_Get(g_c,hlo,hhi,cbuf,&one);
    dothypre = 0.0;
    dotga = 0.0;
    prdot2 = 0.0;
    for (i=0; i<(hhi[0]-hlo[0]+1); i++) {
      dothypre = dothypre + vector[i]*vector[i];
      dotga = dotga + cbuf[i]*cbuf[i];
      if (fabs(vector[i]-cbuf[i]) > 1.0e-10) {
        printf("p[%d] i: %d vector: %f cbuf: %f\n",me,i,vector[i],cbuf[i]);
      }
      prdot2 = prdot2 + (vector[i]-cbuf[i])*(vector[i]-cbuf[i]);
    }
    NGA_Dgop(&dotga,1,"+");
    NGA_Dgop(&dothypre,1,"+");
    NGA_Dgop(&prdot2,1,"+");
    prtot2 += sqrt(prdot2);
    gatot2 += sqrt(dotga);
    hypretot2 += sqrt(dothypre);
    free(cbuf);
    free(blk_ptr);
#endif
    /* Clean up transposed matrix */
    GP_Distribution(gt_a_data,me,blo,bhi);
    for (i=blo[0]; i<bhi[0]; i++) {
      GP_Free(GP_Free_local_element(gt_a_data,&i));
    }
    GP_Destroy(gt_a_data);
    NGA_Destroy(gt_a_i);
    NGA_Destroy(gt_a_j);
#endif
#endif
  }
  free(vector);
#if USE_HYPRE
  if (me == 0) {
    printf("Magnitude of GA solution:                         %e\n",
        gatot/((double)LOOPNUM));
    printf("Magnitude of HYPRE solution:                      %e\n",
        hypretot/((double)LOOPNUM));
    printf("Magnitude of GA solution(2):                      %e\n",
        gatot2/((double)LOOPNUM));
    printf("Magnitude of HYPRE solution(2):                   %e\n",
        hypretot2/((double)LOOPNUM));
    printf("Difference between GA and HYPRE (Struct) results: %e\n",
        prtot/((double)LOOPNUM));
    printf("Difference between transpose and HYPRE results:   %e\n",
        prtot2/((double)LOOPNUM));
  }
#endif

/*
   Clean up arrays
*/
  NGA_Destroy(g_b);
  NGA_Destroy(g_c);
  GP_Distribution(g_a_data,me,blo,bhi);
  for (i=blo[0]; i<bhi[0]; i++) {
    GP_Free(GP_Free_local_element(g_a_data,&i));
  }
  GP_Destroy(g_a_data);
  NGA_Destroy(g_a_i);
  NGA_Destroy(g_a_j);
#if USE_HYPRE
  ierr = HYPRE_StructStencilDestroy(stencil);
  ierr = HYPRE_StructGridDestroy(grid);
  ierr = HYPRE_StructMatrixDestroy(matrix);
  ierr = HYPRE_StructVectorDestroy(vec_x);
  ierr = HYPRE_StructVectorDestroy(vec_y);
#endif

  NGA_Dgop(&t_cnstrct,1,"+");
  NGA_Dgop(&t_get,1,"+");
  NGA_Dgop(&t_gp_get,1,"+");
  NGA_Dgop(&t_mult,1,"+");
  NGA_Dgop(&t_ga_tot,1,"+");
  NGA_Dgop(&t_ga_trans,1,"+");
  NGA_Dgop(&t_get_blk_csr,1,"+");
  NGA_Dgop(&t_trans_blk_csr,1,"+");
  NGA_Dgop(&t_trans_blk,1,"+");
  NGA_Dgop(&t_create_csr_ga,1,"+");
  NGA_Dgop(&t_gp_tget,1,"+");
  NGA_Dgop(&t_gp_malloc,1,"+");
  NGA_Dgop(&t_gp_assign,1,"+");
#if USE_HYPRE
  NGA_Dgop(&t_hypre_strct,1,"+");
#endif
  free(mapc);

  if (me == 0) {
    printf("Time to create sparse matrix:                         %12.4f\n",
      t_cnstrct/((double)(nprocs*LOOPNUM)));
    printf("Time to get right hand side vector:                   %12.4f\n",
      t_get/((double)(nprocs*LOOPNUM)));
    printf("Time to get GP blocks:                                %12.4f\n",
      t_gp_get/((double)(nprocs*LOOPNUM)));
    printf("Time for sparse matrix block multiplication:          %12.4f\n",
      t_mult/((double)(nprocs*LOOPNUM)));
    printf("Time for total sparse matrix multiplication:          %12.4f\n",
      t_ga_tot/((double)(nprocs*LOOPNUM)));
#if USE_HYPRE
    printf("Total time for HYPRE (Struct)  matrix-vector multiply:%12.4f\n",
      t_hypre_strct/((double)(nprocs*LOOPNUM)));
#endif
    printf("Time to get block CSR distribution:                   %12.4f\n",
      t_get_blk_csr/((double)(nprocs*LOOPNUM)));
    printf("Time for transposing block CSR distribution:          %12.4f\n",
      t_trans_blk_csr/((double)(nprocs*LOOPNUM)));
    printf("Time for creating transposed block CSR GA:            %12.4f\n",
      t_create_csr_ga/((double)(nprocs*LOOPNUM)));
    printf("Time for transposing blocks:                          %12.4f\n",
      t_trans_blk/((double)(nprocs*LOOPNUM)));
    printf("Time to get GP blocks for transpose:                  %12.4f\n",
      t_gp_tget/((double)(nprocs*LOOPNUM)));
    printf("Time to malloc GP blocks for transpose:               %12.4f\n",
      t_gp_malloc/((double)(nprocs*LOOPNUM)));
    printf("Time to assign GP blocks for transpose:               %12.4f\n",
      t_gp_assign/((double)(nprocs*LOOPNUM)));
    printf("Time for total sparse matrix transpose:               %12.4f\n",
      t_ga_trans/((double)(nprocs*LOOPNUM)));
  }
  if (me==0) {
    printf("Terminating GA library\n");
  }
  NGA_Terminate();
/*
 ***  Tidy up after message-passing library
 */
  ierr = MPI_Finalize();
}
Beispiel #6
0
int main (int argc, char *argv[])
{
   /* The three index-space boxes that make up the grid, each stored as
      {lower corner, upper corner}.  Rank 0 owns boxes 0 and 1, rank 1
      owns box 2. */
   int boxes[3][2][2] = {
      { {-3, 1}, {-1, 2} },
      { { 0, 1}, { 2, 4} },
      { { 3, 1}, { 6, 4} }
   };

   /* Relative offsets (in index space) of the 5 stencil entries, and the
      labels 0..4 used to refer to them when setting coefficients. */
   int offsets[5][2] = {{0,0}, {-1,0}, {1,0}, {0,-1}, {0,1}};
   int stencil_indices[5] = {0, 1, 2, 3, 4};

   /* Strips of grid points whose stencil entry reaches outside of the
      boundary.  Layout of each row:
      {ilower[0], ilower[1], iupper[0], iupper[1], stencil entry}.
      Rows 0-4 belong to rank 0, rows 5-7 to rank 1. */
   int strips[8][5] = {
      {-3, 1,  2, 1, 3},   /* below the first AND second box */
      {-3, 1, -3, 2, 1},   /* left of the first box */
      {-3, 2, -1, 2, 4},   /* above the first box */
      { 0, 3,  0, 4, 1},   /* left of the second box, where it does not
                              border the first box */
      { 0, 4,  2, 4, 4},   /* above the second box */
      { 3, 1,  6, 1, 3},   /* below rank 1's box */
      { 6, 1,  6, 4, 2},   /* right of rank 1's box */
      { 3, 4,  6, 4, 4}    /* above rank 1's box */
   };

   /* Scratch buffers sized for the largest box (16 grid points). */
   double coeffs[80];
   double ones[16];
   double zeros[16];

   int myid, num_procs;
   int first_box = 0, num_boxes = 0;      /* boxes owned by this rank */
   int first_strip = 0, num_strips = 0;   /* boundary strips of this rank */
   int k, p, e;

   HYPRE_StructGrid     grid;
   HYPRE_StructStencil  stencil;
   HYPRE_StructMatrix   A;
   HYPRE_StructVector   b;
   HYPRE_StructVector   x;
   HYPRE_StructSolver   solver;
   HYPRE_StructSolver   precond;

   /* Start MPI and find out who we are */
   MPI_Init(&argc, &argv);
   MPI_Comm_rank(MPI_COMM_WORLD, &myid);
   MPI_Comm_size(MPI_COMM_WORLD, &num_procs);

   /* The box layout above is hard-wired for exactly two ranks */
   if (num_procs != 2)
   {
      if (myid == 0)
      {
         printf("Must run with 2 processors!\n");
      }
      MPI_Finalize();
      return 0;
   }

   /* Select the slice of each table that this rank is responsible for */
   if (myid == 0)
   {
      first_box   = 0;
      num_boxes   = 2;
      first_strip = 0;
      num_strips  = 5;
   }
   else if (myid == 1)
   {
      first_box   = 2;
      num_boxes   = 1;
      first_strip = 5;
      num_strips  = 3;
   }

   for (p = 0; p < 16; p++)
   {
      ones[p]  = 1.0;
      zeros[p] = 0.0;
   }

   /* 1. Grid: create an empty 2D grid, add this rank's boxes, then make
         the collective call that finalizes the grid. */
   HYPRE_StructGridCreate(MPI_COMM_WORLD, 2, &grid);
   for (k = first_box; k < first_box + num_boxes; k++)
   {
      HYPRE_StructGridSetExtents(grid, boxes[k][0], boxes[k][1]);
   }
   HYPRE_StructGridAssemble(grid);

   /* 2. Stencil: a 2D, 5-entry stencil with the geometry given by
         `offsets`. */
   HYPRE_StructStencilCreate(2, 5, &stencil);
   for (e = 0; e < 5; e++)
   {
      HYPRE_StructStencilSetElement(stencil, e, offsets[e]);
   }

   /* 3. Matrix */
   HYPRE_StructMatrixCreate(MPI_COMM_WORLD, grid, stencil, &A);
   HYPRE_StructMatrixInitialize(A);

   /* 3a. On every grid point of every owned box, set all 5 stencil
          entries: 4.0 for entry 0 and -1.0 for entries 1..4.  Boundary
          points are corrected in step 3b. */
   for (k = first_box; k < first_box + num_boxes; k++)
   {
      int *lower   = boxes[k][0];
      int *upper   = boxes[k][1];
      int  npoints = (upper[0] - lower[0] + 1) * (upper[1] - lower[1] + 1);
      int  n       = 0;

      for (p = 0; p < npoints; p++)
      {
         coeffs[n++] = 4.0;
         for (e = 1; e < 5; e++)
         {
            coeffs[n++] = -1.0;
         }
      }

      HYPRE_StructMatrixSetBoxValues(A, lower, upper, 5,
                                     stencil_indices, coeffs);
   }

   /* 3b. Zero the single stencil entry that reaches outside of the
          boundary along each of this rank's boundary strips. */
   for (k = first_strip; k < first_strip + num_strips; k++)
   {
      HYPRE_StructMatrixSetBoxValues(A, &strips[k][0], &strips[k][2], 1,
                                     &strips[k][4], zeros);
   }

   /* Collective call finalizing the matrix assembly */
   HYPRE_StructMatrixAssemble(A);

   /* 4. Vectors: b is set to 1.0 and x to 0.0 on every owned box */
   HYPRE_StructVectorCreate(MPI_COMM_WORLD, grid, &b);
   HYPRE_StructVectorCreate(MPI_COMM_WORLD, grid, &x);

   HYPRE_StructVectorInitialize(b);
   HYPRE_StructVectorInitialize(x);

   for (k = first_box; k < first_box + num_boxes; k++)
   {
      HYPRE_StructVectorSetBoxValues(b, boxes[k][0], boxes[k][1], ones);
      HYPRE_StructVectorSetBoxValues(x, boxes[k][0], boxes[k][1], zeros);
   }

   /* Collective calls finalizing the vector assembly */
   HYPRE_StructVectorAssemble(b);
   HYPRE_StructVectorAssemble(x);

   /* 5. Solver: PCG with SMG as preconditioner (see the Reference Manual
         for descriptions of all of the options) */
   HYPRE_StructPCGCreate(MPI_COMM_WORLD, &solver);
   HYPRE_StructPCGSetTol(solver, 1.0e-06);
   HYPRE_StructPCGSetPrintLevel(solver, 2);
   HYPRE_StructPCGSetMaxIter(solver, 50);

   /* The preconditioner is limited to one SMG iteration with tolerance
      0.0, a zero initial guess and one pre-/post-relaxation sweep. */
   HYPRE_StructSMGCreate(MPI_COMM_WORLD, &precond);
   HYPRE_StructSMGSetMaxIter(precond, 1);
   HYPRE_StructSMGSetTol(precond, 0.0);
   HYPRE_StructSMGSetZeroGuess(precond);
   HYPRE_StructSMGSetNumPreRelax(precond, 1);
   HYPRE_StructSMGSetNumPostRelax(precond, 1);

   /* Attach the preconditioner, then set up and run the solve */
   HYPRE_StructPCGSetPrecond(solver, HYPRE_StructSMGSolve,
                             HYPRE_StructSMGSetup, precond);
   HYPRE_StructPCGSetup(solver, A, b, x);
   HYPRE_StructPCGSolve(solver, A, b, x);

   /* Release all HYPRE objects */
   HYPRE_StructGridDestroy(grid);
   HYPRE_StructStencilDestroy(stencil);
   HYPRE_StructMatrixDestroy(A);
   HYPRE_StructVectorDestroy(b);
   HYPRE_StructVectorDestroy(x);
   HYPRE_StructPCGDestroy(solver);
   HYPRE_StructSMGDestroy(precond);

   /* Shut down MPI */
   MPI_Finalize();

   return 0;
}