/*!
 * Release the HYPRE objects referenced by d_stencil, d_grid, d_matrix,
 * d_sol_vec and d_rhs_vec.
 *
 * A handle is passed to its HYPRE destroy routine only when it is non-NULL,
 * and every handle is NULL on return, so calling this routine again
 * performs no further HYPRE calls.
 */
void
CCDivGradHypreLevelSolver::deallocateHypreData()
{
    // Stencil.
    if (d_stencil)
    {
        HYPRE_StructStencilDestroy(d_stencil);
    }
    d_stencil = NULL;

    // Grid.
    if (d_grid)
    {
        HYPRE_StructGridDestroy(d_grid);
    }
    d_grid = NULL;

    // Matrix.
    if (d_matrix)
    {
        HYPRE_StructMatrixDestroy(d_matrix);
    }
    d_matrix = NULL;

    // Solution vector.
    if (d_sol_vec)
    {
        HYPRE_StructVectorDestroy(d_sol_vec);
    }
    d_sol_vec = NULL;

    // Right-hand-side vector.
    if (d_rhs_vec)
    {
        HYPRE_StructVectorDestroy(d_rhs_vec);
    }
    d_rhs_vec = NULL;

    return;
}// deallocateHypreData
/*--------------------------------------------------------------------------
 * Fortran interface wrapper for HYPRE_StructVectorDestroy.
 *
 * vector : Fortran-side object handle; converted to a HYPRE_StructVector
 *          with hypre_F90_PassObj before the call.
 * ierr   : receives the return code of HYPRE_StructVectorDestroy, cast to
 *          hypre_F90_Int.
 *--------------------------------------------------------------------------*/
void
hypre_F90_IFACE(hypre_structvectordestroy, HYPRE_STRUCTVECTORDESTROY)
   ( hypre_F90_Obj *vector,
     hypre_F90_Int *ierr )
{
   HYPRE_StructVector struct_vector =
      hypre_F90_PassObj (HYPRE_StructVector, vector);
   hypre_F90_Int      status =
      (hypre_F90_Int) ( HYPRE_StructVectorDestroy( struct_vector ) );

   *ierr = status;
}
double *solve(double *Ab, int solver_id, struct parms parms) { int i, j; double final_res_norm; int time_index, n_pre, n_post, num_iterations; n_pre = 1; n_post = 1; double *A_val, *b_val; A_val = (double *) calloc(parms.N*parms.nsten, sizeof(double)); b_val = (double *) calloc(parms.N, sizeof(double)); for (i = 0; i < (parms.N*parms.nsten); i++){ A_val[i] = Ab[i]; } for (i = 0; i < parms.N; i++){ b_val[i] = Ab[i+parms.N*parms.nsten]; } // HYPRE // HYPRE_StructGrid grid; HYPRE_StructStencil stencil; HYPRE_StructMatrix A; HYPRE_StructVector b; HYPRE_StructVector x; HYPRE_StructSolver solver; HYPRE_StructSolver precond; #if Dim == 2 HYPRE_Int ilower[2] = {parms.x0, parms.y0}; HYPRE_Int iupper[2] = {parms.x1, parms.y1}; #endif #if Dim == 3 HYPRE_Int ilower[3] = {parms.x0, parms.y0, 0}; HYPRE_Int iupper[3] = {parms.x1, parms.y1, parms.Nz-1}; #endif { // Create an empty 2D grid object HYPRE_StructGridCreate(MPI_COMM_WORLD, Dim, &grid); // Add a new box to the grid HYPRE_StructGridSetExtents(grid, ilower, iupper); // 1. Set up periodic boundary condition in y-direction and create the grid int pset[3]; pset[0] = 0; pset[1] = parms.Ny; pset[2] = 0; #if Dim == 3 pset[2] = parms.Nz; #endif //HYPRE_StructGridSetNumGhost(grid,pset) HYPRE_StructGridSetPeriodic(grid, pset); HYPRE_StructGridAssemble(grid); } // 2. Define the discretization stencil { if (Dim == 2){ // Create an empty 2D, 5-pt stencil object HYPRE_StructStencilCreate(2, parms.nsten, &stencil); // Define the geometry of the stencil { int offsets[5][2] = {{0,0}, {-1,0}, {0,-1}, {0,1}, {1,0}}; for (i = 0; i < parms.nsten; i++) HYPRE_StructStencilSetElement(stencil, i, offsets[i]); } } else { HYPRE_StructStencilCreate(3, parms.nsten, &stencil); // Define the geometry of the 3D stencil { int offsets[7][3] = {{0,0,0}, {-1,0,0}, {0,-1,0}, {0,1,0}, {1,0,0}, {0,0,-1}, {0,0,1}}; for (i = 0; i < parms.nsten; i++) HYPRE_StructStencilSetElement(stencil, i, offsets[i]); } } } // 3. 
Set up a Struct Matrix A from Aval { HYPRE_Int stencil_indices[parms.nsten]; // Create an empty matrix object HYPRE_StructMatrixCreate(MPI_COMM_WORLD, grid, stencil, &A); // Indicate that the matrix coefficients are ready to be set HYPRE_StructMatrixInitialize(A); for (j = 0; j < parms.nsten; j++) stencil_indices[j] = j; HYPRE_StructMatrixSetBoxValues(A, ilower, iupper, parms.nsten, stencil_indices, A_val); free(A_val); } // 4. Set up Struct Vectors for b from b_val and set x = 0 { double *values; HYPRE_StructVectorCreate(MPI_COMM_WORLD, grid, &b); HYPRE_StructVectorCreate(MPI_COMM_WORLD, grid, &x); HYPRE_StructVectorInitialize(b); HYPRE_StructVectorInitialize(x); values = calloc((parms.N), sizeof(double)); for (i = 0; i < (parms.N); i++) values[i] = 0.0; HYPRE_StructVectorSetBoxValues(x, ilower, iupper, values); HYPRE_StructVectorSetBoxValues(b, ilower, iupper, b_val); free(b_val); free(values); } //Finalize the vector and matrix assembly HYPRE_StructMatrixAssemble(A); HYPRE_StructVectorAssemble(b); HYPRE_StructVectorAssemble(x); #if DEBUG == 3 HYPRE_StructMatrixPrint("./poisson.matrix", A, 0); HYPRE_StructVectorPrint("./poisson.rhs", b, 0); /*char fname[64]; char Aname[64], bname[64]; sprintf(Aname,"data/A%d.",parms.cyc); sprintf(bname,"data/b%d.",parms.cyc); filename(fname, Aname, parms.wkdir, parms); HYPRE_StructMatrixPrint(fname, A, 0); filename(fname, bname, parms.wkdir, parms); HYPRE_StructVectorPrint(fname, b, 0);*/ #endif // 6. 
Set up and use a solver (SMG) if (solver_id == 0) { time_index = hypre_InitializeTiming("SMG Setup"); hypre_BeginTiming(time_index); HYPRE_StructSMGCreate(MPI_COMM_WORLD, &solver); HYPRE_StructSMGSetMemoryUse(solver, 0); HYPRE_StructSMGSetMaxIter(solver, 100); HYPRE_StructSMGSetTol(solver, 1.0e-12); HYPRE_StructSMGSetRelChange(solver, 0); HYPRE_StructSMGSetNumPreRelax(solver, n_pre); HYPRE_StructSMGSetNumPostRelax(solver, n_post); // Logging must be on to get iterations and residual norm info below HYPRE_StructSMGSetLogging(solver, 1); // Setup and print setup timings HYPRE_StructSMGSetup(solver, A, b, x); hypre_EndTiming(time_index); #if DEBUG == 3 hypre_PrintTiming("Setup phase times", MPI_COMM_WORLD); #endif hypre_FinalizeTiming(time_index); hypre_ClearTiming(); // Solve and print solve timings time_index = hypre_InitializeTiming("SMG Solve"); hypre_BeginTiming(time_index); HYPRE_StructSMGSolve(solver, A, b, x); hypre_EndTiming(time_index); #if DEBUG == 3 hypre_PrintTiming("Solve phase times", MPI_COMM_WORLD); #endif hypre_FinalizeTiming(time_index); hypre_ClearTiming(); // Get some info on the run HYPRE_StructSMGGetNumIterations(solver, &num_iterations); HYPRE_StructSMGGetFinalRelativeResidualNorm(solver, &final_res_norm); #if DEBUG == 2 if (parms.rank == 0){ fprintf(stdout, "Number of Iterations = %4d ; Final Relative Residual Norm = %e\n\n", num_iterations, final_res_norm); } #endif // Clean up HYPRE_StructSMGDestroy(solver); } // 6. 
Set up and use a solver (PCG) with SMG Preconditioner if (solver_id == 1) { HYPRE_StructPCGCreate(MPI_COMM_WORLD, &solver); //HYPRE_StructPCGSetMemoryUse(solver, 0); HYPRE_StructPCGSetMaxIter(solver, 100); HYPRE_StructPCGSetTol(solver, 1.0e-12); HYPRE_StructPCGSetTwoNorm(solver, 1); HYPRE_StructPCGSetRelChange(solver, 0); //HYPRE_StructPCGSetPrintLevel(solver, 2 ); /* print each CG iteration */ HYPRE_StructPCGSetLogging(solver, 1); /* Use symmetric SMG as preconditioner */ HYPRE_StructSMGCreate(MPI_COMM_WORLD, &precond); HYPRE_StructSMGSetMemoryUse(precond, 0); HYPRE_StructSMGSetMaxIter(precond, 32); HYPRE_StructSMGSetTol(precond, 0.0); HYPRE_StructSMGSetZeroGuess(precond); HYPRE_StructSMGSetNumPreRelax(precond, 1); HYPRE_StructSMGSetNumPostRelax(precond, 1); /* Set the preconditioner and solve */ HYPRE_StructPCGSetPrecond(solver, HYPRE_StructSMGSolve, HYPRE_StructSMGSetup, precond); HYPRE_StructPCGSetup(solver, A, b, x); HYPRE_StructPCGSolve(solver, A, b, x); /* Get some info on the run */ HYPRE_StructPCGGetNumIterations(solver, &num_iterations); HYPRE_StructPCGGetFinalRelativeResidualNorm(solver, &final_res_norm); #if DEBUG == 2 if (parms.rank == 0){ fprintf(stdout, "Number of Iterations = %4d ; Final Relative Residual Norm = %e\n\n", num_iterations, final_res_norm); } #endif /* Clean up */ HYPRE_StructSMGDestroy(precond); HYPRE_StructPCGDestroy(solver); } // get the local solution double *values = calloc(parms.N, sizeof(double)); HYPRE_StructVectorGetBoxValues(x, ilower, iupper, values); // Free memory HYPRE_StructGridDestroy(grid); HYPRE_StructStencilDestroy(stencil); HYPRE_StructMatrixDestroy(A); HYPRE_StructVectorDestroy(b); HYPRE_StructVectorDestroy(x); free(Ab); return(values); }
int main (int argc, char *argv[]) { int i, j, myid, num_procs; int vis = 0; HYPRE_StructGrid grid; HYPRE_StructStencil stencil; HYPRE_StructMatrix A; HYPRE_StructVector b; HYPRE_StructVector x; HYPRE_StructSolver solver; /* Initialize MPI */ MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &myid); MPI_Comm_size(MPI_COMM_WORLD, &num_procs); if (num_procs != 2) { if (myid == 0) printf("Must run with 2 processors!\n"); MPI_Finalize(); return(0); } /* Parse command line */ { int arg_index = 0; int print_usage = 0; while (arg_index < argc) { if ( strcmp(argv[arg_index], "-vis") == 0 ) { arg_index++; vis = 1; } else if ( strcmp(argv[arg_index], "-help") == 0 ) { print_usage = 1; break; } else { arg_index++; } } if ((print_usage) && (myid == 0)) { printf("\n"); printf("Usage: %s [<options>]\n", argv[0]); printf("\n"); printf(" -vis : save the solution for GLVis visualization\n"); printf("\n"); } if (print_usage) { MPI_Finalize(); return (0); } } /* 1. Set up a grid. Each processor describes the piece of the grid that it owns. */ { /* Create an empty 2D grid object */ HYPRE_StructGridCreate(MPI_COMM_WORLD, 2, &grid); /* Add boxes to the grid */ if (myid == 0) { int ilower[2]={-3,1}, iupper[2]={-1,2}; HYPRE_StructGridSetExtents(grid, ilower, iupper); } else if (myid == 1) { int ilower[2]={0,1}, iupper[2]={2,4}; HYPRE_StructGridSetExtents(grid, ilower, iupper); } /* This is a collective call finalizing the grid assembly. The grid is now ``ready to be used'' */ HYPRE_StructGridAssemble(grid); } /* 2. Define the discretization stencil */ { /* Create an empty 2D, 5-pt stencil object */ HYPRE_StructStencilCreate(2, 5, &stencil); /* Define the geometry of the stencil. Each represents a relative offset (in the index space). */ { int entry; int offsets[5][2] = {{0,0}, {-1,0}, {1,0}, {0,-1}, {0,1}}; /* Assign each of the 5 stencil entries */ for (entry = 0; entry < 5; entry++) HYPRE_StructStencilSetElement(stencil, entry, offsets[entry]); } } /* 3. 
Set up a Struct Matrix */ { /* Create an empty matrix object */ HYPRE_StructMatrixCreate(MPI_COMM_WORLD, grid, stencil, &A); /* Indicate that the matrix coefficients are ready to be set */ HYPRE_StructMatrixInitialize(A); /* Set the matrix coefficients. Each processor assigns coefficients for the boxes in the grid that it owns. Note that the coefficients associated with each stencil entry may vary from grid point to grid point if desired. Here, we first set the same stencil entries for each grid point. Then we make modifications to grid points near the boundary. */ if (myid == 0) { int ilower[2]={-3,1}, iupper[2]={-1,2}; int stencil_indices[5] = {0,1,2,3,4}; /* labels for the stencil entries - these correspond to the offsets defined above */ int nentries = 5; int nvalues = 30; /* 6 grid points, each with 5 stencil entries */ double values[30]; /* We have 6 grid points, each with 5 stencil entries */ for (i = 0; i < nvalues; i += nentries) { values[i] = 4.0; for (j = 1; j < nentries; j++) values[i+j] = -1.0; } HYPRE_StructMatrixSetBoxValues(A, ilower, iupper, nentries, stencil_indices, values); } else if (myid == 1) { int ilower[2]={0,1}, iupper[2]={2,4}; int stencil_indices[5] = {0,1,2,3,4}; int nentries = 5; int nvalues = 60; /* 12 grid points, each with 5 stencil entries */ double values[60]; for (i = 0; i < nvalues; i += nentries) { values[i] = 4.0; for (j = 1; j < nentries; j++) values[i+j] = -1.0; } HYPRE_StructMatrixSetBoxValues(A, ilower, iupper, nentries, stencil_indices, values); } /* Set the coefficients reaching outside of the boundary to 0 */ if (myid == 0) { double values[3]; for (i = 0; i < 3; i++) values[i] = 0.0; { /* values below our box */ int ilower[2]={-3,1}, iupper[2]={-1,1}; int stencil_indices[1] = {3}; HYPRE_StructMatrixSetBoxValues(A, ilower, iupper, 1, stencil_indices, values); } { /* values to the left of our box */ int ilower[2]={-3,1}, iupper[2]={-3,2}; int stencil_indices[1] = {1}; HYPRE_StructMatrixSetBoxValues(A, ilower, iupper, 1, 
stencil_indices, values); } { /* values above our box */ int ilower[2]={-3,2}, iupper[2]={-1,2}; int stencil_indices[1] = {4}; HYPRE_StructMatrixSetBoxValues(A, ilower, iupper, 1, stencil_indices, values); } } else if (myid == 1) { double values[4]; for (i = 0; i < 4; i++) values[i] = 0.0; { /* values below our box */ int ilower[2]={0,1}, iupper[2]={2,1}; int stencil_indices[1] = {3}; HYPRE_StructMatrixSetBoxValues(A, ilower, iupper, 1, stencil_indices, values); } { /* values to the right of our box */ int ilower[2]={2,1}, iupper[2]={2,4}; int stencil_indices[1] = {2}; HYPRE_StructMatrixSetBoxValues(A, ilower, iupper, 1, stencil_indices, values); } { /* values above our box */ int ilower[2]={0,4}, iupper[2]={2,4}; int stencil_indices[1] = {4}; HYPRE_StructMatrixSetBoxValues(A, ilower, iupper, 1, stencil_indices, values); } { /* values to the left of our box (that do not border the other box on proc. 0) */ int ilower[2]={0,3}, iupper[2]={0,4}; int stencil_indices[1] = {1}; HYPRE_StructMatrixSetBoxValues(A, ilower, iupper, 1, stencil_indices, values); } } /* This is a collective call finalizing the matrix assembly. The matrix is now ``ready to be used'' */ HYPRE_StructMatrixAssemble(A); } /* 4. Set up Struct Vectors for b and x. Each processor sets the vectors corresponding to its boxes. 
*/ { /* Create an empty vector object */ HYPRE_StructVectorCreate(MPI_COMM_WORLD, grid, &b); HYPRE_StructVectorCreate(MPI_COMM_WORLD, grid, &x); /* Indicate that the vector coefficients are ready to be set */ HYPRE_StructVectorInitialize(b); HYPRE_StructVectorInitialize(x); /* Set the vector coefficients */ if (myid == 0) { int ilower[2]={-3,1}, iupper[2]={-1,2}; double values[6]; /* 6 grid points */ for (i = 0; i < 6; i ++) values[i] = 1.0; HYPRE_StructVectorSetBoxValues(b, ilower, iupper, values); for (i = 0; i < 6; i ++) values[i] = 0.0; HYPRE_StructVectorSetBoxValues(x, ilower, iupper, values); } else if (myid == 1) { int ilower[2]={0,1}, iupper[2]={2,4}; double values[12]; /* 12 grid points */ for (i = 0; i < 12; i ++) values[i] = 1.0; HYPRE_StructVectorSetBoxValues(b, ilower, iupper, values); for (i = 0; i < 12; i ++) values[i] = 0.0; HYPRE_StructVectorSetBoxValues(x, ilower, iupper, values); } /* This is a collective call finalizing the vector assembly. The vectors are now ``ready to be used'' */ HYPRE_StructVectorAssemble(b); HYPRE_StructVectorAssemble(x); } /* 5. Set up and use a solver (See the Reference Manual for descriptions of all of the options.) */ { /* Create an empty PCG Struct solver */ HYPRE_StructPCGCreate(MPI_COMM_WORLD, &solver); /* Set some parameters */ HYPRE_StructPCGSetTol(solver, 1.0e-06); /* convergence tolerance */ HYPRE_StructPCGSetPrintLevel(solver, 2); /* amount of info. 
printed */ /* Setup and solve */ HYPRE_StructPCGSetup(solver, A, b, x); HYPRE_StructPCGSolve(solver, A, b, x); } /* Save the solution for GLVis visualization, see vis/glvis-ex1.sh */ if (vis) { GLVis_PrintStructGrid(grid, "vis/ex1.mesh", myid, NULL, NULL); GLVis_PrintStructVector(x, "vis/ex1.sol", myid); GLVis_PrintData("vis/ex1.data", myid, num_procs); } /* Free memory */ HYPRE_StructGridDestroy(grid); HYPRE_StructStencilDestroy(stencil); HYPRE_StructMatrixDestroy(A); HYPRE_StructVectorDestroy(b); HYPRE_StructVectorDestroy(x); HYPRE_StructPCGDestroy(solver); /* Finalize MPI */ MPI_Finalize(); return (0); }
/*--------------------------------------------------------------------------
 * Test driver for structured vector migration.
 *
 * Builds two grids from the same lower corners: from_grid uses the extents
 * iupper, to_grid uses iupper2, which is one cell larger in each direction
 * where the block is the last one on a process that is not the last process
 * in that direction.  A vector on from_grid is migrated to to_grid and
 * compared against a reference vector filled directly on to_grid; the
 * squared norm of the difference is printed by rank 0 (0 means success).
 *
 * Command-line options are listed in the usage text below.  The driver
 * exits with status 1 after printing usage, on an unsupported dimension,
 * on a non-positive process topology, or when PxQxR exceeds the number of
 * processes; it shows the usage text (and exits 1) when an option is
 * missing one of its values.
 *--------------------------------------------------------------------------*/
hypre_int
main( hypre_int argc,
      char *argv[] )
{
   HYPRE_Int           arg_index;
   HYPRE_Int           print_usage;
   HYPRE_Int           nx, ny, nz;
   HYPRE_Int           P, Q, R;
   HYPRE_Int           bx, by, bz;

   HYPRE_StructGrid    from_grid, to_grid;
   HYPRE_StructVector  from_vector, to_vector, check_vector;
   HYPRE_CommPkg       comm_pkg;

   HYPRE_Int           time_index;
   HYPRE_Int           num_procs, myid;

   /* Process coordinates; only the first `dim` of them are computed below,
      so give all three a defined value. */
   HYPRE_Int           p = 0, q = 0, r = 0;
   HYPRE_Int           dim;
   HYPRE_Int           nblocks = 0;
   HYPRE_Int         **ilower, **iupper, **iupper2;
   HYPRE_Int           istart[3];
   HYPRE_Int           i, ix, iy, iz, ib;
   HYPRE_Int           print_system = 0;

   HYPRE_Real          check;

   /*-----------------------------------------------------------
    * Initialize some stuff
    *-----------------------------------------------------------*/

   /* Initialize MPI */
   hypre_MPI_Init(&argc, &argv);

   hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );

   /*-----------------------------------------------------------
    * Set defaults
    *-----------------------------------------------------------*/

   dim = 3;

   nx = 2;
   ny = 2;
   nz = 2;

   P  = num_procs;
   Q  = 1;
   R  = 1;

   bx = 1;
   by = 1;
   bz = 1;

   istart[0] = 1;
   istart[1] = 1;
   istart[2] = 1;

   /*-----------------------------------------------------------
    * Parse command line
    *
    * Every option that takes values first checks that enough
    * arguments remain; otherwise argv would be read past argc.
    *-----------------------------------------------------------*/

   print_usage = 0;
   arg_index = 1;
   while (arg_index < argc)
   {
      if ( strcmp(argv[arg_index], "-n") == 0 )
      {
         if (arg_index + 3 >= argc)
         {
            print_usage = 1;
            break;
         }
         arg_index++;
         nx = atoi(argv[arg_index++]);
         ny = atoi(argv[arg_index++]);
         nz = atoi(argv[arg_index++]);
      }
      else if ( strcmp(argv[arg_index], "-istart") == 0 )
      {
         if (arg_index + 3 >= argc)
         {
            print_usage = 1;
            break;
         }
         arg_index++;
         istart[0] = atoi(argv[arg_index++]);
         istart[1] = atoi(argv[arg_index++]);
         istart[2] = atoi(argv[arg_index++]);
      }
      else if ( strcmp(argv[arg_index], "-P") == 0 )
      {
         if (arg_index + 3 >= argc)
         {
            print_usage = 1;
            break;
         }
         arg_index++;
         P  = atoi(argv[arg_index++]);
         Q  = atoi(argv[arg_index++]);
         R  = atoi(argv[arg_index++]);
      }
      else if ( strcmp(argv[arg_index], "-b") == 0 )
      {
         if (arg_index + 3 >= argc)
         {
            print_usage = 1;
            break;
         }
         arg_index++;
         bx = atoi(argv[arg_index++]);
         by = atoi(argv[arg_index++]);
         bz = atoi(argv[arg_index++]);
      }
      else if ( strcmp(argv[arg_index], "-d") == 0 )
      {
         if (arg_index + 1 >= argc)
         {
            print_usage = 1;
            break;
         }
         arg_index++;
         dim = atoi(argv[arg_index++]);
      }
      else if ( strcmp(argv[arg_index], "-print") == 0 )
      {
         arg_index++;
         print_system = 1;
      }
      else if ( strcmp(argv[arg_index], "-help") == 0 )
      {
         print_usage = 1;
         break;
      }
      else
      {
         arg_index++;
      }
   }

   /*-----------------------------------------------------------
    * Print usage info
    *-----------------------------------------------------------*/

   if ( (print_usage) && (myid == 0) )
   {
      hypre_printf("\n");
      hypre_printf("Usage: %s [<options>]\n", argv[0]);
      hypre_printf("\n");
      hypre_printf(" -n <nx> <ny> <nz> : problem size per block\n");
      hypre_printf(" -istart <ix> <iy> <iz> : start of box\n");
      hypre_printf(" -P <Px> <Py> <Pz> : processor topology\n");
      hypre_printf(" -b <bx> <by> <bz> : blocking per processor\n");
      hypre_printf(" -d <dim> : problem dimension (2 or 3)\n");
      hypre_printf(" -print : print vectors\n");
      hypre_printf("\n");
   }

   if ( print_usage )
   {
      exit(1);
   }

   /*-----------------------------------------------------------
    * Check a few things
    *-----------------------------------------------------------*/

   /* Only dimensions 1, 2 and 3 are handled by the switches below; any
      other value would leave nblocks and the process coordinates unset. */
   if ((dim < 1) || (dim > 3))
   {
      if (myid == 0)
      {
         hypre_printf("Error: dim must be 1, 2 or 3\n");
      }
      exit(1);
   }

   /* P and Q are used as divisors when computing process coordinates. */
   if ((P < 1) || (Q < 1) || (R < 1))
   {
      if (myid == 0)
      {
         hypre_printf("Error: P, Q and R must all be positive\n");
      }
      exit(1);
   }

   if ((P*Q*R) > num_procs)
   {
      if (myid == 0)
      {
         hypre_printf("Error: PxQxR is more than the number of processors\n");
      }
      exit(1);
   }
   else if ((P*Q*R) < num_procs)
   {
      if (myid == 0)
      {
         hypre_printf("Warning: PxQxR is less than the number of processors\n");
      }
   }

   /*-----------------------------------------------------------
    * Print driver parameters
    *-----------------------------------------------------------*/

   if (myid == 0)
   {
      hypre_printf("Running with these driver parameters:\n");
      hypre_printf(" (nx, ny, nz) = (%d, %d, %d)\n", nx, ny, nz);
      hypre_printf(" (ix, iy, iz) = (%d, %d, %d)\n",
                   istart[0],istart[1],istart[2]);
      hypre_printf(" (Px, Py, Pz) = (%d, %d, %d)\n", P, Q, R);
      hypre_printf(" (bx, by, bz) = (%d, %d, %d)\n", bx, by, bz);
      hypre_printf(" dim = %d\n", dim);
   }

   /*-----------------------------------------------------------
    * Set up the stencil structure (7 points) when matrix is NOT read from file
    * Set up the grid structure used when NO files are read
    *-----------------------------------------------------------*/

   switch (dim)
   {
      case 1:
         nblocks = bx;
         p = myid % P;
         break;
      case 2:
         nblocks = bx*by;
         p = myid % P;
         q = (( myid - p)/P) % Q;
         break;
      case 3:
         nblocks = bx*by*bz;
         p = myid % P;
         q = (( myid - p)/P) % Q;
         r = ( myid - p - P*q)/( P*Q );
         break;
   }

   if (myid >= (P*Q*R))
   {
      /* My processor has no data on it */
      nblocks = bx = by = bz = 0;
   }

   /*-----------------------------------------------------------
    * prepare space for the extents
    *-----------------------------------------------------------*/

   ilower  = hypre_CTAlloc(HYPRE_Int*, nblocks);
   iupper  = hypre_CTAlloc(HYPRE_Int*, nblocks);
   iupper2 = hypre_CTAlloc(HYPRE_Int*, nblocks);
   for (i = 0; i < nblocks; i++)
   {
      ilower[i]  = hypre_CTAlloc(HYPRE_Int, dim);
      iupper[i]  = hypre_CTAlloc(HYPRE_Int, dim);
      iupper2[i] = hypre_CTAlloc(HYPRE_Int, dim);
   }

   ib = 0;
   switch (dim)
   {
      case 1:
         for (ix = 0; ix < bx; ix++)
         {
            ilower[ib][0] = istart[0]+ nx*(bx*p+ix);
            iupper[ib][0] = istart[0]+ nx*(bx*p+ix+1) - 1;
            iupper2[ib][0] = iupper[ib][0];
            if ( (ix == (bx-1)) && (p < (P-1)) )
               iupper2[ib][0] = iupper[ib][0] + 1;
            ib++;
         }
         break;
      case 2:
         for (iy = 0; iy < by; iy++)
            for (ix = 0; ix < bx; ix++)
            {
               ilower[ib][0] = istart[0]+ nx*(bx*p+ix);
               iupper[ib][0] = istart[0]+ nx*(bx*p+ix+1) - 1;
               ilower[ib][1] = istart[1]+ ny*(by*q+iy);
               iupper[ib][1] = istart[1]+ ny*(by*q+iy+1) - 1;
               iupper2[ib][0] = iupper[ib][0];
               iupper2[ib][1] = iupper[ib][1];
               if ( (ix == (bx-1)) && (p < (P-1)) )
                  iupper2[ib][0] = iupper[ib][0] + 1;
               if ( (iy == (by-1)) && (q < (Q-1)) )
                  iupper2[ib][1] = iupper[ib][1] + 1;
               ib++;
            }
         break;
      case 3:
         for (iz = 0; iz < bz; iz++)
            for (iy = 0; iy < by; iy++)
               for (ix = 0; ix < bx; ix++)
               {
                  ilower[ib][0] = istart[0]+ nx*(bx*p+ix);
                  iupper[ib][0] = istart[0]+ nx*(bx*p+ix+1) - 1;
                  ilower[ib][1] = istart[1]+ ny*(by*q+iy);
                  iupper[ib][1] = istart[1]+ ny*(by*q+iy+1) - 1;
                  ilower[ib][2] = istart[2]+ nz*(bz*r+iz);
                  iupper[ib][2] = istart[2]+ nz*(bz*r+iz+1) - 1;
                  iupper2[ib][0] = iupper[ib][0];
                  iupper2[ib][1] = iupper[ib][1];
                  iupper2[ib][2] = iupper[ib][2];
                  if ( (ix == (bx-1)) && (p < (P-1)) )
                     iupper2[ib][0] = iupper[ib][0] + 1;
                  if ( (iy == (by-1)) && (q < (Q-1)) )
                     iupper2[ib][1] = iupper[ib][1] + 1;
                  if ( (iz == (bz-1)) && (r < (R-1)) )
                     iupper2[ib][2] = iupper[ib][2] + 1;
                  ib++;
               }
         break;
   }

   HYPRE_StructGridCreate(hypre_MPI_COMM_WORLD, dim, &from_grid);
   HYPRE_StructGridCreate(hypre_MPI_COMM_WORLD, dim, &to_grid);
   for (ib = 0; ib < nblocks; ib++)
   {
      HYPRE_StructGridSetExtents(from_grid, ilower[ib], iupper[ib]);
      HYPRE_StructGridSetExtents(to_grid, ilower[ib], iupper2[ib]);
   }
   HYPRE_StructGridAssemble(from_grid);
   HYPRE_StructGridAssemble(to_grid);

   /*-----------------------------------------------------------
    * Set up the vectors
    *-----------------------------------------------------------*/

   HYPRE_StructVectorCreate(hypre_MPI_COMM_WORLD, from_grid, &from_vector);
   HYPRE_StructVectorInitialize(from_vector);
   AddValuesVector(from_grid, from_vector, 1.0);
   HYPRE_StructVectorAssemble(from_vector);

   HYPRE_StructVectorCreate(hypre_MPI_COMM_WORLD, to_grid, &to_vector);
   HYPRE_StructVectorInitialize(to_vector);
   AddValuesVector(to_grid, to_vector, 0.0);
   HYPRE_StructVectorAssemble(to_vector);

   /* Vector used to check the migration */
   HYPRE_StructVectorCreate(hypre_MPI_COMM_WORLD, to_grid, &check_vector);
   HYPRE_StructVectorInitialize(check_vector);
   AddValuesVector(to_grid, check_vector, 1.0);
   HYPRE_StructVectorAssemble(check_vector);

   /*-----------------------------------------------------------
    * Migrate
    *-----------------------------------------------------------*/

   time_index = hypre_InitializeTiming("Struct Migrate");
   hypre_BeginTiming(time_index);

   HYPRE_StructVectorGetMigrateCommPkg(from_vector, to_vector, &comm_pkg);
   HYPRE_StructVectorMigrate(comm_pkg, from_vector, to_vector);
   HYPRE_CommPkgDestroy(comm_pkg);

   hypre_EndTiming(time_index);
   hypre_PrintTiming("Struct Migrate", hypre_MPI_COMM_WORLD);
   hypre_FinalizeTiming(time_index);

   /*-----------------------------------------------------------
    * Check the migration and print the result
    *-----------------------------------------------------------*/

   /* check_vector <- check_vector - to_vector; its squared norm is the
      reported check value. */
   hypre_StructAxpy(-1.0, to_vector, check_vector);
   check = hypre_StructInnerProd (check_vector, check_vector);

   if (myid == 0)
   {
      printf("\nCheck = %1.0f (success = 0)\n\n", check);
   }

   /*-----------------------------------------------------------
    * Print out the vectors
    *-----------------------------------------------------------*/

   if (print_system)
   {
      HYPRE_StructVectorPrint("struct_migrate.out.xfr", from_vector, 0);
      HYPRE_StructVectorPrint("struct_migrate.out.xto", to_vector, 0);
   }

   /*-----------------------------------------------------------
    * Finalize things
    *-----------------------------------------------------------*/

   HYPRE_StructGridDestroy(from_grid);
   HYPRE_StructGridDestroy(to_grid);

   for (i = 0; i < nblocks; i++)
   {
      hypre_TFree(ilower[i]);
      hypre_TFree(iupper[i]);
      hypre_TFree(iupper2[i]);
   }
   hypre_TFree(ilower);
   hypre_TFree(iupper);
   hypre_TFree(iupper2);

   HYPRE_StructVectorDestroy(from_vector);
   HYPRE_StructVectorDestroy(to_vector);
   HYPRE_StructVectorDestroy(check_vector);

   /* Finalize MPI */
   hypre_MPI_Finalize();

   return (0);
}
int main(int argc, char **argv) { int nmax, nprocs, me, me_plus; int g_a_data, g_a_i, g_a_j, isize; int gt_a_data, gt_a_i, gt_a_j; int g_b, g_c; int i, j, jj, k, one, jcnt; int chunk, kp1, ld; int *p_i, *p_j; double *p_data, *p_b, *p_c; double t_beg, t_beg2, t_ga_tot, t_get, t_mult, t_cnstrct, t_mpi_in, t_ga_in; double t_hypre_strct, t_ga_trans, t_gp_get; double t_get_blk_csr, t_trans_blk_csr, t_trans_blk, t_create_csr_ga, t_beg3; double t_gp_tget, t_gp_malloc, t_gp_assign, t_beg4; double prdot, dotga, dothypre, tempc; double prtot, gatot, hypretot, gatot2, hypretot2; double prdot2, prtot2; int status; int idim, jdim, kdim, idum, memsize; int lsize, ntot; int heap=200000, fudge=100, stack=200000, ma_heap; double *cbuf, *vector; int pdi, pdj, pdk, ip, jp, kp, ncells; int lo[3],hi[3]; int blo[3], bhi[3]; int ld_a, ld_b, ld_c, ld_i, ld_j, irows, ioff, joff, total_procs; int iproc, iblock, btot; double *amat, *bvec; int *ivec, *jvec; int *proclist, *proc_inv, *icnt; int *voffset, *offset, *mapc; int iloop, lo_bl, hi_bl; char *buf, **buf_ptr; int *iparams, *jval, *ival; double *rval, *rvalt; int imin, imax, jmin, jmax, irow, icol, nnz; int nrows, kmin, kmax, lmin, lmax, jdx; int LOOPNUM = 100; void **blk_ptr; void *blk; int blk_size, tsize, zero; int *iblk, *jblk, *blkidx; int *tblk_ptr; int *ivalt, *jvalt, *iparamst; int *iblk_t, *jblk_t, *blkidx_t; /* Hypre declarations */ int ierr; #if USE_HYPRE HYPRE_StructGrid grid; HYPRE_StructStencil stencil; HYPRE_StructMatrix matrix; HYPRE_StructVector vec_x, vec_y; int i4, j4, ndim, nelems, offsets[7][3]; int stencil_indices[7], hlo[3], hhi[3]; double weights[7]; double *values; double alpha, beta; int *rows, *cols; #endif /* *** Intitialize a message passing library */ zero = 0; one = 1; ierr = MPI_Init(&argc, &argv); /* *** Initialize GA There are 2 choices: ga_initialize or ga_initialize_ltd. In the first case, there is no explicit limit on memory usage. In the second, user can set limit (per processor) in bytes. 
*/ t_beg = GA_Wtime(); NGA_Initialize(); GP_Initialize(); t_ga_in = GA_Wtime() - t_beg; NGA_Dgop(&t_ga_in,one,"+"); t_ga_tot = 0.0; t_ga_trans = 0.0; t_get_blk_csr = 0.0; t_create_csr_ga = 0.0; t_trans_blk_csr = 0.0; t_trans_blk = 0.0; t_gp_get = 0.0; t_gp_malloc = 0.0; t_gp_assign = 0.0; t_mult = 0.0; t_get = 0.0; t_gp_tget = 0.0; t_hypre_strct = 0.0; prtot = 0.0; prtot2 = 0.0; gatot = 0.0; hypretot = 0.0; me = NGA_Nodeid(); me_plus = me + 1; nprocs = NGA_Nnodes(); if (me == 0) { printf("Time to initialize GA: %12.4f\n", t_ga_in/((double)nprocs)); } /* we can also use GA_set_memory_limit BEFORE first ga_create call */ ma_heap = heap + fudge; /* call GA_set_memory_limit(util_mdtob(ma_heap)) */ if (me == 0) { printf("\nNumber of cores used: %d\n\nGA initialized\n\n",nprocs); } /* *** Initialize the MA package MA must be initialized before any global array is allocated */ if (!MA_init(MT_DBL, stack, ma_heap)) NGA_Error("ma_init failed",-1); /* create a sparse LMAX x LMAX matrix and two vectors of length LMAX. The matrix is stored in compressed row format. One of the vectors is filled with random data and the other is filled with zeros. 
*/ idim = IMAX; jdim = JMAX; kdim = KMAX; ntot = idim*jdim*kdim; if (me == 0) { printf("\nDimension of matrix: %d\n\n",ntot); } t_beg = GA_Wtime(); grid_factor(nprocs,idim,jdim,kdim,&pdi,&pdj,&pdk); if (me == 0) { printf("\nProcessor grid configuration\n"); printf(" PDX: %d\n",pdi); printf(" PDY: %d\n",pdj); printf(" PDZ: %d\n\n",pdk); printf(" Number of Loops: %d\n",LOOPNUM); } create_laplace_mat(idim,jdim,kdim,pdi,pdj,pdk,&g_a_data,&g_a_j,&g_a_i,&mapc); t_cnstrct = GA_Wtime() - t_beg; g_b = NGA_Create_handle(); NGA_Set_data(g_b,one,&ntot,MT_DBL); NGA_Set_irreg_distr(g_b,mapc,&nprocs); status = NGA_Allocate(g_b); /* fill g_b with random values */ NGA_Distribution(g_b,me,blo,bhi); NGA_Access(g_b,blo,bhi,&p_b,&ld); ld = bhi[0]-blo[0]+1; btot = ld; vector = (double*)malloc(ld*sizeof(double)); for (i=0; i<ld; i++) { idum = 0; p_b[i] = ran3(&idum); vector[i] = p_b[i]; } NGA_Release(g_b,blo,bhi); NGA_Sync(); g_c = NGA_Create_handle(); NGA_Set_data(g_c,one,&ntot,MT_DBL); NGA_Set_irreg_distr(g_c,mapc,&nprocs); status = NGA_Allocate(g_c); NGA_Zero(g_c); #if USE_HYPRE /* Assemble HYPRE grid and use that to create matrix. 
Start by creating grid partition */ ndim = 3; i = me; ip = i%pdi; i = (i-ip)/pdi; jp = i%pdj; kp = (i-jp)/pdj; lo[0] = (int)(((double)idim)*((double)ip)/((double)pdi)); if (ip < pdi-1) { hi[0] = (int)(((double)idim)*((double)(ip+1))/((double)pdi)) - 1; } else { hi[0] = idim - 1; } lo[1] = (int)(((double)jdim)*((double)jp)/((double)pdj)); if (jp < pdj-1) { hi[1] = (int)(((double)jdim)*((double)(jp+1))/((double)pdj)) - 1; } else { hi[1] = jdim - 1; } lo[2] = (int)(((double)kdim)*((double)kp)/((double)pdk)); if (kp < pdk-1) { hi[2] = (int)(((double)kdim)*((double)(kp+1))/((double)pdk)) - 1; } else { hi[2] = kdim - 1; } /* Create grid */ hlo[0] = lo[0]; hlo[1] = lo[1]; hlo[2] = lo[2]; hhi[0] = hi[0]; hhi[1] = hi[1]; hhi[2] = hi[2]; ierr = HYPRE_StructGridCreate(MPI_COMM_WORLD, ndim, &grid); ierr = HYPRE_StructGridSetExtents(grid, hlo, hhi); ierr = HYPRE_StructGridAssemble(grid); /* Create stencil */ offsets[0][0] = 0; offsets[0][1] = 0; offsets[0][2] = 0; offsets[1][0] = 1; offsets[1][1] = 0; offsets[1][2] = 0; offsets[2][0] = 0; offsets[2][1] = 1; offsets[2][2] = 0; offsets[3][0] = 0; offsets[3][1] = 0; offsets[3][2] = 1; offsets[4][0] = -1; offsets[4][1] = 0; offsets[4][2] = 0; offsets[5][0] = 0; offsets[5][1] = -1; offsets[5][2] = 0; offsets[6][0] = 0; offsets[6][1] = 0; offsets[6][2] = -1; nelems = 7; ierr = HYPRE_StructStencilCreate(ndim, nelems, &stencil); for (i=0; i<nelems; i++) { ierr = HYPRE_StructStencilSetElement(stencil, i, offsets[i]); } ncells = (hi[0]-lo[0]+1)*(hi[1]-lo[1]+1)*(hi[2]-lo[2]+1); jcnt = 7*ncells; values = (double*)malloc(jcnt*sizeof(double)); jcnt = 0; weights[0] = 6.0; weights[1] = -1.0; weights[2] = -1.0; weights[3] = -1.0; weights[4] = -1.0; weights[5] = -1.0; weights[6] = -1.0; for (i=0; i<ncells; i++) { for (j=0; j<7; j++) { values[jcnt] = weights[j]; jcnt++; } } ierr = HYPRE_StructMatrixCreate(MPI_COMM_WORLD, grid, stencil, &matrix); ierr = HYPRE_StructMatrixInitialize(matrix); for (i=0; i<7; i++) { stencil_indices[i] = i; } ierr = 
HYPRE_StructMatrixSetBoxValues(matrix, hlo, hhi, 7, stencil_indices, values); free(values); /* Check all six sides of current box to see if any are boundaries. Set values to zero if they are. */ if (hi[0] == idim-1) { ncells = (hi[1]-lo[1]+1)*(hi[2]-lo[2]+1); hlo[0] = idim-1; hhi[0] = idim-1; hlo[1] = lo[1]; hhi[1] = hi[1]; hlo[2] = lo[2]; hhi[2] = hi[2]; values = (double*)malloc(ncells*sizeof(double)); for (i=0; i<ncells; i++) values[i] = 0.0; i4 = 1; j4 = 1; ierr = HYPRE_StructMatrixSetBoxValues(matrix, hlo, hhi, i4, &j4, values); free(values); } if (hi[1] == jdim-1) { ncells = (hi[0]-lo[0]+1)*(hi[2]-lo[2]+1); hlo[0] = lo[0]; hhi[0] = hi[0]; hlo[1] = jdim-1; hhi[1] = jdim-1; hlo[2] = lo[2]; hhi[2] = hi[2]; values = (double*)malloc(ncells*sizeof(double)); for (i=0; i<ncells; i++) values[i] = 0.0; i4 = 1; j4 = 2; ierr = HYPRE_StructMatrixSetBoxValues(matrix, hlo, hhi, i4, &j4, values); free(values); } if (hi[2] == kdim-1) { ncells = (hi[0]-lo[0]+1)*(hi[1]-lo[1]+1); hlo[0] = lo[0]; hhi[0] = hi[0]; hlo[1] = lo[1]; hhi[1] = hi[1]; hlo[2] = kdim-1; hhi[2] = kdim-1; values = (double*)malloc(ncells*sizeof(double)); for (i=0; i<ncells; i++) values[i] = 0.0; i4 = 1; j4 = 3; ierr = HYPRE_StructMatrixSetBoxValues(matrix, hlo, hhi, i4, &j4, values); free(values); } if (lo[0] == 0) { ncells = (hi[1]-lo[1]+1)*(hi[2]-lo[2]+1); hlo[0] = 0; hhi[0] = 0; hlo[1] = lo[1]; hhi[1] = hi[1]; hlo[2] = lo[2]; hhi[2] = hi[2]; values = (double*)malloc(ncells*sizeof(double)); for (i=0; i<ncells; i++) values[i] = 0.0; i4 = 1; j4 = 4; ierr = HYPRE_StructMatrixSetBoxValues(matrix, hlo, hhi, i4, &j4, values); free(values); } if (lo[1] == 0) { ncells = (hi[0]-lo[0]+1)*(hi[2]-lo[2]+1); hlo[0] = lo[0]; hhi[0] = hi[0]; hlo[1] = 0; hhi[1] = 0; hlo[2] = lo[2]; hhi[2] = hi[2]; values = (double*)malloc(ncells*sizeof(double)); for (i=0; i<ncells; i++) values[i] = 0.0; i4 = 1; j4 = 5; ierr = HYPRE_StructMatrixSetBoxValues(matrix, hlo, hhi, i4, &j4, values); free(values); } if (lo[2] == 1) { ncells = 
(hi[1]-lo[1]+1)*(hi[2]-lo[2]+1); hlo[0] = lo[0]; hhi[0] = hi[0]; hlo[1] = lo[1]; hhi[1] = hi[1]; hlo[2] = 0; hhi[2] = 0; values = (double*)malloc(ncells*sizeof(double)); for (i=0; i<ncells; i++) values[i] = 0.0; i4 = 1; j4 = 6; ierr = HYPRE_StructMatrixSetBoxValues(matrix, hlo, hhi, i4, &j4, values); free(values); } ierr = HYPRE_StructMatrixAssemble(matrix); /* Create vectors for matrix-vector multiply */ ierr = HYPRE_StructVectorCreate(MPI_COMM_WORLD, grid, &vec_x); ierr = HYPRE_StructVectorInitialize(vec_x); hlo[0] = lo[0]; hlo[1] = lo[1]; hlo[2] = lo[2]; hhi[0] = hi[0]; hhi[1] = hi[1]; hhi[2] = hi[2]; ierr = HYPRE_StructVectorSetBoxValues(vec_x, hlo, hhi, vector); ierr = HYPRE_StructVectorAssemble(vec_x); NGA_Distribution(g_a_i,me,blo,bhi); if (bhi[1] > ntot-1) { bhi[1] = ntot-1; } btot = (hi[0]-lo[0]+1)*(hi[1]-lo[1]+1)*(hi[2]-lo[2]+1); for (i=0; i<btot; i++) vector[i] = 0.0; hlo[0] = lo[0]; hlo[1] = lo[1]; hlo[2] = lo[2]; hhi[0] = hi[0]; hhi[1] = hi[1]; hhi[2] = hi[2]; ierr = HYPRE_StructVectorGetBoxValues(vec_x, hlo, hhi, vector); for (i=0; i<btot; i++) vector[i] = 0.0; ierr = HYPRE_StructVectorCreate(MPI_COMM_WORLD, grid, &vec_y); ierr = HYPRE_StructVectorInitialize(vec_y); ierr = HYPRE_StructVectorSetBoxValues(vec_y, hlo, hhi, vector); ierr = HYPRE_StructVectorAssemble(vec_y); #endif /* Multiply sparse matrix. 
Start by accessing pointers to local portions of g_a_data, g_a_j, g_a_i */ NGA_Sync(); for (iloop=0; iloop<LOOPNUM; iloop++) { t_beg2 = GA_Wtime(); NGA_Distribution(g_c,me,blo,bhi); NGA_Access(g_c,blo,bhi,&p_c,&ld_c); for (i = 0; i<bhi[0]-blo[0]+1; i++) { p_c[i] = 0.0; } /* get number of matrix blocks coupled to this process */ NGA_Get(g_a_i,&me,&me,&lo_bl,&one); #if 1 NGA_Get(g_a_i,&me_plus,&me_plus,&hi_bl,&one); hi_bl--; total_procs = hi_bl - lo_bl + 1; blk_ptr = (void**)malloc(sizeof(void*)); /* Loop through matrix blocks */ ioff = 0; for (iblock = 0; iblock<total_procs; iblock++) { t_beg = GA_Wtime(); jdx = lo_bl+iblock; #if 0 GP_Access_element(g_a_data, &jdx, &blk_ptr[0], &isize); #endif #if 1 GP_Get_size(g_a_data, &jdx, &jdx, &isize); #endif blk = (void*)malloc(isize); #if 1 GP_Get(g_a_data, &jdx, &jdx, blk, blk_ptr, &one, &blk_size, &one, &tsize, 0); #endif t_gp_get = t_gp_get + GA_Wtime() - t_beg; iparams = (int*)blk_ptr[0]; rval = (double*)(iparams+7); imin = iparams[0]; imax = iparams[1]; jmin = iparams[2]; jmax = iparams[3]; irow = iparams[4]; icol = iparams[5]; nnz = iparams[6]; jval = (int*)(rval+nnz); ival = (int*)(jval+nnz); nrows = imax - imin + 1; bvec = (double*)malloc((jmax-jmin+1)*sizeof(double)); j = 0; t_beg = GA_Wtime(); NGA_Get(g_b,&jmin,&jmax,bvec,&j); t_get = t_get + GA_Wtime() - t_beg; t_beg = GA_Wtime(); for (i=0; i<nrows; i++) { kmin = ival[i]; kmax = ival[i+1]-1; tempc = 0.0; for (j = kmin; j<=kmax; j++) { jj = jval[j]; tempc = tempc + rval[j]*bvec[jj]; } p_c[i] = p_c[i] + tempc; } t_mult = t_mult + GA_Wtime() - t_beg; free(bvec); free(blk); } NGA_Sync(); t_ga_tot = t_ga_tot + GA_Wtime() - t_beg2; NGA_Distribution(g_c,me,blo,bhi); NGA_Release(g_c,blo,bhi); #if USE_HYPRE alpha = 1.0; beta = 0.0; t_beg = GA_Wtime(); ierr = HYPRE_StructMatrixMatvec(alpha, matrix, vec_x, beta, vec_y); t_hypre_strct = t_hypre_strct + GA_Wtime() - t_beg; hlo[0] = lo[0]; hlo[1] = lo[1]; hlo[2] = lo[2]; hhi[0] = hi[0]; hhi[1] = hi[1]; hhi[2] = hi[2]; ierr = 
HYPRE_StructVectorGetBoxValues(vec_y, hlo, hhi, vector); NGA_Distribution(g_c,me,hlo,hhi); cbuf = (double*)malloc((hhi[0]-hlo[0]+1)*sizeof(double)); NGA_Get(g_c,hlo,hhi,cbuf,&one); prdot = 0.0; dotga = 0.0; dothypre = 0.0; for (i=0; i<(hhi[0]-hlo[0]+1); i++) { dothypre = dothypre + vector[i]*vector[i]; dotga = dotga + cbuf[i]*cbuf[i]; prdot = prdot + (vector[i]-cbuf[i])*(vector[i]-cbuf[i]); } NGA_Dgop(&dotga,1,"+"); NGA_Dgop(&dothypre,1,"+"); NGA_Dgop(&prdot,1,"+"); gatot += sqrt(dotga); hypretot += sqrt(dothypre); prtot += sqrt(prdot); free(cbuf); #endif /* Transpose matrix. Start by making local copies of ival and jval arrays for the sparse matrix of blocks stored in the GP array */ #if 1 t_beg2 = GA_Wtime(); t_beg3 = GA_Wtime(); iblk = (int*)malloc((nprocs+1)*sizeof(int)); iblk_t = (int*)malloc((nprocs+1)*sizeof(int)); #if 0 NGA_Get(g_a_i,&zero,&nprocs,iblk,&one); #else if (me == 0) { NGA_Get(g_a_i,&zero,&nprocs,iblk,&one); } else { for (i=0; i<nprocs+1; i++) { iblk[i] = 0; } } GA_Igop(iblk,nprocs+1,"+"); #endif jblk = (int*)malloc(iblk[nprocs]*sizeof(int)); jblk_t = (int*)malloc(iblk[nprocs]*sizeof(int)); iblock = iblk[nprocs]-1; #if 0 NGA_Get(g_a_j,&zero,&iblock,jblk,&one); #else if (me == 0) { NGA_Get(g_a_j,&zero,&iblock,jblk,&one); } else { for (i=0; i<iblock+1; i++) { jblk[i] = 0; } } GA_Igop(jblk,iblock+1,"+"); #endif iblock++; blkidx = (int*)malloc(iblk[nprocs]*sizeof(int)); blkidx_t = (int*)malloc(iblk[nprocs]*sizeof(int)); for (i=0; i<iblock; i++) { blkidx[i] = i; } iblock = nprocs; t_get_blk_csr = t_get_blk_csr + GA_Wtime() - t_beg3; t_beg3 = GA_Wtime(); stran(iblock, iblock, iblk, jblk, blkidx, iblk_t, jblk_t, blkidx_t); t_trans_blk_csr = t_trans_blk_csr + GA_Wtime() - t_beg3; t_beg3 = GA_Wtime(); gt_a_data = GP_Create_handle(); i = iblk_t[nprocs]; GP_Set_dimensions(gt_a_data, one, &i); GP_Set_irreg_distr(gt_a_data, iblk_t, &nprocs); GP_Allocate(gt_a_data); gt_a_j = NGA_Create_handle(); i = iblk_t[nprocs]; NGA_Set_data(gt_a_j, one, &i, C_INT); 
NGA_Set_irreg_distr(gt_a_j, iblk_t, &nprocs); NGA_Allocate(gt_a_j); gt_a_i = NGA_Create_handle(); i = nprocs+1; NGA_Set_data(gt_a_i,one,&i,C_INT); for (i=0; i<nprocs; i++) mapc[i] = i; NGA_Set_irreg_distr(gt_a_i, mapc, &nprocs); NGA_Allocate(gt_a_i); /* copy i and j arrays of transposed matrix into distributed arrays */ if (me==0) { lo_bl = 0; hi_bl = nprocs; NGA_Put(gt_a_i,&lo_bl,&hi_bl,iblk_t,&one); lo_bl = 0; hi_bl = iblk_t[nprocs]-1; NGA_Put(gt_a_j,&lo_bl,&hi_bl,jblk_t,&one); } NGA_Sync(); lo_bl = iblk[me]; hi_bl = iblk[me+1]; total_procs = hi_bl - lo_bl + 1; total_procs = hi_bl - lo_bl; t_create_csr_ga = t_create_csr_ga + GA_Wtime() - t_beg3; for (iblock = lo_bl; iblock < hi_bl; iblock++) { t_beg4 = GA_Wtime(); jdx = blkidx_t[iblock]; GP_Get_size(g_a_data, &jdx, &jdx, &isize); blk = (void*)malloc(isize); GP_Get(g_a_data, &jdx, &jdx, blk, blk_ptr, &one, &blk_size, &one, &tsize, 0); /* Parameters for original block */ iparams = (int*)blk_ptr[0]; rval = (double*)(iparams+7); imin = iparams[0]; imax = iparams[1]; jmin = iparams[2]; jmax = iparams[3]; irow = iparams[4]; icol = iparams[5]; nnz = iparams[6]; jval = (int*)(rval+nnz); ival = (int*)(jval+nnz); /* Create transposed block */ isize = 7*sizeof(int) + nnz*(sizeof(double)+sizeof(int)) + (jmax-jmin+2)*sizeof(int); t_gp_tget = t_gp_tget + GA_Wtime() - t_beg4; t_beg4 = GA_Wtime(); tblk_ptr = (int*)GP_Malloc(isize); t_gp_malloc = t_gp_malloc + GA_Wtime() - t_beg4; t_beg3 = GA_Wtime(); iparamst = (int*)tblk_ptr; rvalt = (double*)(iparamst+7); jvalt = (int*)(rvalt+nnz); ivalt = (int*)(jvalt+nnz); iparamst[0] = jmin; iparamst[1] = jmax; iparamst[2] = imin; iparamst[3] = imax; iparamst[4] = icol; iparamst[5] = irow; iparamst[6] = nnz; i = imax-imin+1; j = jmax-jmin+1; stranr(i, j, ival, jval, rval, ivalt, jvalt, rvalt); t_trans_blk = t_trans_blk + GA_Wtime() - t_beg3; t_beg4 = GA_Wtime(); GP_Assign_local_element(gt_a_data, &iblock, (void*)tblk_ptr, isize); t_gp_assign = t_gp_assign + GA_Wtime() - t_beg4; #if 1 
free(blk); #endif } /* Clean up after transpose */ #if 1 free(iblk); free(iblk_t); free(jblk); free(jblk_t); free(blkidx); free(blkidx_t); #endif NGA_Sync(); t_ga_trans = t_ga_trans + GA_Wtime() - t_beg2; #if USE_HYPRE alpha = 1.0; beta = 0.0; ierr = HYPRE_StructMatrixMatvec(alpha, matrix, vec_x, beta, vec_y); hlo[0] = lo[0]; hlo[1] = lo[1]; hlo[2] = lo[2]; hhi[0] = hi[0]; hhi[1] = hi[1]; hhi[2] = hi[2]; ierr = HYPRE_StructVectorGetBoxValues(vec_y, hlo, hhi, vector); NGA_Distribution(g_c,me,hlo,hhi); cbuf = (double*)malloc((hhi[0]-hlo[0]+1)*sizeof(double)); NGA_Get(g_c,hlo,hhi,cbuf,&one); dothypre = 0.0; dotga = 0.0; prdot2 = 0.0; for (i=0; i<(hhi[0]-hlo[0]+1); i++) { dothypre = dothypre + vector[i]*vector[i]; dotga = dotga + cbuf[i]*cbuf[i]; if (fabs(vector[i]-cbuf[i]) > 1.0e-10) { printf("p[%d] i: %d vector: %f cbuf: %f\n",me,i,vector[i],cbuf[i]); } prdot2 = prdot2 + (vector[i]-cbuf[i])*(vector[i]-cbuf[i]); } NGA_Dgop(&dotga,1,"+"); NGA_Dgop(&dothypre,1,"+"); NGA_Dgop(&prdot2,1,"+"); prtot2 += sqrt(prdot2); gatot2 += sqrt(dotga); hypretot2 += sqrt(dothypre); free(cbuf); free(blk_ptr); #endif /* Clean up transposed matrix */ GP_Distribution(gt_a_data,me,blo,bhi); for (i=blo[0]; i<bhi[0]; i++) { GP_Free(GP_Free_local_element(gt_a_data,&i)); } GP_Destroy(gt_a_data); NGA_Destroy(gt_a_i); NGA_Destroy(gt_a_j); #endif #endif } free(vector); #if USE_HYPRE if (me == 0) { printf("Magnitude of GA solution: %e\n", gatot/((double)LOOPNUM)); printf("Magnitude of HYPRE solution: %e\n", hypretot/((double)LOOPNUM)); printf("Magnitude of GA solution(2): %e\n", gatot2/((double)LOOPNUM)); printf("Magnitude of HYPRE solution(2): %e\n", hypretot2/((double)LOOPNUM)); printf("Difference between GA and HYPRE (Struct) results: %e\n", prtot/((double)LOOPNUM)); printf("Difference between transpose and HYPRE results: %e\n", prtot2/((double)LOOPNUM)); } #endif /* Clean up arrays */ NGA_Destroy(g_b); NGA_Destroy(g_c); GP_Distribution(g_a_data,me,blo,bhi); for (i=blo[0]; i<bhi[0]; i++) { 
GP_Free(GP_Free_local_element(g_a_data,&i)); } GP_Destroy(g_a_data); NGA_Destroy(g_a_i); NGA_Destroy(g_a_j); #if USE_HYPRE ierr = HYPRE_StructStencilDestroy(stencil); ierr = HYPRE_StructGridDestroy(grid); ierr = HYPRE_StructMatrixDestroy(matrix); ierr = HYPRE_StructVectorDestroy(vec_x); ierr = HYPRE_StructVectorDestroy(vec_y); #endif NGA_Dgop(&t_cnstrct,1,"+"); NGA_Dgop(&t_get,1,"+"); NGA_Dgop(&t_gp_get,1,"+"); NGA_Dgop(&t_mult,1,"+"); NGA_Dgop(&t_ga_tot,1,"+"); NGA_Dgop(&t_ga_trans,1,"+"); NGA_Dgop(&t_get_blk_csr,1,"+"); NGA_Dgop(&t_trans_blk_csr,1,"+"); NGA_Dgop(&t_trans_blk,1,"+"); NGA_Dgop(&t_create_csr_ga,1,"+"); NGA_Dgop(&t_gp_tget,1,"+"); NGA_Dgop(&t_gp_malloc,1,"+"); NGA_Dgop(&t_gp_assign,1,"+"); #if USE_HYPRE NGA_Dgop(&t_hypre_strct,1,"+"); #endif free(mapc); if (me == 0) { printf("Time to create sparse matrix: %12.4f\n", t_cnstrct/((double)(nprocs*LOOPNUM))); printf("Time to get right hand side vector: %12.4f\n", t_get/((double)(nprocs*LOOPNUM))); printf("Time to get GP blocks: %12.4f\n", t_gp_get/((double)(nprocs*LOOPNUM))); printf("Time for sparse matrix block multiplication: %12.4f\n", t_mult/((double)(nprocs*LOOPNUM))); printf("Time for total sparse matrix multiplication: %12.4f\n", t_ga_tot/((double)(nprocs*LOOPNUM))); #if USE_HYPRE printf("Total time for HYPRE (Struct) matrix-vector multiply:%12.4f\n", t_hypre_strct/((double)(nprocs*LOOPNUM))); #endif printf("Time to get block CSR distribution: %12.4f\n", t_get_blk_csr/((double)(nprocs*LOOPNUM))); printf("Time for transposing block CSR distribution: %12.4f\n", t_trans_blk_csr/((double)(nprocs*LOOPNUM))); printf("Time for creating transposed block CSR GA: %12.4f\n", t_create_csr_ga/((double)(nprocs*LOOPNUM))); printf("Time for transposing blocks: %12.4f\n", t_trans_blk/((double)(nprocs*LOOPNUM))); printf("Time to get GP blocks for transpose: %12.4f\n", t_gp_tget/((double)(nprocs*LOOPNUM))); printf("Time to malloc GP blocks for transpose: %12.4f\n", t_gp_malloc/((double)(nprocs*LOOPNUM))); 
printf("Time to assign GP blocks for transpose: %12.4f\n", t_gp_assign/((double)(nprocs*LOOPNUM))); printf("Time for total sparse matrix transpose: %12.4f\n", t_ga_trans/((double)(nprocs*LOOPNUM))); } if (me==0) { printf("Terminating GA library\n"); } NGA_Terminate(); /* *** Tidy up after message-passing library */ ierr = MPI_Finalize(); }
int main (int argc, char *argv[]) { int i, j; int myid, num_procs; HYPRE_StructGrid grid; HYPRE_StructStencil stencil; HYPRE_StructMatrix A; HYPRE_StructVector b; HYPRE_StructVector x; HYPRE_StructSolver solver; HYPRE_StructSolver precond; /* Initialize MPI */ MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &myid); MPI_Comm_size(MPI_COMM_WORLD, &num_procs); if (num_procs != 2) { if (myid ==0) printf("Must run with 2 processors!\n"); MPI_Finalize(); return(0); } /* 1. Set up a grid */ { /* Create an empty 2D grid object */ HYPRE_StructGridCreate(MPI_COMM_WORLD, 2, &grid); /* Processor 0 owns two boxes in the grid. */ if (myid == 0) { /* Add a new box to the grid */ { int ilower[2] = {-3, 1}; int iupper[2] = {-1, 2}; HYPRE_StructGridSetExtents(grid, ilower, iupper); } /* Add a new box to the grid */ { int ilower[2] = {0, 1}; int iupper[2] = {2, 4}; HYPRE_StructGridSetExtents(grid, ilower, iupper); } } /* Processor 1 owns one box in the grid. */ else if (myid == 1) { /* Add a new box to the grid */ { int ilower[2] = {3, 1}; int iupper[2] = {6, 4}; HYPRE_StructGridSetExtents(grid, ilower, iupper); } } /* This is a collective call finalizing the grid assembly. The grid is now ``ready to be used'' */ HYPRE_StructGridAssemble(grid); } /* 2. Define the discretization stencil */ { /* Create an empty 2D, 5-pt stencil object */ HYPRE_StructStencilCreate(2, 5, &stencil); /* Define the geometry of the stencil. Each represents a relative offset (in the index space). */ { int entry; int offsets[5][2] = {{0,0}, {-1,0}, {1,0}, {0,-1}, {0,1}}; /* Assign each of the 5 stencil entries */ for (entry = 0; entry < 5; entry++) HYPRE_StructStencilSetElement(stencil, entry, offsets[entry]); } } /* 3. 
Set up a Struct Matrix */ { /* Create an empty matrix object */ HYPRE_StructMatrixCreate(MPI_COMM_WORLD, grid, stencil, &A); /* Indicate that the matrix coefficients are ready to be set */ HYPRE_StructMatrixInitialize(A); if (myid == 0) { /* Set the matrix coefficients for some set of stencil entries over all the gridpoints in my first box (account for boundary grid points later) */ { int ilower[2] = {-3, 1}; int iupper[2] = {-1, 2}; int nentries = 5; int nvalues = 30; /* 6 grid points, each with 5 stencil entries */ double values[30]; int stencil_indices[5]; for (j = 0; j < nentries; j++) /* label the stencil indices - these correspond to the offsets defined above */ stencil_indices[j] = j; for (i = 0; i < nvalues; i += nentries) { values[i] = 4.0; for (j = 1; j < nentries; j++) values[i+j] = -1.0; } HYPRE_StructMatrixSetBoxValues(A, ilower, iupper, nentries, stencil_indices, values); } /* Set the matrix coefficients for some set of stencil entries over the gridpoints in my second box */ { int ilower[2] = {0, 1}; int iupper[2] = {2, 4}; int nentries = 5; int nvalues = 60; /* 12 grid points, each with 5 stencil entries */ double values[60]; int stencil_indices[5]; for (j = 0; j < nentries; j++) stencil_indices[j] = j; for (i = 0; i < nvalues; i += nentries) { values[i] = 4.0; for (j = 1; j < nentries; j++) values[i+j] = -1.0; } HYPRE_StructMatrixSetBoxValues(A, ilower, iupper, nentries, stencil_indices, values); } } else if (myid == 1) { /* Set the matrix coefficients for some set of stencil entries over the gridpoints in my box */ { int ilower[2] = {3, 1}; int iupper[2] = {6, 4}; int nentries = 5; int nvalues = 80; /* 16 grid points, each with 5 stencil entries */ double values[80]; int stencil_indices[5]; for (j = 0; j < nentries; j++) stencil_indices[j] = j; for (i = 0; i < nvalues; i += nentries) { values[i] = 4.0; for (j = 1; j < nentries; j++) values[i+j] = -1.0; } HYPRE_StructMatrixSetBoxValues(A, ilower, iupper, nentries, stencil_indices, values); } } /* 
For each box, set any coefficients that reach ouside of the boundary to 0 */ if (myid == 0) { int maxnvalues = 6; double values[6]; for (i = 0; i < maxnvalues; i++) values[i] = 0.0; { /* Values below our first AND second box */ int ilower[2] = {-3, 1}; int iupper[2] = { 2, 1}; int stencil_indices[1] = {3}; HYPRE_StructMatrixSetBoxValues(A, ilower, iupper, 1, stencil_indices, values); } { /* Values to the left of our first box */ int ilower[2] = {-3, 1}; int iupper[2] = {-3, 2}; int stencil_indices[1] = {1}; HYPRE_StructMatrixSetBoxValues(A, ilower, iupper, 1, stencil_indices, values); } { /* Values above our first box */ int ilower[2] = {-3, 2}; int iupper[2] = {-1, 2}; int stencil_indices[1] = {4}; HYPRE_StructMatrixSetBoxValues(A, ilower, iupper, 1, stencil_indices, values); } { /* Values to the left of our second box (that do not border the first box). */ int ilower[2] = { 0, 3}; int iupper[2] = { 0, 4}; int stencil_indices[1] = {1}; HYPRE_StructMatrixSetBoxValues(A, ilower, iupper, 1, stencil_indices, values); } { /* Values above our second box */ int ilower[2] = { 0, 4}; int iupper[2] = { 2, 4}; int stencil_indices[1] = {4}; HYPRE_StructMatrixSetBoxValues(A, ilower, iupper, 1, stencil_indices, values); } } else if (myid == 1) { int maxnvalues = 4; double values[4]; for (i = 0; i < maxnvalues; i++) values[i] = 0.0; { /* Values below our box */ int ilower[2] = { 3, 1}; int iupper[2] = { 6, 1}; int stencil_indices[1] = {3}; HYPRE_StructMatrixSetBoxValues(A, ilower, iupper, 1, stencil_indices, values); } { /* Values to the right of our box */ int ilower[2] = { 6, 1}; int iupper[2] = { 6, 4}; int stencil_indices[1] = {2}; HYPRE_StructMatrixSetBoxValues(A, ilower, iupper, 1, stencil_indices, values); } { /* Values above our box */ int ilower[2] = { 3, 4}; int iupper[2] = { 6, 4}; int stencil_indices[1] = {4}; HYPRE_StructMatrixSetBoxValues(A, ilower, iupper, 1, stencil_indices, values); } } /* This is a collective call finalizing the matrix assembly. 
The matrix is now ``ready to be used'' */ HYPRE_StructMatrixAssemble(A); } /* 4. Set up Struct Vectors for b and x */ { /* Create an empty vector object */ HYPRE_StructVectorCreate(MPI_COMM_WORLD, grid, &b); HYPRE_StructVectorCreate(MPI_COMM_WORLD, grid, &x); /* Indicate that the vector coefficients are ready to be set */ HYPRE_StructVectorInitialize(b); HYPRE_StructVectorInitialize(x); if (myid == 0) { /* Set the vector coefficients over the gridpoints in my first box */ { int ilower[2] = {-3, 1}; int iupper[2] = {-1, 2}; int nvalues = 6; /* 6 grid points */ double values[6]; for (i = 0; i < nvalues; i ++) values[i] = 1.0; HYPRE_StructVectorSetBoxValues(b, ilower, iupper, values); for (i = 0; i < nvalues; i ++) values[i] = 0.0; HYPRE_StructVectorSetBoxValues(x, ilower, iupper, values); } /* Set the vector coefficients over the gridpoints in my second box */ { int ilower[2] = { 0, 1}; int iupper[2] = { 2, 4}; int nvalues = 12; /* 12 grid points */ double values[12]; for (i = 0; i < nvalues; i ++) values[i] = 1.0; HYPRE_StructVectorSetBoxValues(b, ilower, iupper, values); for (i = 0; i < nvalues; i ++) values[i] = 0.0; HYPRE_StructVectorSetBoxValues(x, ilower, iupper, values); } } else if (myid == 1) { /* Set the vector coefficients over the gridpoints in my box */ { int ilower[2] = { 3, 1}; int iupper[2] = { 6, 4}; int nvalues = 16; /* 16 grid points */ double values[16]; for (i = 0; i < nvalues; i ++) values[i] = 1.0; HYPRE_StructVectorSetBoxValues(b, ilower, iupper, values); for (i = 0; i < nvalues; i ++) values[i] = 0.0; HYPRE_StructVectorSetBoxValues(x, ilower, iupper, values); } } /* This is a collective call finalizing the vector assembly. The vectors are now ``ready to be used'' */ HYPRE_StructVectorAssemble(b); HYPRE_StructVectorAssemble(x); } /* 5. Set up and use a solver (See the Reference Manual for descriptions of all of the options.) 
*/ { /* Create an empty PCG Struct solver */ HYPRE_StructPCGCreate(MPI_COMM_WORLD, &solver); /* Set PCG parameters */ HYPRE_StructPCGSetTol(solver, 1.0e-06); HYPRE_StructPCGSetPrintLevel(solver, 2); HYPRE_StructPCGSetMaxIter(solver, 50); /* Use symmetric SMG as preconditioner */ HYPRE_StructSMGCreate(MPI_COMM_WORLD, &precond); HYPRE_StructSMGSetMaxIter(precond, 1); HYPRE_StructSMGSetTol(precond, 0.0); HYPRE_StructSMGSetZeroGuess(precond); HYPRE_StructSMGSetNumPreRelax(precond, 1); HYPRE_StructSMGSetNumPostRelax(precond, 1); /* Set preconditioner and solve */ HYPRE_StructPCGSetPrecond(solver, HYPRE_StructSMGSolve, HYPRE_StructSMGSetup, precond); HYPRE_StructPCGSetup(solver, A, b, x); HYPRE_StructPCGSolve(solver, A, b, x); } /* Free memory */ HYPRE_StructGridDestroy(grid); HYPRE_StructStencilDestroy(stencil); HYPRE_StructMatrixDestroy(A); HYPRE_StructVectorDestroy(b); HYPRE_StructVectorDestroy(x); HYPRE_StructPCGDestroy(solver); HYPRE_StructSMGDestroy(precond); /* Finalize MPI */ MPI_Finalize(); return (0); }