void HypreSolver2D::set_x0(double *x0) { HYPRE_IJVectorInitialize(hv_x); HYPRE_IJVectorSetValues(hv_x, local_size, &rows[0], x0); HYPRE_IJVectorAssemble(hv_x); HYPRE_IJVectorGetObject(hv_x, (void **) &par_x); }
void HypreSolver2D::set_rhs(double *rhs) { for(int i=0; i<local_size; i++) rhs[i] = rhs[i]*h2; HYPRE_IJVectorInitialize(hv_b); HYPRE_IJVectorSetValues(hv_b, local_size, &rows[0], rhs); HYPRE_IJVectorAssemble(hv_b); HYPRE_IJVectorGetObject(hv_b, (void **) &par_b); }
int HYPRE_IJVectorRead( const char *filename, MPI_Comm comm, int type, HYPRE_IJVector *vector_ptr ) { HYPRE_IJVector vector; HYPRE_BigInt jlower, jupper, j; double value; int myid; char new_filename[255]; FILE *file; MPI_Comm_rank(comm, &myid); sprintf(new_filename,"%s.%05d", filename, myid); if ((file = fopen(new_filename, "r")) == NULL) { printf("Error: can't open input file %s\n", new_filename); hypre_error_in_arg(1); return hypre_error_flag; } #ifdef HYPRE_LONG_LONG fscanf(file, "%lld %lld", &jlower, &jupper); #else fscanf(file, "%d %d", &jlower, &jupper); #endif HYPRE_IJVectorCreate(comm, jlower, jupper, &vector); HYPRE_IJVectorSetObjectType(vector, type); HYPRE_IJVectorInitialize(vector); #ifdef HYPRE_LONG_LONG while ( fscanf(file, "%lld %le", &j, &value) != EOF ) #else while ( fscanf(file, "%d %le", &j, &value) != EOF ) #endif { HYPRE_IJVectorSetValues(vector, 1, &j, &value); } HYPRE_IJVectorAssemble(vector); fclose(file); *vector_ptr = vector; return hypre_error_flag; }
/* Set initial conditions in u */ static void SetInitialProfiles(HYPRE_IJVector Uij, UserData data, long int local_length, long int my_base) { int isubx, isuby, lx, ly, jx, jy; long int offset; realtype dx, dy, x, y, cx, cy, xmid, ymid; realtype *udata; HYPRE_Int *iglobal; /* Set pointer to data array in vector u */ udata = (realtype*) malloc(local_length*sizeof(realtype)); iglobal = (HYPRE_Int*) malloc(local_length*sizeof(HYPRE_Int)); /* Get mesh spacings, and subgrid indices for this PE */ dx = data->dx; dy = data->dy; isubx = data->isubx; isuby = data->isuby; /* Load initial profiles of c1 and c2 into local u vector. Here lx and ly are local mesh point indices on the local subgrid, and jx and jy are the global mesh point indices. */ offset = 0; xmid = RCONST(0.5)*(XMIN + XMAX); ymid = RCONST(0.5)*(YMIN + YMAX); for (ly = 0; ly < MYSUB; ly++) { jy = ly + isuby*MYSUB; y = YMIN + jy*dy; cy = SUNSQR(RCONST(0.1)*(y - ymid)); cy = RCONST(1.0) - cy + RCONST(0.5)*SUNSQR(cy); for (lx = 0; lx < MXSUB; lx++) { jx = lx + isubx*MXSUB; x = XMIN + jx*dx; cx = SUNSQR(RCONST(0.1)*(x - xmid)); cx = RCONST(1.0) - cx + RCONST(0.5)*SUNSQR(cx); iglobal[offset] = my_base + offset; udata[offset++] = C1_SCALE*cx*cy; iglobal[offset] = my_base + offset; udata[offset++] = C2_SCALE*cx*cy; } } HYPRE_IJVectorSetValues(Uij, local_length, iglobal, udata); free(iglobal); free(udata); }
static void SetIC(HYPRE_IJVector Uij, realtype dx, long int my_length, long int my_base) { int i; HYPRE_Int *iglobal; realtype x; realtype *udata; /* Set pointer to data array and get local length of u. */ udata = (realtype*) malloc(my_length*sizeof(realtype)); iglobal = (HYPRE_Int*) malloc(my_length*sizeof(HYPRE_Int)); /* Load initial profile into u vector */ for (i = 0; i < my_length; i++) { iglobal[i] = my_base + i; x = (iglobal[i] + 1)*dx; udata[i] = x*(XMAX - x)*SUNRexp(RCONST(2.0)*x); } HYPRE_IJVectorSetValues(Uij, my_length, iglobal, udata); free(iglobal); free(udata); }
int main (int argc, char *argv[]) { HYPRE_Int i; int myid, num_procs; int N, n; HYPRE_Int ilower, iupper; HYPRE_Int local_size, extra; int solver_id; int print_solution, print_system; double h, h2; HYPRE_IJMatrix A; HYPRE_ParCSRMatrix parcsr_A; HYPRE_IJVector b; HYPRE_ParVector par_b; HYPRE_IJVector x; HYPRE_ParVector par_x; HYPRE_Solver solver, precond; /* Initialize MPI */ MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &myid); MPI_Comm_size(MPI_COMM_WORLD, &num_procs); /* Default problem parameters */ n = 33; solver_id = 0; print_solution = 0; print_system = 0; /* Parse command line */ { int arg_index = 0; int print_usage = 0; while (arg_index < argc) { if ( strcmp(argv[arg_index], "-n") == 0 ) { arg_index++; n = atoi(argv[arg_index++]); } else if ( strcmp(argv[arg_index], "-solver") == 0 ) { arg_index++; solver_id = atoi(argv[arg_index++]); } else if ( strcmp(argv[arg_index], "-print_solution") == 0 ) { arg_index++; print_solution = 1; } else if ( strcmp(argv[arg_index], "-print_system") == 0 ) { arg_index++; print_system = 1; } else if ( strcmp(argv[arg_index], "-help") == 0 ) { print_usage = 1; break; } else { arg_index++; } } if ((print_usage) && (myid == 0)) { printf("\n"); printf("Usage: %s [<options>]\n", argv[0]); printf("\n"); printf(" -n <n> : problem size in each direction (default: 33)\n"); printf(" -solver <ID> : solver ID\n"); printf(" 0 - AMG (default) \n"); printf(" 1 - AMG-PCG\n"); printf(" 8 - ParaSails-PCG\n"); printf(" 50 - PCG\n"); printf(" 61 - AMG-FlexGMRES\n"); printf(" -print_solution : print the solution vector\n"); printf(" -print_system : print the matrix and rhs\n"); printf("\n"); } if (print_usage) { MPI_Finalize(); return (0); } } /* Preliminaries: want at least one processor per row */ if (n*n < num_procs) n = sqrt(num_procs) + 1; N = n*n; /* global number of rows */ h = 1.0/(n+1); /* mesh size*/ h2 = h*h; /* Each processor knows only of its own rows - the range is denoted by ilower and upper. Here we partition the rows. We account for the fact that N may not divide evenly by the number of processors. */ local_size = N/num_procs; extra = N - local_size*num_procs; ilower = local_size*myid; ilower += hypre_min(myid, extra); iupper = local_size*(myid+1); iupper += hypre_min(myid+1, extra); iupper = iupper - 1; /* How many rows do I have? */ local_size = iupper - ilower + 1; /* Create the matrix. Note that this is a square matrix, so we indicate the row partition size twice (since number of rows = number of cols) */ HYPRE_IJMatrixCreate(MPI_COMM_WORLD, ilower, iupper, ilower, iupper, &A); /* Choose a parallel csr format storage (see the User's Manual) */ HYPRE_IJMatrixSetObjectType(A, HYPRE_PARCSR); /* Initialize before setting coefficients */ HYPRE_IJMatrixInitialize(A); /* Now go through my local rows and set the matrix entries. Each row has at most 5 entries. For example, if n=3: A = [M -I 0; -I M -I; 0 -I M] M = [4 -1 0; -1 4 -1; 0 -1 4] Note that here we are setting one row at a time, though one could set all the rows together (see the User's Manual). */ { HYPRE_Int nnz; double values[5]; HYPRE_Int cols[5]; for (i = ilower; i <= iupper; i++) { nnz = 0; /* The left identity block:position i-n */ if ((i-n)>=0) { cols[nnz] = i-n; values[nnz] = -1.0; nnz++; } /* The left -1: position i-1 */ if (i%n) { cols[nnz] = i-1; values[nnz] = -1.0; nnz++; } /* Set the diagonal: position i */ cols[nnz] = i; values[nnz] = 4.0; nnz++; /* The right -1: position i+1 */ if ((i+1)%n) { cols[nnz] = i+1; values[nnz] = -1.0; nnz++; } /* The right identity block:position i+n */ if ((i+n)< N) { cols[nnz] = i+n; values[nnz] = -1.0; nnz++; } /* Set the values for row i */ HYPRE_IJMatrixSetValues(A, 1, &nnz, &i, cols, values); } } /* Assemble after setting the coefficients */ HYPRE_IJMatrixAssemble(A); /* Note: for the testing of small problems, one may wish to read in a matrix in IJ format (for the format, see the output files from the -print_system option). In this case, one would use the following routine: HYPRE_IJMatrixRead( <filename>, MPI_COMM_WORLD, HYPRE_PARCSR, &A ); <filename> = IJ.A.out to read in what has been printed out by -print_system (processor numbers are omitted). A call to HYPRE_IJMatrixRead is an *alternative* to the following sequence of HYPRE_IJMatrix calls: Create, SetObjectType, Initialize, SetValues, and Assemble */ /* Get the parcsr matrix object to use */ HYPRE_IJMatrixGetObject(A, (void**) &parcsr_A); /* Create the rhs and solution */ HYPRE_IJVectorCreate(MPI_COMM_WORLD, ilower, iupper,&b); HYPRE_IJVectorSetObjectType(b, HYPRE_PARCSR); HYPRE_IJVectorInitialize(b); HYPRE_IJVectorCreate(MPI_COMM_WORLD, ilower, iupper,&x); HYPRE_IJVectorSetObjectType(x, HYPRE_PARCSR); HYPRE_IJVectorInitialize(x); /* Set the rhs values to h^2 and the solution to zero */ { double *rhs_values, *x_values; HYPRE_Int *rows; rhs_values = calloc(local_size, sizeof(double)); x_values = calloc(local_size, sizeof(double)); rows = calloc(local_size, sizeof(HYPRE_Int)); for (i=0; i<local_size; i++) { rhs_values[i] = h2; x_values[i] = 0.0; rows[i] = ilower + i; } HYPRE_IJVectorSetValues(b, local_size, rows, rhs_values); HYPRE_IJVectorSetValues(x, local_size, rows, x_values); free(x_values); free(rhs_values); free(rows); } HYPRE_IJVectorAssemble(b); /* As with the matrix, for testing purposes, one may wish to read in a rhs: HYPRE_IJVectorRead( <filename>, MPI_COMM_WORLD, HYPRE_PARCSR, &b ); as an alternative to the following sequence of HYPRE_IJVectors calls: Create, SetObjectType, Initialize, SetValues, and Assemble */ HYPRE_IJVectorGetObject(b, (void **) &par_b); HYPRE_IJVectorAssemble(x); HYPRE_IJVectorGetObject(x, (void **) &par_x); /* Print out the system - files names will be IJ.out.A.XXXXX and IJ.out.b.XXXXX, where XXXXX = processor id */ if (print_system) { HYPRE_IJMatrixPrint(A, "IJ.out.A"); HYPRE_IJVectorPrint(b, "IJ.out.b"); } /* Choose a solver and solve the system */ /* AMG */ if (solver_id == 0) { HYPRE_Int num_iterations; double final_res_norm; /* Create solver */ HYPRE_BoomerAMGCreate(&solver); /* Set some parameters (See Reference Manual for more parameters) */ HYPRE_BoomerAMGSetPrintLevel(solver, 3); /* print solve info + parameters */ HYPRE_BoomerAMGSetCoarsenType(solver, 6); /* Falgout coarsening */ HYPRE_BoomerAMGSetRelaxType(solver, 3); /* G-S/Jacobi hybrid relaxation */ HYPRE_BoomerAMGSetNumSweeps(solver, 1); /* Sweeeps on each level */ HYPRE_BoomerAMGSetMaxLevels(solver, 20); /* maximum number of levels */ HYPRE_BoomerAMGSetTol(solver, 1e-7); /* conv. tolerance */ /* Now setup and solve! */ HYPRE_BoomerAMGSetup(solver, parcsr_A, par_b, par_x); HYPRE_BoomerAMGSolve(solver, parcsr_A, par_b, par_x); /* Run info - needed logging turned on */ HYPRE_BoomerAMGGetNumIterations(solver, &num_iterations); HYPRE_BoomerAMGGetFinalRelativeResidualNorm(solver, &final_res_norm); if (myid == 0) { printf("\n"); printf("Iterations = %lld\n", num_iterations); printf("Final Relative Residual Norm = %e\n", final_res_norm); printf("\n"); } /* Destroy solver */ HYPRE_BoomerAMGDestroy(solver); } /* PCG */ else if (solver_id == 50) { HYPRE_Int num_iterations; double final_res_norm; /* Create solver */ HYPRE_ParCSRPCGCreate(MPI_COMM_WORLD, &solver); /* Set some parameters (See Reference Manual for more parameters) */ HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */ HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */ HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */ HYPRE_PCGSetPrintLevel(solver, 2); /* prints out the iteration info */ HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */ /* Now setup and solve! */ HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x); HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x); /* Run info - needed logging turned on */ HYPRE_PCGGetNumIterations(solver, &num_iterations); HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm); if (myid == 0) { printf("\n"); printf("Iterations = %lld\n", num_iterations); printf("Final Relative Residual Norm = %e\n", final_res_norm); printf("\n"); } /* Destroy solver */ HYPRE_ParCSRPCGDestroy(solver); } /* PCG with AMG preconditioner */ else if (solver_id == 1) { HYPRE_Int num_iterations; double final_res_norm; /* Create solver */ HYPRE_ParCSRPCGCreate(MPI_COMM_WORLD, &solver); /* Set some parameters (See Reference Manual for more parameters) */ HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */ HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */ HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */ HYPRE_PCGSetPrintLevel(solver, 2); /* print solve info */ HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */ /* Now set up the AMG preconditioner and specify any parameters */ HYPRE_BoomerAMGCreate(&precond); HYPRE_BoomerAMGSetPrintLevel(precond, 1); /* print amg solution info */ HYPRE_BoomerAMGSetCoarsenType(precond, 6); HYPRE_BoomerAMGSetRelaxType(precond, 6); /* Sym G.S./Jacobi hybrid */ HYPRE_BoomerAMGSetNumSweeps(precond, 1); HYPRE_BoomerAMGSetTol(precond, 0.0); /* conv. tolerance zero */ HYPRE_BoomerAMGSetMaxIter(precond, 1); /* do only one iteration! */ /* Set the PCG preconditioner */ HYPRE_PCGSetPrecond(solver, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup, precond); /* Now setup and solve! */ HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x); HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x); /* Run info - needed logging turned on */ HYPRE_PCGGetNumIterations(solver, &num_iterations); HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm); if (myid == 0) { printf("\n"); printf("Iterations = %lld\n", num_iterations); printf("Final Relative Residual Norm = %e\n", final_res_norm); printf("\n"); } /* Destroy solver and preconditioner */ HYPRE_ParCSRPCGDestroy(solver); HYPRE_BoomerAMGDestroy(precond); } /* PCG with Parasails Preconditioner */ else if (solver_id == 8) { HYPRE_Int num_iterations; double final_res_norm; int sai_max_levels = 1; double sai_threshold = 0.1; double sai_filter = 0.05; int sai_sym = 1; /* Create solver */ HYPRE_ParCSRPCGCreate(MPI_COMM_WORLD, &solver); /* Set some parameters (See Reference Manual for more parameters) */ HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */ HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */ HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */ HYPRE_PCGSetPrintLevel(solver, 2); /* print solve info */ HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */ /* Now set up the ParaSails preconditioner and specify any parameters */ HYPRE_ParaSailsCreate(MPI_COMM_WORLD, &precond); /* Set some parameters (See Reference Manual for more parameters) */ HYPRE_ParaSailsSetParams(precond, sai_threshold, sai_max_levels); HYPRE_ParaSailsSetFilter(precond, sai_filter); HYPRE_ParaSailsSetSym(precond, sai_sym); HYPRE_ParaSailsSetLogging(precond, 3); /* Set the PCG preconditioner */ HYPRE_PCGSetPrecond(solver, (HYPRE_PtrToSolverFcn) HYPRE_ParaSailsSolve, (HYPRE_PtrToSolverFcn) HYPRE_ParaSailsSetup, precond); /* Now setup and solve! */ HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x); HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x); /* Run info - needed logging turned on */ HYPRE_PCGGetNumIterations(solver, &num_iterations); HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm); if (myid == 0) { printf("\n"); printf("Iterations = %lld\n", num_iterations); printf("Final Relative Residual Norm = %e\n", final_res_norm); printf("\n"); } /* Destory solver and preconditioner */ HYPRE_ParCSRPCGDestroy(solver); HYPRE_ParaSailsDestroy(precond); } /* Flexible GMRES with AMG Preconditioner */ else if (solver_id == 61) { HYPRE_Int num_iterations; double final_res_norm; int restart = 30; int modify = 1; /* Create solver */ HYPRE_ParCSRFlexGMRESCreate(MPI_COMM_WORLD, &solver); /* Set some parameters (See Reference Manual for more parameters) */ HYPRE_FlexGMRESSetKDim(solver, restart); HYPRE_FlexGMRESSetMaxIter(solver, 1000); /* max iterations */ HYPRE_FlexGMRESSetTol(solver, 1e-7); /* conv. tolerance */ HYPRE_FlexGMRESSetPrintLevel(solver, 2); /* print solve info */ HYPRE_FlexGMRESSetLogging(solver, 1); /* needed to get run info later */ /* Now set up the AMG preconditioner and specify any parameters */ HYPRE_BoomerAMGCreate(&precond); HYPRE_BoomerAMGSetPrintLevel(precond, 1); /* print amg solution info */ HYPRE_BoomerAMGSetCoarsenType(precond, 6); HYPRE_BoomerAMGSetRelaxType(precond, 6); /* Sym G.S./Jacobi hybrid */ HYPRE_BoomerAMGSetNumSweeps(precond, 1); HYPRE_BoomerAMGSetTol(precond, 0.0); /* conv. tolerance zero */ HYPRE_BoomerAMGSetMaxIter(precond, 1); /* do only one iteration! */ /* Set the FlexGMRES preconditioner */ HYPRE_FlexGMRESSetPrecond(solver, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup, precond); if (modify) /* this is an optional call - if you don't call it, hypre_FlexGMRESModifyPCDefault is used - which does nothing. Otherwise, you can define your own, similar to the one used here */ HYPRE_FlexGMRESSetModifyPC( solver, (HYPRE_PtrToModifyPCFcn) hypre_FlexGMRESModifyPCAMGExample); /* Now setup and solve! */ HYPRE_ParCSRFlexGMRESSetup(solver, parcsr_A, par_b, par_x); HYPRE_ParCSRFlexGMRESSolve(solver, parcsr_A, par_b, par_x); /* Run info - needed logging turned on */ HYPRE_FlexGMRESGetNumIterations(solver, &num_iterations); HYPRE_FlexGMRESGetFinalRelativeResidualNorm(solver, &final_res_norm); if (myid == 0) { printf("\n"); printf("Iterations = %lld\n", num_iterations); printf("Final Relative Residual Norm = %e\n", final_res_norm); printf("\n"); } /* Destory solver and preconditioner */ HYPRE_ParCSRFlexGMRESDestroy(solver); HYPRE_BoomerAMGDestroy(precond); } else { if (myid ==0) printf("Invalid solver id specified.\n"); } /* Print the solution */ if (print_solution) HYPRE_IJVectorPrint(x, "ij.out.x"); /* Clean up */ HYPRE_IJMatrixDestroy(A); HYPRE_IJVectorDestroy(b); HYPRE_IJVectorDestroy(x); /* Finalize MPI*/ MPI_Finalize(); return(0); }
HYPRE_Int main (HYPRE_Int argc, char *argv[]) { HYPRE_Int i; HYPRE_Int myid, num_procs; HYPRE_Int N, n; HYPRE_Int ilower, iupper; HYPRE_Int local_size, extra; HYPRE_Int solver_id; HYPRE_Int print_solution; double h, h2; #ifdef HYPRE_FORTRAN hypre_F90_Obj A; hypre_F90_Obj parcsr_A; hypre_F90_Obj b; hypre_F90_Obj par_b; hypre_F90_Obj x; hypre_F90_Obj par_x; hypre_F90_Obj solver, precond; hypre_F90_Obj long_temp_COMM; HYPRE_Int temp_COMM; HYPRE_Int precond_id; HYPRE_Int one = 1; HYPRE_Int two = 2; HYPRE_Int three = 3; HYPRE_Int six = 6; HYPRE_Int twenty = 20; HYPRE_Int thousand = 1000; HYPRE_Int hypre_type = HYPRE_PARCSR; double oo1 = 1.e-3; double tol = 1.e-7; #else HYPRE_IJMatrix A; HYPRE_ParCSRMatrix parcsr_A; HYPRE_IJVector b; HYPRE_ParVector par_b; HYPRE_IJVector x; HYPRE_ParVector par_x; HYPRE_Solver solver, precond; #endif /* Initialize MPI */ hypre_MPI_Init(&argc, &argv); hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid); hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs); /* Default problem parameters */ n = 33; solver_id = 0; print_solution = 0; /* Parse command line */ { HYPRE_Int arg_index = 0; HYPRE_Int print_usage = 0; while (arg_index < argc) { if ( strcmp(argv[arg_index], "-n") == 0 ) { arg_index++; n = atoi(argv[arg_index++]); } else if ( strcmp(argv[arg_index], "-solver") == 0 ) { arg_index++; solver_id = atoi(argv[arg_index++]); } else if ( strcmp(argv[arg_index], "-print_solution") == 0 ) { arg_index++; print_solution = 1; } else if ( strcmp(argv[arg_index], "-help") == 0 ) { print_usage = 1; break; } else { arg_index++; } } if ((print_usage) && (myid == 0)) { hypre_printf("\n"); hypre_printf("Usage: %s [<options>]\n", argv[0]); hypre_printf("\n"); hypre_printf(" -n <n> : problem size in each direction (default: 33)\n"); hypre_printf(" -solver <ID> : solver ID\n"); hypre_printf(" 0 - AMG (default) \n"); hypre_printf(" 1 - AMG-PCG\n"); hypre_printf(" 8 - ParaSails-PCG\n"); hypre_printf(" 50 - PCG\n"); hypre_printf(" -print_solution : print the solution vector\n"); hypre_printf("\n"); } if (print_usage) { hypre_MPI_Finalize(); return (0); } } /* Preliminaries: want at least one processor per row */ if (n*n < num_procs) n = sqrt(num_procs) + 1; N = n*n; /* global number of rows */ h = 1.0/(n+1); /* mesh size*/ h2 = h*h; /* Each processor knows only of its own rows - the range is denoted by ilower and upper. Here we partition the rows. We account for the fact that N may not divide evenly by the number of processors. */ local_size = N/num_procs; extra = N - local_size*num_procs; ilower = local_size*myid; ilower += hypre_min(myid, extra); iupper = local_size*(myid+1); iupper += hypre_min(myid+1, extra); iupper = iupper - 1; /* How many rows do I have? */ local_size = iupper - ilower + 1; /* Create the matrix. Note that this is a square matrix, so we indicate the row partition size twice (since number of rows = number of cols) */ #ifdef HYPRE_FORTRAN long_temp_COMM = (hypre_F90_Obj) hypre_MPI_COMM_WORLD; temp_COMM = (HYPRE_Int) hypre_MPI_COMM_WORLD; HYPRE_IJMatrixCreate(&long_temp_COMM, &ilower, &iupper, &ilower, &iupper, &A); #else HYPRE_IJMatrixCreate(hypre_MPI_COMM_WORLD, ilower, iupper, ilower, iupper, &A); #endif /* Choose a parallel csr format storage (see the User's Manual) */ #ifdef HYPRE_FORTRAN HYPRE_IJMatrixSetObjectType(&A, &hypre_type); #else HYPRE_IJMatrixSetObjectType(A, HYPRE_PARCSR); #endif /* Initialize before setting coefficients */ #ifdef HYPRE_FORTRAN HYPRE_IJMatrixInitialize(&A); #else HYPRE_IJMatrixInitialize(A); #endif /* Now go through my local rows and set the matrix entries. Each row has at most 5 entries. For example, if n=3: A = [M -I 0; -I M -I; 0 -I M] M = [4 -1 0; -1 4 -1; 0 -1 4] Note that here we are setting one row at a time, though one could set all the rows together (see the User's Manual). */ { HYPRE_Int nnz; double values[5]; HYPRE_Int cols[5]; for (i = ilower; i <= iupper; i++) { nnz = 0; /* The left identity block:position i-n */ if ((i-n)>=0) { cols[nnz] = i-n; values[nnz] = -1.0; nnz++; } /* The left -1: position i-1 */ if (i%n) { cols[nnz] = i-1; values[nnz] = -1.0; nnz++; } /* Set the diagonal: position i */ cols[nnz] = i; values[nnz] = 4.0; nnz++; /* The right -1: position i+1 */ if ((i+1)%n) { cols[nnz] = i+1; values[nnz] = -1.0; nnz++; } /* The right identity block:position i+n */ if ((i+n)< N) { cols[nnz] = i+n; values[nnz] = -1.0; nnz++; } /* Set the values for row i */ #ifdef HYPRE_FORTRAN HYPRE_IJMatrixSetValues(&A, &one, &nnz, &i, &cols[0], &values[0]); #else HYPRE_IJMatrixSetValues(A, 1, &nnz, &i, cols, values); #endif } } /* Assemble after setting the coefficients */ #ifdef HYPRE_FORTRAN HYPRE_IJMatrixAssemble(&A); #else HYPRE_IJMatrixAssemble(A); #endif /* Get the parcsr matrix object to use */ #ifdef HYPRE_FORTRAN HYPRE_IJMatrixGetObject(&A, &parcsr_A); HYPRE_IJMatrixGetObject(&A, &parcsr_A); #else HYPRE_IJMatrixGetObject(A, (void**) &parcsr_A); HYPRE_IJMatrixGetObject(A, (void**) &parcsr_A); #endif /* Create the rhs and solution */ #ifdef HYPRE_FORTRAN HYPRE_IJVectorCreate(&temp_COMM, &ilower, &iupper, &b); HYPRE_IJVectorSetObjectType(&b, &hypre_type); HYPRE_IJVectorInitialize(&b); #else HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, ilower, iupper,&b); HYPRE_IJVectorSetObjectType(b, HYPRE_PARCSR); HYPRE_IJVectorInitialize(b); #endif #ifdef HYPRE_FORTRAN HYPRE_IJVectorCreate(&temp_COMM, &ilower, &iupper, &x); HYPRE_IJVectorSetObjectType(&x, &hypre_type); HYPRE_IJVectorInitialize(&x); #else HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, ilower, iupper,&x); HYPRE_IJVectorSetObjectType(x, HYPRE_PARCSR); HYPRE_IJVectorInitialize(x); #endif /* Set the rhs values to h^2 and the solution to zero */ { double *rhs_values, *x_values; HYPRE_Int *rows; rhs_values = calloc(local_size, sizeof(double)); x_values = calloc(local_size, sizeof(double)); rows = calloc(local_size, sizeof(HYPRE_Int)); for (i=0; i<local_size; i++) { rhs_values[i] = h2; x_values[i] = 0.0; rows[i] = ilower + i; } #ifdef HYPRE_FORTRAN HYPRE_IJVectorSetValues(&b, &local_size, &rows[0], &rhs_values[0]); HYPRE_IJVectorSetValues(&x, &local_size, &rows[0], &x_values[0]); #else HYPRE_IJVectorSetValues(b, local_size, rows, rhs_values); HYPRE_IJVectorSetValues(x, local_size, rows, x_values); #endif free(x_values); free(rhs_values); free(rows); } #ifdef HYPRE_FORTRAN HYPRE_IJVectorAssemble(&b); HYPRE_IJVectorGetObject(&b, &par_b); #else HYPRE_IJVectorAssemble(b); HYPRE_IJVectorGetObject(b, (void **) &par_b); #endif #ifdef HYPRE_FORTRAN HYPRE_IJVectorAssemble(&x); HYPRE_IJVectorGetObject(&x, &par_x); #else HYPRE_IJVectorAssemble(x); HYPRE_IJVectorGetObject(x, (void **) &par_x); #endif /* Choose a solver and solve the system */ /* AMG */ if (solver_id == 0) { HYPRE_Int num_iterations; double final_res_norm; /* Create solver */ #ifdef HYPRE_FORTRAN HYPRE_BoomerAMGCreate(&solver); #else HYPRE_BoomerAMGCreate(&solver); #endif /* Set some parameters (See Reference Manual for more parameters) */ #ifdef HYPRE_FORTRAN HYPRE_BoomerAMGSetPrintLevel(&solver, &three); /* print solve info + parameters */ HYPRE_BoomerAMGSetCoarsenType(&solver, &six); /* Falgout coarsening */ HYPRE_BoomerAMGSetRelaxType(&solver, &three); /* G-S/Jacobi hybrid relaxation */ HYPRE_BoomerAMGSetNumSweeps(&solver, &one); /* Sweeeps on each level */ HYPRE_BoomerAMGSetMaxLevels(&solver, &twenty); /* maximum number of levels */ HYPRE_BoomerAMGSetTol(&solver, &tol); /* conv. tolerance */ #else HYPRE_BoomerAMGSetPrintLevel(solver, 3); /* print solve info + parameters */ HYPRE_BoomerAMGSetCoarsenType(solver, 6); /* Falgout coarsening */ HYPRE_BoomerAMGSetRelaxType(solver, 3); /* G-S/Jacobi hybrid relaxation */ HYPRE_BoomerAMGSetNumSweeps(solver, 1); /* Sweeeps on each level */ HYPRE_BoomerAMGSetMaxLevels(solver, 20); /* maximum number of levels */ HYPRE_BoomerAMGSetTol(solver, 1e-7); /* conv. tolerance */ #endif /* Now setup and solve! */ #ifdef HYPRE_FORTRAN HYPRE_BoomerAMGSetup(&solver, &parcsr_A, &par_b, &par_x); HYPRE_BoomerAMGSolve(&solver, &parcsr_A, &par_b, &par_x); #else HYPRE_BoomerAMGSetup(solver, parcsr_A, par_b, par_x); HYPRE_BoomerAMGSolve(solver, parcsr_A, par_b, par_x); #endif /* Run info - needed logging turned on */ #ifdef HYPRE_FORTRAN HYPRE_BoomerAMGGetNumIterations(&solver, &num_iterations); HYPRE_BoomerAMGGetFinalRelativeResidualNorm(&solver, &final_res_norm); #else HYPRE_BoomerAMGGetNumIterations(solver, &num_iterations); HYPRE_BoomerAMGGetFinalRelativeResidualNorm(solver, &final_res_norm); #endif if (myid == 0) { hypre_printf("\n"); hypre_printf("Iterations = %d\n", num_iterations); hypre_printf("Final Relative Residual Norm = %e\n", final_res_norm); hypre_printf("\n"); } /* Destroy solver */ #ifdef HYPRE_FORTRAN HYPRE_BoomerAMGDestroy(&solver); #else HYPRE_BoomerAMGDestroy(solver); #endif } /* PCG */ else if (solver_id == 50) { HYPRE_Int num_iterations; double final_res_norm; /* Create solver */ #ifdef HYPRE_FORTRAN HYPRE_ParCSRPCGCreate(&temp_COMM, &solver); #else HYPRE_ParCSRPCGCreate(hypre_MPI_COMM_WORLD, &solver); #endif /* Set some parameters (See Reference Manual for more parameters) */ #ifdef HYPRE_FORTRAN HYPRE_ParCSRPCGSetMaxIter(&solver, &thousand); /* max iterations */ HYPRE_ParCSRPCGSetTol(&solver, &tol); /* conv. tolerance */ HYPRE_ParCSRPCGSetTwoNorm(&solver, &one); /* use the two norm as the stopping criteria */ HYPRE_ParCSRPCGSetPrintLevel(&solver, &two); /* prints out the iteration info */ #else HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */ HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */ HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */ HYPRE_PCGSetPrintLevel(solver, 2); /* prints out the iteration info */ HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */ #endif /* Now setup and solve! */ #ifdef HYPRE_FORTRAN HYPRE_ParCSRPCGSetup(&solver, &parcsr_A, &par_b, &par_x); HYPRE_ParCSRPCGSolve(&solver, &parcsr_A, &par_b, &par_x); #else HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x); HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x); #endif /* Run info - needed logging turned on */ #ifdef HYPRE_FORTRAN HYPRE_ParCSRPCGGetNumIterations(&solver, &num_iterations); HYPRE_ParCSRPCGGetFinalRelativeResidualNorm(&solver, &final_res_norm); #else HYPRE_PCGGetNumIterations(solver, &num_iterations); HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm); #endif if (myid == 0) { hypre_printf("\n"); hypre_printf("Iterations = %d\n", num_iterations); hypre_printf("Final Relative Residual Norm = %e\n", final_res_norm); hypre_printf("\n"); } /* Destroy solver */ #ifdef HYPRE_FORTRAN HYPRE_ParCSRPCGDestroy(&solver); #else HYPRE_ParCSRPCGDestroy(solver); #endif } /* PCG with AMG preconditioner */ else if (solver_id == 1) { HYPRE_Int num_iterations; double final_res_norm; /* Create solver */ #ifdef HYPRE_FORTRAN HYPRE_ParCSRPCGCreate(&temp_COMM, &solver); #else HYPRE_ParCSRPCGCreate(hypre_MPI_COMM_WORLD, &solver); #endif /* Set some parameters (See Reference Manual for more parameters) */ #ifdef HYPRE_FORTRAN HYPRE_ParCSRPCGSetMaxIter(&solver, &thousand); /* max iterations */ HYPRE_ParCSRPCGSetTol(&solver, &tol); /* conv. tolerance */ HYPRE_ParCSRPCGSetTwoNorm(&solver, &one); /* use the two norm as the stopping criteria */ HYPRE_ParCSRPCGSetPrintLevel(&solver, &two); /* print solve info */ #else HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */ HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */ HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */ HYPRE_PCGSetPrintLevel(solver, 2); /* print solve info */ HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */ #endif /* Now set up the AMG preconditioner and specify any parameters */ #ifdef HYPRE_FORTRAN HYPRE_BoomerAMGCreate(&precond); HYPRE_BoomerAMGSetPrintLevel(&precond, &one); /* print amg solution info*/ HYPRE_BoomerAMGSetCoarsenType(&precond, &six); HYPRE_BoomerAMGSetRelaxType(&precond, &three); HYPRE_BoomerAMGSetNumSweeps(&precond, &one); HYPRE_BoomerAMGSetTol(&precond, &oo1); #else HYPRE_BoomerAMGCreate(&precond); HYPRE_BoomerAMGSetPrintLevel(precond, 1); /* print amg solution info*/ HYPRE_BoomerAMGSetCoarsenType(precond, 6); HYPRE_BoomerAMGSetRelaxType(precond, 3); HYPRE_BoomerAMGSetNumSweeps(precond, 1); HYPRE_BoomerAMGSetTol(precond, 1e-3); #endif /* Set the PCG preconditioner */ #ifdef HYPRE_FORTRAN precond_id = 2; HYPRE_ParCSRPCGSetPrecond(&solver, &precond_id, &precond); #else HYPRE_PCGSetPrecond(solver, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup, precond); #endif /* Now setup and solve! */ #ifdef HYPRE_FORTRAN HYPRE_ParCSRPCGSetup(&solver, &parcsr_A, &par_b, &par_x); HYPRE_ParCSRPCGSolve(&solver, &parcsr_A, &par_b, &par_x); #else HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x); HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x); #endif /* Run info - needed logging turned on */ #ifdef HYPRE_FORTRAN HYPRE_ParCSRPCGGetNumIterations(&solver, &num_iterations); HYPRE_ParCSRPCGGetFinalRelativeResidualNorm(&solver, &final_res_norm); #else HYPRE_PCGGetNumIterations(solver, &num_iterations); HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm); #endif if (myid == 0) { hypre_printf("\n"); hypre_printf("Iterations = %d\n", num_iterations); hypre_printf("Final Relative Residual Norm = %e\n", final_res_norm); hypre_printf("\n"); } /* Destroy solver and preconditioner */ #ifdef HYPRE_FORTRAN HYPRE_ParCSRPCGDestroy(&solver); HYPRE_BoomerAMGDestroy(&precond); #else HYPRE_ParCSRPCGDestroy(solver); HYPRE_BoomerAMGDestroy(precond); #endif } /* PCG with Parasails Preconditioner */ else if (solver_id == 8) { HYPRE_Int num_iterations; double final_res_norm; HYPRE_Int sai_max_levels = 1; double sai_threshold = 0.1; double sai_filter = 0.05; HYPRE_Int sai_sym = 1; /* Create solver */ #ifdef HYPRE_FORTRAN HYPRE_ParCSRPCGCreate(&temp_COMM, &solver); #else HYPRE_ParCSRPCGCreate(hypre_MPI_COMM_WORLD, &solver); #endif /* Set some parameters (See Reference Manual for more parameters) */ #ifdef HYPRE_FORTRAN HYPRE_ParCSRPCGSetMaxIter(&solver, &thousand); /* max iterations */ HYPRE_ParCSRPCGSetTol(&solver, &tol); /* conv. tolerance */ HYPRE_ParCSRPCGSetTwoNorm(&solver, &one); /* use the two norm as the stopping criteria */ HYPRE_ParCSRPCGSetPrintLevel(&solver, &two); /* print solve info */ #else HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */ HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */ HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */ HYPRE_PCGSetPrintLevel(solver, 2); /* print solve info */ HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */ #endif /* Now set up the ParaSails preconditioner and specify any parameters */ #ifdef HYPRE_FORTRAN HYPRE_ParaSailsCreate(&temp_COMM, &precond); #else HYPRE_ParaSailsCreate(hypre_MPI_COMM_WORLD, &precond); #endif /* Set some parameters (See Reference Manual for more parameters) */ #ifdef HYPRE_FORTRAN HYPRE_ParaSailsSetParams(&precond, &sai_threshold, &sai_max_levels); HYPRE_ParaSailsSetFilter(&precond, &sai_filter); HYPRE_ParaSailsSetSym(&precond, &sai_sym); HYPRE_ParaSailsSetLogging(&precond, &three); #else HYPRE_ParaSailsSetParams(precond, sai_threshold, sai_max_levels); HYPRE_ParaSailsSetFilter(precond, sai_filter); HYPRE_ParaSailsSetSym(precond, sai_sym); HYPRE_ParaSailsSetLogging(precond, 3); #endif /* Set the PCG preconditioner */ #ifdef HYPRE_FORTRAN precond_id = 4; HYPRE_ParCSRPCGSetPrecond(&solver, &precond_id, &precond); #else HYPRE_PCGSetPrecond(solver, (HYPRE_PtrToSolverFcn) HYPRE_ParaSailsSolve, (HYPRE_PtrToSolverFcn) HYPRE_ParaSailsSetup, precond); #endif /* Now setup and solve! */ #ifdef HYPRE_FORTRAN HYPRE_ParCSRPCGSetup(&solver, &parcsr_A, &par_b, &par_x); HYPRE_ParCSRPCGSolve(&solver, &parcsr_A, &par_b, &par_x); #else HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x); HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x); #endif /* Run info - needed logging turned on */ #ifdef HYPRE_FORTRAN HYPRE_ParCSRPCGGetNumIterations(&solver, &num_iterations); HYPRE_ParCSRPCGGetFinalRelativeResidualNorm(&solver, &final_res_norm); #else HYPRE_PCGGetNumIterations(solver, &num_iterations); HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm); #endif if (myid == 0) { hypre_printf("\n"); hypre_printf("Iterations = %d\n", num_iterations); hypre_printf("Final Relative Residual Norm = %e\n", final_res_norm); hypre_printf("\n"); } /* Destory solver and preconditioner */ #ifdef HYPRE_FORTRAN HYPRE_ParCSRPCGDestroy(&solver); HYPRE_ParaSailsDestroy(&precond); #else HYPRE_ParCSRPCGDestroy(solver); HYPRE_ParaSailsDestroy(precond); #endif } else { if (myid ==0) hypre_printf("Invalid solver id specified.\n"); } /* Print the solution */ #ifdef HYPRE_FORTRAN if (print_solution) HYPRE_IJVectorPrint(&x, "ij.out.x"); #else if (print_solution) HYPRE_IJVectorPrint(x, "ij.out.x"); #endif /* Clean up */ #ifdef HYPRE_FORTRAN HYPRE_IJMatrixDestroy(&A); HYPRE_IJVectorDestroy(&b); HYPRE_IJVectorDestroy(&x); #else HYPRE_IJMatrixDestroy(A); HYPRE_IJVectorDestroy(b); HYPRE_IJVectorDestroy(x); #endif /* Finalize MPI*/ hypre_MPI_Finalize(); return(0); }
int main(int argc, char *argv[]) { GRID *g; DOF *u_h; MAT *A, *A0, *B; MAP *map; INT i; size_t nnz, mem, mem_peak; VEC *x, *y0, *y1, *y2; double t0, t1, dnz, dnz1, mflops, mop; char *fn = "../test/cube.dat"; FLOAT mem_max = 300; INT refine = 0; phgOptionsRegisterFilename("-mesh_file", "Mesh file", (char **)&fn); phgOptionsRegisterInt("-loop_count", "Loop count", &loop_count); phgOptionsRegisterInt("-refine", "Refinement level", &refine); phgOptionsRegisterFloat("-mem_max", "Maximum memory", &mem_max); phgInit(&argc, &argv); g = phgNewGrid(-1); if (!phgImport(g, fn, FALSE)) phgError(1, "can't read file \"%s\".\n", fn); phgRefineAllElements(g, refine); u_h = phgDofNew(g, DOF_DEFAULT, 1, "u_h", DofNoAction); while (TRUE) { phgPrintf("\n"); if (phgBalanceGrid(g, 1.2, 1, NULL, 0.)) phgPrintf("Repartition mesh, %d submeshes, load imbalance: %lg\n", g->nprocs, (double)g->lif); map = phgMapCreate(u_h, NULL); A = phgMapCreateMat(map, map); A->handle_bdry_eqns = TRUE; build_matrix(A, u_h); phgMatAssemble(A); /* Note: A is unsymmetric (A' != A) if boundary entries not removed */ phgMatRemoveBoundaryEntries(A); #if 0 /* test block matrix operation */ A0 = phgMatCreateBlockMatrix(g->comm, 1, 1, &A, NULL); #else A0 = A; #endif phgPrintf("%d DOF, %d elems, %d submeshes, matrix size: %d, LIF: %lg\n", DofGetDataCountGlobal(u_h), g->nleaf_global, g->nprocs, A->rmap->nglobal, (double)g->lif); /* test PHG mat-vec multiply */ x = phgMapCreateVec(A->cmap, 1); y1 = phgMapCreateVec(A->rmap, 1); phgVecRandomize(x, 123); phgMatVec(MAT_OP_N, 1.0, A0, x, 0.0, &y1); phgPerfGetMflops(g, NULL, NULL); /* reset flops counter */ t0 = phgGetTime(NULL); for (i = 0; i < loop_count; i++) { phgMatVec(MAT_OP_N, 1.0, A0, x, 0.0, &y1); } t1 = phgGetTime(NULL); mflops = phgPerfGetMflops(g, NULL, NULL); y0 = phgVecCopy(y1, NULL); nnz = A->nnz_d + A->nnz_o; #if USE_MPI dnz1 = nnz; MPI_Reduce(&dnz1, &dnz, 1, MPI_DOUBLE, MPI_SUM, 0, g->comm); #else dnz = nnz; #endif mop = loop_count * (dnz + dnz - A->rmap->nlocal) * 1e-6; phgPrintf("\n"); t1 -= t0; phgPrintf(" PHG: time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF)\n", t1, dnz, mop / (t1 == 0 ? 1. : t1), mflops); /* test trans(A)*x */ phgPerfGetMflops(g, NULL, NULL); /* reset flops counter */ t0 = phgGetTime(NULL); for (i = 0; i < loop_count; i++) { phgMatVec(MAT_OP_T, 1.0, A0, x, 0.0, &y1); } t1 = phgGetTime(NULL); mflops = phgPerfGetMflops(g, NULL, NULL); t1 -= t0; phgPrintf(" A'*x: time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), " "err: %le\n", t1, dnz, mop / (t1 == 0 ? 1. : t1), mflops, (double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL)); /* time A * trans(A) */ phgPerfGetMflops(g, NULL, NULL); /* reset flops counter */ t0 = phgGetTime(NULL); B = phgMatMat(MAT_OP_N, MAT_OP_N, 1.0, A, A, 0.0, NULL); t1 = phgGetTime(NULL); mflops = phgPerfGetMflops(g, NULL, NULL); nnz = B->nnz_d + B->nnz_o; #if USE_MPI dnz1 = nnz; MPI_Reduce(&dnz1, &dnz, 1, MPI_DOUBLE, MPI_SUM, 0, g->comm); #else dnz = nnz; #endif /* compare B*x <--> A*A*x */ y2 = phgMatVec(MAT_OP_N, 1.0, B, x, 0.0, NULL); phgMatVec(MAT_OP_N, 1.0, A0, y0, 0.0, &y1); phgMatDestroy(&B); t1 -= t0; phgPrintf(" A*A: time %0.4lf, nnz %0.16lg, %0.2lfMF, err: %le\n", t1, dnz, mflops, (double)phgVecNorm2(phgVecAXPBY(-1.0, y1, 1.0, &y2), 0, NULL)); #if USE_PETSC { Mat ma, mb; MatInfo info; Vec va, vb, vc; PetscScalar *vec; ma = phgPetscCreateMatAIJ(A); MatGetVecs(ma, PETSC_NULL, &va); VecDuplicate(va, &vb); VecGetArray(va, &vec); memcpy(vec, x->data, x->map->nlocal * sizeof(*vec)); VecRestoreArray(va, &vec); MatMult(ma, va, vb); phgPerfGetMflops(g, NULL, NULL); /* reset flops counter */ t0 = phgGetTime(NULL); for (i = 0; i < loop_count; i++) { MatMult(ma, va, vb); } t1 = phgGetTime(NULL); mflops = phgPerfGetMflops(g, NULL, NULL); VecGetArray(vb, &vec); memcpy(y1->data, vec, x->map->nlocal * sizeof(*vec)); VecRestoreArray(vb, &vec); MatGetInfo(ma, MAT_GLOBAL_SUM, &info); /*phgPrintf(" --------------------------------------------" "-------------------------\n");*/ phgPrintf("\n"); t1 -= t0; dnz = info.nz_used; phgPrintf(" PETSc: time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), " "err: %le\n", t1, dnz, mop / (t1==0 ? 1.:t1), mflops, (double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL)); phgPerfGetMflops(g, NULL, NULL); /* reset flops counter */ t0 = phgGetTime(NULL); for (i = 0; i < loop_count; i++) { MatMultTranspose(ma, va, vb); } t1 = phgGetTime(NULL); mflops = phgPerfGetMflops(g, NULL, NULL); VecGetArray(vb, &vec); memcpy(y1->data, vec, x->map->nlocal * sizeof(*vec)); VecRestoreArray(vb, &vec); t1 -= t0; phgPrintf(" A'*x: time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), " "err: %le\n", t1, dnz, mop / (t1==0 ? 1.:t1), mflops, (double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL)); phgPerfGetMflops(g, NULL, NULL); /* reset flops counter */ t0 = phgGetTime(NULL); MatMatMult(ma, ma, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &mb); t1 = phgGetTime(NULL); mflops = phgPerfGetMflops(g, NULL, NULL); t1 -= t0; MatGetInfo(mb, MAT_GLOBAL_SUM, &info); dnz = info.nz_used; VecDuplicate(va, &vc); /* compare B*x <--> A*A*x */ MatMult(ma, vb, vc); MatMult(mb, va, vb); VecGetArray(vb, &vec); memcpy(y1->data, vec, x->map->nlocal * sizeof(*vec)); VecRestoreArray(vb, &vec); VecGetArray(vc, &vec); memcpy(y2->data, vec, x->map->nlocal * sizeof(*vec)); VecRestoreArray(vc, &vec); phgPrintf(" A*A: time %0.4lf, nnz %0.16lg, %0.2lfMF, err: %le\n", t1, dnz, mflops, (double)phgVecNorm2(phgVecAXPBY(-1.0, y1, 1.0, &y2), 0, NULL)); phgPetscMatDestroy(&mb); phgPetscMatDestroy(&ma); phgPetscVecDestroy(&va); phgPetscVecDestroy(&vb); phgPetscVecDestroy(&vc); } #endif /* USE_PETSC */ #if USE_HYPRE { HYPRE_IJMatrix ma; HYPRE_IJVector va, vb, vc; HYPRE_ParCSRMatrix par_ma; hypre_ParCSRMatrix *par_mb; HYPRE_ParVector par_va, par_vb, par_vc; HYPRE_Int offset, *ni, start, end; assert(sizeof(INT)==sizeof(int) && sizeof(FLOAT)==sizeof(double)); setup_hypre_mat(A, &ma); ni = phgAlloc(2 * A->rmap->nlocal * sizeof(*ni)); offset = A->cmap->partition[A->cmap->rank]; for (i = 0; i < A->rmap->nlocal; i++) ni[i] = i + offset; HYPRE_IJVectorCreate(g->comm, offset, offset + A->rmap->nlocal - 1, &va); HYPRE_IJVectorCreate(g->comm, offset, offset + A->rmap->nlocal - 1, &vb); HYPRE_IJVectorCreate(g->comm, offset, offset + A->rmap->nlocal - 1, &vc); HYPRE_IJVectorSetObjectType(va, HYPRE_PARCSR); HYPRE_IJVectorSetObjectType(vb, HYPRE_PARCSR); HYPRE_IJVectorSetObjectType(vc, HYPRE_PARCSR); HYPRE_IJVectorSetMaxOffProcElmts(va, 0); HYPRE_IJVectorSetMaxOffProcElmts(vb, 0); HYPRE_IJVectorSetMaxOffProcElmts(vc, 0); HYPRE_IJVectorInitialize(va); HYPRE_IJVectorInitialize(vb); HYPRE_IJVectorInitialize(vc); HYPRE_IJMatrixGetObject(ma, (void **)(void *)&par_ma); HYPRE_IJVectorGetObject(va, (void **)(void *)&par_va); HYPRE_IJVectorGetObject(vb, (void **)(void *)&par_vb); HYPRE_IJVectorGetObject(vc, (void **)(void *)&par_vc); HYPRE_IJVectorSetValues(va, A->cmap->nlocal, ni, (double *)x->data); HYPRE_IJVectorAssemble(va); HYPRE_IJVectorAssemble(vb); HYPRE_IJVectorAssemble(vc); HYPRE_IJMatrixGetRowCounts(ma, A->cmap->nlocal, ni, ni + A->rmap->nlocal); for (i = 0, nnz = 0; i < A->rmap->nlocal; i++) nnz += ni[A->rmap->nlocal + i]; #if USE_MPI dnz1 = nnz; MPI_Reduce(&dnz1, &dnz, 1, MPI_DOUBLE, MPI_SUM, 0, g->comm); #else dnz = nnz; #endif HYPRE_ParCSRMatrixMatvec(1.0, par_ma, par_va, 0.0, par_vb); phgPerfGetMflops(g, NULL, NULL); /* reset flops counter */ t0 = phgGetTime(NULL); for (i = 0; i < loop_count; i++) { HYPRE_ParCSRMatrixMatvec(1.0, par_ma, par_va, 0.0, par_vb); } t1 = phgGetTime(NULL); mflops = phgPerfGetMflops(g, NULL, NULL); HYPRE_IJVectorGetValues(vb, A->rmap->nlocal, ni, (double*)y1->data); /*phgPrintf(" --------------------------------------------" "-------------------------\n");*/ phgPrintf("\n"); t1 -= t0; phgPrintf(" HYPRE: time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), " "err: %le\n", t1, dnz, mop / (t1==0 ? 1.:t1), mflops, (double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL)); phgPerfGetMflops(g, NULL, NULL); /* reset flops counter */ t0 = phgGetTime(NULL); for (i = 0; i < loop_count; i++) { HYPRE_ParCSRMatrixMatvecT(1.0, par_ma, par_va, 0.0, par_vb); } t1 = phgGetTime(NULL); mflops = phgPerfGetMflops(g, NULL, NULL); HYPRE_IJVectorGetValues(vb, A->rmap->nlocal, ni, (double*)y1->data); t1 -= t0; phgPrintf(" A'*x: time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), " "err: %le\n", t1, dnz, mop / (t1==0 ? 1.:t1), mflops, (double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL)); phgPerfGetMflops(g, NULL, NULL); /* reset flops counter */ t0 = phgGetTime(NULL); /* Note: 'HYPRE_ParCSRMatrix' is currently typedef'ed to * 'hypre_ParCSRMatrix *' */ par_mb = hypre_ParMatmul((hypre_ParCSRMatrix *)par_ma, (hypre_ParCSRMatrix *)par_ma); t1 = phgGetTime(NULL); mflops = phgPerfGetMflops(g, NULL, NULL); start = hypre_ParCSRMatrixFirstRowIndex(par_mb); end = hypre_ParCSRMatrixLastRowIndex(par_mb) + 1; for (i = start, nnz = 0; i < end; i++) { HYPRE_Int ncols; hypre_ParCSRMatrixGetRow(par_mb, i, &ncols, NULL, NULL); hypre_ParCSRMatrixRestoreRow(par_mb, i, &ncols, NULL, NULL); nnz += ncols; } #if USE_MPI dnz1 = nnz; MPI_Reduce(&dnz1, &dnz, 1, MPI_DOUBLE, MPI_SUM, 0, g->comm); #else dnz = nnz; #endif /* compare B*x <--> A*A*x */ HYPRE_ParCSRMatrixMatvec(1.0, par_ma, par_vb, 0.0, par_vc); HYPRE_ParCSRMatrixMatvec(1.0, (void *)par_mb, par_va, 0.0, par_vb); HYPRE_IJVectorGetValues(vb, A->rmap->nlocal, ni, (double*)y1->data); HYPRE_IJVectorGetValues(vc, A->rmap->nlocal, ni, (double*)y2->data); hypre_ParCSRMatrixDestroy((par_mb)); t1 -= t0; phgPrintf(" A*A: time %0.4lf, nnz %0.16lg, %0.2lfMF, err: %le\n", t1, dnz, mflops, (double)phgVecNorm2(phgVecAXPBY(-1.0, y1, 1.0, &y2), 0, NULL)); phgFree(ni); HYPRE_IJMatrixDestroy(ma); HYPRE_IJVectorDestroy(va); HYPRE_IJVectorDestroy(vb); HYPRE_IJVectorDestroy(vc); } #endif /* USE_HYPRE */ if (A0 != A) phgMatDestroy(&A0); #if 0 if (A->rmap->nglobal > 1000) { VEC *v = phgMapCreateVec(A->rmap, 3); for (i = 0; i < v->map->nlocal; i++) { v->data[i + 0 * v->map->nlocal] = 1 * (i + v->map->partition[g->rank]); v->data[i + 1 * v->map->nlocal] = 2 * (i + v->map->partition[g->rank]); v->data[i + 2 * v->map->nlocal] = 3 * (i + v->map->partition[g->rank]); } phgMatDumpMATLAB(A, "A", "A.m"); phgVecDumpMATLAB(v, "v", "v.m"); phgFinalize(); exit(0); } #endif phgMatDestroy(&A); phgVecDestroy(&x); phgVecDestroy(&y0); phgVecDestroy(&y1); phgVecDestroy(&y2); phgMapDestroy(&map); mem = phgMemoryUsage(g, &mem_peak); dnz = mem / (1024.0 * 1024.0); dnz1 = mem_peak / (1024.0 * 1024.0); /*phgPrintf(" --------------------------------------------" "-------------------------\n");*/ phgPrintf("\n"); phgPrintf(" Memory: current %0.4lgMB, peak %0.4lgMB\n", dnz, dnz1); #if 0 { static int loop_count = 0; if (++loop_count == 4) break; } #endif if (mem_peak > 1024 * (size_t)1024 * mem_max) break; phgRefineAllElements(g, 1); } phgDofFree(&u_h); phgFreeGrid(&g); phgFinalize(); return 0; }