void HypreSolver2D::set_x0(double *x0)
{
    HYPRE_IJVectorInitialize(hv_x);
    HYPRE_IJVectorSetValues(hv_x, local_size, &rows[0], x0);
    HYPRE_IJVectorAssemble(hv_x);
    HYPRE_IJVectorGetObject(hv_x, (void **) &par_x);
}
inline void solve(K* rhs) {
    /* Borrow the caller's buffer as the local data of the hypre right-hand side,
     * solve with the strategy chosen in numfact(), restore the original pointer,
     * and copy the solution back into rhs. */
    hypre_Vector* loc = hypre_ParVectorLocalVector(reinterpret_cast<hypre_ParVector*>(hypre_IJVectorObject(reinterpret_cast<hypre_IJVector*>(_b))));
    K* b = loc->data;
    loc->data = rhs;
    HYPRE_ParVector par_b;
    HYPRE_IJVectorGetObject(_b, reinterpret_cast<void**>(&par_b));
    HYPRE_ParVector par_x;
    HYPRE_IJVectorGetObject(_x, reinterpret_cast<void**>(&par_x));
    HYPRE_ParCSRMatrix parcsr_A;
    HYPRE_IJMatrixGetObject(_A, reinterpret_cast<void**>(&parcsr_A));
    if(_strategy == 1)      /* AMG */
        HYPRE_BoomerAMGSolve(_solver, parcsr_A, par_b, par_x);
    else if(_strategy == 2) /* PCG with AMG */
        HYPRE_ParCSRPCGSolve(_solver, parcsr_A, par_b, par_x);
    else                    /* FlexGMRES with AMG */
        HYPRE_ParCSRFlexGMRESSolve(_solver, parcsr_A, par_b, par_x);
    loc->data = b;
    loc = hypre_ParVectorLocalVector(reinterpret_cast<hypre_ParVector*>(hypre_IJVectorObject(reinterpret_cast<hypre_IJVector*>(_x))));
    std::copy(loc->data, loc->data + _local, rhs);
}
void HypreSolver2D::set_rhs(double *rhs)
{
    /* Scale the right-hand side by h2 before loading it into the hypre vector. */
    for (int i = 0; i < local_size; i++)
        rhs[i] = rhs[i] * h2;
    HYPRE_IJVectorInitialize(hv_b);
    HYPRE_IJVectorSetValues(hv_b, local_size, &rows[0], rhs);
    HYPRE_IJVectorAssemble(hv_b);
    HYPRE_IJVectorGetObject(hv_b, (void **) &par_b);
}
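/* The excerpt does not show how the computed solution is read back from hv_x.
 * A minimal sketch of such an accessor, assuming the same hv_x, rows and local_size
 * members used by set_x0()/set_rhs(); the method name get_solution is illustrative,
 * not part of the original class: */
void HypreSolver2D::get_solution(double *x_out)
{
    /* Copy the local part of the hypre solution vector into the caller's buffer. */
    HYPRE_IJVectorGetValues(hv_x, local_size, &rows[0], x_out);
}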
HYPRE_Int hypre_BlockTridiagSetup(void *data, hypre_ParCSRMatrix *A, hypre_ParVector *b, hypre_ParVector *x) { HYPRE_Int i, j, *index_set1, print_level, nsweeps, relax_type; HYPRE_Int nrows, nrows1, nrows2, start1, start2, *index_set2; HYPRE_Int count, ierr; double threshold; hypre_ParCSRMatrix **submatrices; HYPRE_Solver precon1; HYPRE_Solver precon2; HYPRE_IJVector ij_u1, ij_u2, ij_f1, ij_f2; hypre_ParVector *vector; MPI_Comm comm; hypre_BlockTridiagData *b_data = (hypre_BlockTridiagData *) data; HYPRE_ParCSRMatrixGetComm((HYPRE_ParCSRMatrix) A, &comm); index_set1 = b_data->index_set1; nrows1 = index_set1[0]; nrows = hypre_ParCSRMatrixNumRows(A); nrows2 = nrows - nrows1; b_data->index_set2 = hypre_CTAlloc(HYPRE_Int, nrows2+1); index_set2 = b_data->index_set2; index_set2[0] = nrows2; count = 1; for (i = 0; i < index_set1[1]; i++) index_set2[count++] = i; for (i = 1; i < nrows1; i++) for (j = index_set1[i]+1; j < index_set1[i+1]; j++) index_set2[count++] = j; for (i = index_set1[nrows1]+1; i < nrows; i++) index_set2[count++] = i; submatrices = hypre_CTAlloc(hypre_ParCSRMatrix *, 4); hypre_ParCSRMatrixExtractSubmatrices(A, index_set1, &submatrices); nrows1 = hypre_ParCSRMatrixNumRows(submatrices[0]); nrows2 = hypre_ParCSRMatrixNumRows(submatrices[3]); start1 = hypre_ParCSRMatrixFirstRowIndex(submatrices[0]); start2 = hypre_ParCSRMatrixFirstRowIndex(submatrices[3]); HYPRE_IJVectorCreate(comm, start1, start1+nrows1-1, &ij_u1); HYPRE_IJVectorSetObjectType(ij_u1, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(ij_u1); ierr += HYPRE_IJVectorAssemble(ij_u1); hypre_assert(!ierr); HYPRE_IJVectorCreate(comm, start1, start1+nrows1-1, &ij_f1); HYPRE_IJVectorSetObjectType(ij_f1, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(ij_f1); ierr += HYPRE_IJVectorAssemble(ij_f1); hypre_assert(!ierr); HYPRE_IJVectorCreate(comm, start2, start2+nrows2-1, &ij_u2); HYPRE_IJVectorSetObjectType(ij_u2, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(ij_u2); ierr += HYPRE_IJVectorAssemble(ij_u2); hypre_assert(!ierr); HYPRE_IJVectorCreate(comm, start2, start2+nrows1-1, &ij_f2); HYPRE_IJVectorSetObjectType(ij_f2, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(ij_f2); ierr += HYPRE_IJVectorAssemble(ij_f2); hypre_assert(!ierr); HYPRE_IJVectorGetObject(ij_f1, (void **) &vector); b_data->F1 = vector; HYPRE_IJVectorGetObject(ij_u1, (void **) &vector); b_data->U1 = vector; HYPRE_IJVectorGetObject(ij_f2, (void **) &vector); b_data->F2 = vector; HYPRE_IJVectorGetObject(ij_u2, (void **) &vector); b_data->U2 = vector; print_level = b_data->print_level; threshold = b_data->threshold; nsweeps = b_data->num_sweeps; relax_type = b_data->relax_type; threshold = b_data->threshold; HYPRE_BoomerAMGCreate(&precon1); HYPRE_BoomerAMGSetMaxIter(precon1, 1); HYPRE_BoomerAMGSetCycleType(precon1, 1); HYPRE_BoomerAMGSetPrintLevel(precon1, print_level); HYPRE_BoomerAMGSetMaxLevels(precon1, 25); HYPRE_BoomerAMGSetMeasureType(precon1, 0); HYPRE_BoomerAMGSetCoarsenType(precon1, 0); HYPRE_BoomerAMGSetStrongThreshold(precon1, threshold); HYPRE_BoomerAMGSetNumFunctions(precon1, 1); HYPRE_BoomerAMGSetNumSweeps(precon1, nsweeps); HYPRE_BoomerAMGSetRelaxType(precon1, relax_type); hypre_BoomerAMGSetup(precon1, submatrices[0], b_data->U1, b_data->F1); HYPRE_BoomerAMGCreate(&precon2); HYPRE_BoomerAMGSetMaxIter(precon2, 1); HYPRE_BoomerAMGSetCycleType(precon2, 1); HYPRE_BoomerAMGSetPrintLevel(precon2, print_level); HYPRE_BoomerAMGSetMaxLevels(precon2, 25); HYPRE_BoomerAMGSetMeasureType(precon2, 0); HYPRE_BoomerAMGSetCoarsenType(precon2, 0); 
HYPRE_BoomerAMGSetMeasureType(precon2, 1); HYPRE_BoomerAMGSetStrongThreshold(precon2, threshold); HYPRE_BoomerAMGSetNumFunctions(precon2, 1); HYPRE_BoomerAMGSetNumSweeps(precon2, nsweeps); HYPRE_BoomerAMGSetRelaxType(precon2, relax_type); hypre_BoomerAMGSetup(precon2, submatrices[3], NULL, NULL); b_data->precon1 = precon1; b_data->precon2 = precon2; b_data->A11 = submatrices[0]; hypre_ParCSRMatrixDestroy(submatrices[1]); b_data->A21 = submatrices[2]; b_data->A22 = submatrices[3]; hypre_TFree(submatrices); return (0); }
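/* hypre_BlockTridiagSetup above is normally reached through the HYPRE_BlockTridiag*
 * wrappers rather than called directly. A rough sketch of that usage as a PCG
 * preconditioner follows; the wrapper names are as I recall them from
 * HYPRE_parcsr_ls.h and the test drivers, and n1/rows_in_block1 (the global rows of
 * the first diagonal block) are placeholders: */
void use_block_tridiag_precond(HYPRE_Solver pcg_solver, HYPRE_Int n1, HYPRE_Int *rows_in_block1)
{
   HYPRE_Solver precond;
   HYPRE_BlockTridiagCreate(&precond);
   HYPRE_BlockTridiagSetIndexSet(precond, n1, rows_in_block1);
   HYPRE_BlockTridiagSetAMGStrengthThreshold(precond, 0.25);
   HYPRE_BlockTridiagSetAMGNumSweeps(precond, 1);
   HYPRE_PCGSetPrecond(pcg_solver,
                       (HYPRE_PtrToSolverFcn) HYPRE_BlockTridiagSolve,
                       (HYPRE_PtrToSolverFcn) HYPRE_BlockTridiagSetup,
                       precond);
}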
//======================================================= EpetraExt_HypreIJMatrix::EpetraExt_HypreIJMatrix(HYPRE_IJMatrix matrix) : Epetra_BasicRowMatrix(Epetra_MpiComm(hypre_IJMatrixComm(matrix))), Matrix_(matrix), ParMatrix_(0), NumMyRows_(-1), NumGlobalRows_(-1), NumGlobalCols_(-1), MyRowStart_(-1), MyRowEnd_(-1), MatType_(-1), TransposeSolve_(false), SolveOrPrec_(Solver) { IsSolverSetup_ = new bool[1]; IsPrecondSetup_ = new bool[1]; IsSolverSetup_[0] = false; IsPrecondSetup_[0] = false; // Initialize default values for global variables int ierr = 0; ierr += InitializeDefaults(); TEUCHOS_TEST_FOR_EXCEPTION(ierr != 0, std::logic_error, "Couldn't initialize default values."); // Create array of global row ids Teuchos::Array<int> GlobalRowIDs; GlobalRowIDs.resize(NumMyRows_); for (int i = MyRowStart_; i <= MyRowEnd_; i++) { GlobalRowIDs[i-MyRowStart_] = i; } // Create array of global column ids int new_value = 0; int entries = 0; std::set<int> Columns; int num_entries; double *values; int *indices; for(int i = 0; i < NumMyRows_; i++){ ierr += HYPRE_ParCSRMatrixGetRow(ParMatrix_, i+MyRowStart_, &num_entries, &indices, &values); ierr += HYPRE_ParCSRMatrixRestoreRow(ParMatrix_, i+MyRowStart_,&num_entries,&indices,&values); TEUCHOS_TEST_FOR_EXCEPTION(ierr != 0, std::logic_error, "Couldn't get row of matrix."); entries = num_entries; for(int j = 0; j < num_entries; j++){ // Insert column ids from this row into set new_value = indices[j]; Columns.insert(new_value); } } int NumMyCols = Columns.size(); Teuchos::Array<int> GlobalColIDs; GlobalColIDs.resize(NumMyCols); std::set<int>::iterator it; int counter = 0; for (it = Columns.begin(); it != Columns.end(); it++) { // Get column ids in order GlobalColIDs[counter] = *it; counter = counter + 1; } //printf("Proc[%d] Rows from %d to %d, num = %d\n", Comm().MyPID(), MyRowStart_,MyRowEnd_, NumMyRows_); Epetra_Map RowMap(-1, NumMyRows_, &GlobalRowIDs[0], 0, Comm()); Epetra_Map ColMap(-1, NumMyCols, &GlobalColIDs[0], 0, Comm()); //Need to call SetMaps() SetMaps(RowMap, ColMap); // Get an MPI_Comm to create vectors. // The vectors will be reused in Multiply(), so that they aren't recreated every time. 
MPI_Comm comm; ierr += HYPRE_ParCSRMatrixGetComm(ParMatrix_, &comm); TEUCHOS_TEST_FOR_EXCEPTION(ierr != 0, std::logic_error, "Couldn't get communicator from Hypre Matrix."); ierr += HYPRE_IJVectorCreate(comm, MyRowStart_, MyRowEnd_, &X_hypre); ierr += HYPRE_IJVectorSetObjectType(X_hypre, HYPRE_PARCSR); ierr += HYPRE_IJVectorInitialize(X_hypre); ierr += HYPRE_IJVectorAssemble(X_hypre); ierr += HYPRE_IJVectorGetObject(X_hypre, (void**) &par_x); TEUCHOS_TEST_FOR_EXCEPTION(ierr != 0, std::logic_error, "Couldn't create Hypre X vector."); ierr += HYPRE_IJVectorCreate(comm, MyRowStart_, MyRowEnd_, &Y_hypre); ierr += HYPRE_IJVectorSetObjectType(Y_hypre, HYPRE_PARCSR); ierr += HYPRE_IJVectorInitialize(Y_hypre); ierr += HYPRE_IJVectorAssemble(Y_hypre); ierr += HYPRE_IJVectorGetObject(Y_hypre, (void**) &par_y); TEUCHOS_TEST_FOR_EXCEPTION(ierr != 0, std::logic_error, "Couldn't create Hypre Y vector."); x_vec = (hypre_ParVector *) hypre_IJVectorObject(((hypre_IJVector *) X_hypre)); x_local = hypre_ParVectorLocalVector(x_vec); y_vec = (hypre_ParVector *) hypre_IJVectorObject(((hypre_IJVector *) Y_hypre)); y_local = hypre_ParVectorLocalVector(y_vec); SolverCreatePtr_ = &EpetraExt_HypreIJMatrix::Hypre_ParCSRPCGCreate; SolverDestroyPtr_ = &HYPRE_ParCSRPCGDestroy; SolverSetupPtr_ = &HYPRE_ParCSRPCGSetup; SolverSolvePtr_ = &HYPRE_ParCSRPCGSolve; SolverPrecondPtr_ = &HYPRE_ParCSRPCGSetPrecond; CreateSolver(); PrecondCreatePtr_ = &EpetraExt_HypreIJMatrix::Hypre_EuclidCreate; PrecondDestroyPtr_ = &HYPRE_EuclidDestroy; PrecondSetupPtr_ = &HYPRE_EuclidSetup; PrecondSolvePtr_ = &HYPRE_EuclidSolve; CreatePrecond(); ComputeNumericConstants(); ComputeStructureConstants(); } //EpetraExt_HYPREIJMatrix(Hypre_IJMatrix) Constructor
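/* The x_local/y_local pointers cached above are what lets Multiply() reuse the
 * X_hypre/Y_hypre vectors instead of recreating them on every call. A minimal sketch
 * of the data-pointer-swapping pattern this enables (illustrative only; the actual
 * EpetraExt_HypreIJMatrix::Multiply also handles multivectors and the transpose case): */
void MatVecSketch(HYPRE_ParCSRMatrix A, HYPRE_ParVector par_x, HYPRE_ParVector par_y,
                  hypre_Vector *x_local, hypre_Vector *y_local,
                  double *x_values, double *y_values)
{
   double *x_save = hypre_VectorData(x_local);   /* remember the original hypre buffers */
   double *y_save = hypre_VectorData(y_local);
   hypre_VectorData(x_local) = x_values;         /* borrow the caller's data            */
   hypre_VectorData(y_local) = y_values;
   HYPRE_ParCSRMatrixMatvec(1.0, A, par_x, 0.0, par_y);  /* y := A * x */
   hypre_VectorData(x_local) = x_save;           /* restore                             */
   hypre_VectorData(y_local) = y_save;
}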
int32_t impl_bHYPRE_ParCSRDiagScale_Apply( /* in */ bHYPRE_ParCSRDiagScale self, /* in */ bHYPRE_Vector b, /* inout */ bHYPRE_Vector* x, /* out */ sidl_BaseInterface *_ex) { *_ex = 0; { /* DO-NOT-DELETE splicer.begin(bHYPRE.ParCSRDiagScale.Apply) */ /* Insert the implementation of the Apply method here... */ int ierr = 0; MPI_Comm comm; HYPRE_Solver dummy; HYPRE_Solver * solver = &dummy; struct bHYPRE_ParCSRDiagScale__data * data; /* not used HYPRE_Matrix HYPRE_A;*/ bHYPRE_IJParCSRMatrix bH_A; HYPRE_ParCSRMatrix AA; HYPRE_IJMatrix ij_A; /* not used HYPRE_Vector HYPRE_x, HYPRE_b;*/ bHYPRE_IJParCSRVector bH_b, bH_x; HYPRE_ParVector bb, xx; HYPRE_IJVector ij_b, ij_x; struct bHYPRE_IJParCSRMatrix__data * dataA; struct bHYPRE_IJParCSRVector__data * datab, * datax; void * objectA, * objectb, * objectx; data = bHYPRE_ParCSRDiagScale__get_data( self ); comm = data->comm; /* SetCommunicator should have been called earlier */ hypre_assert( comm != MPI_COMM_NULL ); bH_A = data->matrix; hypre_assert( bH_A != NULL ); if ( *x==NULL ) { /* If vector not supplied, make one...*/ /* There's no good way to check the size of x. It would be good * to do something similar if x had zero length. Or * hypre_assert(x-has-the-right-size) */ bHYPRE_Vector_Clone( b, x, _ex ); SIDL_CHECK(*_ex); bHYPRE_Vector_Clear( *x, _ex ); SIDL_CHECK(*_ex); } bH_b = (bHYPRE_IJParCSRVector) bHYPRE_Vector__cast2( b, "bHYPRE.IJParCSRVector", _ex ); SIDL_CHECK(*_ex); hypre_assert( bH_b!=NULL ); datab = bHYPRE_IJParCSRVector__get_data( bH_b ); ij_b = datab -> ij_b; ierr += HYPRE_IJVectorGetObject( ij_b, &objectb ); bb = (HYPRE_ParVector) objectb; /* not used HYPRE_b = (HYPRE_Vector) bb;*/ bH_x = (bHYPRE_IJParCSRVector) bHYPRE_Vector__cast2( *x, "bHYPRE.IJParCSRVector", _ex ); SIDL_CHECK(*_ex); hypre_assert( bH_x!=NULL ); datax = bHYPRE_IJParCSRVector__get_data( bH_x ); ij_x = datax -> ij_b; ierr += HYPRE_IJVectorGetObject( ij_x, &objectx ); xx = (HYPRE_ParVector) objectx; /* not used HYPRE_b = (HYPRE_Vector) xx;*/ dataA = bHYPRE_IJParCSRMatrix__get_data( bH_A ); ij_A = dataA -> ij_A; ierr += HYPRE_IJMatrixGetObject( ij_A, &objectA ); AA = (HYPRE_ParCSRMatrix) objectA; /* not used HYPRE_A = (HYPRE_Matrix) AA;*/ /* does x = y/diagA as approximation to solving Ax=y for x ... */ ierr += HYPRE_ParCSRDiagScale( *solver, AA, bb, xx ); bHYPRE_IJParCSRVector_deleteRef( bH_b, _ex ); SIDL_CHECK(*_ex); bHYPRE_IJParCSRVector_deleteRef( bH_x, _ex ); SIDL_CHECK(*_ex); return ierr; hypre_babel_exception_return_error(_ex); /* DO-NOT-DELETE splicer.end(bHYPRE.ParCSRDiagScale.Apply) */ } }
inline void numfact(unsigned int ncol, int* I, int* loc2glob, int* J, K* C) {
    static_assert(std::is_same<double, K>::value, "Hypre only supports double-precision floating-point real numbers");
    static_assert(S == 'G', "Hypre only supports nonsymmetric matrices");
    HYPRE_IJMatrixCreate(DMatrix::_communicator, loc2glob[0], loc2glob[1], loc2glob[0], loc2glob[1], &_A);
    HYPRE_IJMatrixSetObjectType(_A, HYPRE_PARCSR);
    HYPRE_IJMatrixSetRowSizes(_A, I + 1);
    _local = ncol;
    int* rows = new int[3 * _local]();
    int* diag_sizes = rows + _local;
    int* offdiag_sizes = diag_sizes + _local;
    /* Count, for each local row, how many entries fall inside/outside the local column range. */
    rows[0] = I[0];
    for(unsigned int i = 0; i < _local; ++i) {
        std::for_each(J + rows[0], J + rows[0] + I[i + 1], [&](int& j) { (j < loc2glob[0] || loc2glob[1] < j) ? ++offdiag_sizes[i] : ++diag_sizes[i]; });
        rows[0] += I[i + 1];
    }
    HYPRE_IJMatrixSetDiagOffdSizes(_A, diag_sizes, offdiag_sizes);
    HYPRE_IJMatrixSetMaxOffProcElmts(_A, 0);
    HYPRE_IJMatrixInitialize(_A);
    std::iota(rows, rows + _local, loc2glob[0]);
    HYPRE_IJMatrixSetValues(_A, _local, I + 1, rows, J, C);
    HYPRE_IJMatrixAssemble(_A);
    HYPRE_IJVectorCreate(DMatrix::_communicator, loc2glob[0], loc2glob[1], &_b);
    HYPRE_IJVectorSetObjectType(_b, HYPRE_PARCSR);
    HYPRE_IJVectorInitialize(_b);
    HYPRE_IJVectorCreate(DMatrix::_communicator, loc2glob[0], loc2glob[1], &_x);
    HYPRE_IJVectorSetObjectType(_x, HYPRE_PARCSR);
    HYPRE_IJVectorInitialize(_x);
    delete [] rows;
    delete [] I;
    delete [] loc2glob;
    HYPRE_BoomerAMGCreate(_strategy == 1 ? &_solver : &_precond);
    HYPRE_BoomerAMGSetCoarsenType(_strategy == 1 ? _solver : _precond, 6); /* Falgout coarsening */
    HYPRE_BoomerAMGSetRelaxType(_strategy == 1 ? _solver : _precond, 6);   /* G-S/Jacobi hybrid relaxation */
    HYPRE_BoomerAMGSetNumSweeps(_strategy == 1 ? _solver : _precond, 1);   /* sweeps on each level */
    HYPRE_BoomerAMGSetMaxLevels(_strategy == 1 ? _solver : _precond, 10);  /* maximum number of levels */
    HYPRE_ParCSRMatrix parcsr_A;
    HYPRE_IJMatrixGetObject(_A, reinterpret_cast<void**>(&parcsr_A));
    HYPRE_ParVector par_b;
    HYPRE_IJVectorGetObject(_b, reinterpret_cast<void**>(&par_b));
    HYPRE_ParVector par_x;
    HYPRE_IJVectorGetObject(_x, reinterpret_cast<void**>(&par_x));
    if(_strategy == 1) { /* stand-alone BoomerAMG */
        HYPRE_BoomerAMGSetTol(_solver, 1.0e-8);
        HYPRE_BoomerAMGSetMaxIter(_solver, 1000);
        HYPRE_BoomerAMGSetPrintLevel(_solver, 1);
        HYPRE_BoomerAMGSetup(_solver, parcsr_A, nullptr, nullptr);
    }
    else {
        HYPRE_BoomerAMGSetTol(_precond, 0.0);
        HYPRE_BoomerAMGSetMaxIter(_precond, 1);
        HYPRE_BoomerAMGSetPrintLevel(_precond, 1);
        if(_strategy == 2) { /* PCG preconditioned by BoomerAMG */
            HYPRE_ParCSRPCGCreate(DMatrix::_communicator, &_solver);
            HYPRE_PCGSetMaxIter(_solver, 500);
            HYPRE_PCGSetTol(_solver, 1.0e-8);
            HYPRE_PCGSetTwoNorm(_solver, 1);
            HYPRE_PCGSetPrintLevel(_solver, 1);
            HYPRE_PCGSetLogging(_solver, 1);
            HYPRE_PCGSetPrecond(_solver, reinterpret_cast<HYPRE_PtrToSolverFcn>(HYPRE_BoomerAMGSolve), reinterpret_cast<HYPRE_PtrToSolverFcn>(HYPRE_BoomerAMGSetup), _precond);
            HYPRE_ParCSRPCGSetup(_solver, parcsr_A, par_b, par_x);
        }
        else { /* FlexGMRES preconditioned by BoomerAMG */
            HYPRE_ParCSRFlexGMRESCreate(DMatrix::_communicator, &_solver);
            HYPRE_FlexGMRESSetKDim(_solver, 50);
            HYPRE_FlexGMRESSetMaxIter(_solver, 500);
            HYPRE_FlexGMRESSetTol(_solver, 1.0e-8);
            HYPRE_FlexGMRESSetPrintLevel(_solver, 1);
            HYPRE_FlexGMRESSetLogging(_solver, 1);
            HYPRE_FlexGMRESSetPrecond(_solver, reinterpret_cast<HYPRE_PtrToSolverFcn>(HYPRE_BoomerAMGSolve), reinterpret_cast<HYPRE_PtrToSolverFcn>(HYPRE_BoomerAMGSetup), _precond);
            HYPRE_ParCSRFlexGMRESSetup(_solver, parcsr_A, par_b, par_x);
        }
    }
}
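/* The matrix, vectors, solver and preconditioner created in numfact() are not released
 * anywhere in this excerpt. A minimal sketch of the matching cleanup, written as a
 * destructor of the enclosing class (called Hypre here for illustration) and assuming
 * the same members and _strategy convention (1 = AMG, 2 = PCG + AMG, otherwise
 * FlexGMRES + AMG): */
~Hypre() {
    HYPRE_IJMatrixDestroy(_A);
    HYPRE_IJVectorDestroy(_b);
    HYPRE_IJVectorDestroy(_x);
    if(_strategy == 1)
        HYPRE_BoomerAMGDestroy(_solver);
    else {
        HYPRE_BoomerAMGDestroy(_precond);
        if(_strategy == 2)
            HYPRE_ParCSRPCGDestroy(_solver);
        else
            HYPRE_ParCSRFlexGMRESDestroy(_solver);
    }
}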
int main (int argc, char *argv[]) { HYPRE_Int i; int myid, num_procs; int N, n; HYPRE_Int ilower, iupper; HYPRE_Int local_size, extra; int solver_id; int print_solution, print_system; double h, h2; HYPRE_IJMatrix A; HYPRE_ParCSRMatrix parcsr_A; HYPRE_IJVector b; HYPRE_ParVector par_b; HYPRE_IJVector x; HYPRE_ParVector par_x; HYPRE_Solver solver, precond; /* Initialize MPI */ MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &myid); MPI_Comm_size(MPI_COMM_WORLD, &num_procs); /* Default problem parameters */ n = 33; solver_id = 0; print_solution = 0; print_system = 0; /* Parse command line */ { int arg_index = 0; int print_usage = 0; while (arg_index < argc) { if ( strcmp(argv[arg_index], "-n") == 0 ) { arg_index++; n = atoi(argv[arg_index++]); } else if ( strcmp(argv[arg_index], "-solver") == 0 ) { arg_index++; solver_id = atoi(argv[arg_index++]); } else if ( strcmp(argv[arg_index], "-print_solution") == 0 ) { arg_index++; print_solution = 1; } else if ( strcmp(argv[arg_index], "-print_system") == 0 ) { arg_index++; print_system = 1; } else if ( strcmp(argv[arg_index], "-help") == 0 ) { print_usage = 1; break; } else { arg_index++; } } if ((print_usage) && (myid == 0)) { printf("\n"); printf("Usage: %s [<options>]\n", argv[0]); printf("\n"); printf(" -n <n> : problem size in each direction (default: 33)\n"); printf(" -solver <ID> : solver ID\n"); printf(" 0 - AMG (default) \n"); printf(" 1 - AMG-PCG\n"); printf(" 8 - ParaSails-PCG\n"); printf(" 50 - PCG\n"); printf(" 61 - AMG-FlexGMRES\n"); printf(" -print_solution : print the solution vector\n"); printf(" -print_system : print the matrix and rhs\n"); printf("\n"); } if (print_usage) { MPI_Finalize(); return (0); } } /* Preliminaries: want at least one processor per row */ if (n*n < num_procs) n = sqrt(num_procs) + 1; N = n*n; /* global number of rows */ h = 1.0/(n+1); /* mesh size*/ h2 = h*h; /* Each processor knows only of its own rows - the range is denoted by ilower and upper. Here we partition the rows. We account for the fact that N may not divide evenly by the number of processors. */ local_size = N/num_procs; extra = N - local_size*num_procs; ilower = local_size*myid; ilower += hypre_min(myid, extra); iupper = local_size*(myid+1); iupper += hypre_min(myid+1, extra); iupper = iupper - 1; /* How many rows do I have? */ local_size = iupper - ilower + 1; /* Create the matrix. Note that this is a square matrix, so we indicate the row partition size twice (since number of rows = number of cols) */ HYPRE_IJMatrixCreate(MPI_COMM_WORLD, ilower, iupper, ilower, iupper, &A); /* Choose a parallel csr format storage (see the User's Manual) */ HYPRE_IJMatrixSetObjectType(A, HYPRE_PARCSR); /* Initialize before setting coefficients */ HYPRE_IJMatrixInitialize(A); /* Now go through my local rows and set the matrix entries. Each row has at most 5 entries. For example, if n=3: A = [M -I 0; -I M -I; 0 -I M] M = [4 -1 0; -1 4 -1; 0 -1 4] Note that here we are setting one row at a time, though one could set all the rows together (see the User's Manual). 
*/ { HYPRE_Int nnz; double values[5]; HYPRE_Int cols[5]; for (i = ilower; i <= iupper; i++) { nnz = 0; /* The left identity block:position i-n */ if ((i-n)>=0) { cols[nnz] = i-n; values[nnz] = -1.0; nnz++; } /* The left -1: position i-1 */ if (i%n) { cols[nnz] = i-1; values[nnz] = -1.0; nnz++; } /* Set the diagonal: position i */ cols[nnz] = i; values[nnz] = 4.0; nnz++; /* The right -1: position i+1 */ if ((i+1)%n) { cols[nnz] = i+1; values[nnz] = -1.0; nnz++; } /* The right identity block:position i+n */ if ((i+n)< N) { cols[nnz] = i+n; values[nnz] = -1.0; nnz++; } /* Set the values for row i */ HYPRE_IJMatrixSetValues(A, 1, &nnz, &i, cols, values); } } /* Assemble after setting the coefficients */ HYPRE_IJMatrixAssemble(A); /* Note: for the testing of small problems, one may wish to read in a matrix in IJ format (for the format, see the output files from the -print_system option). In this case, one would use the following routine: HYPRE_IJMatrixRead( <filename>, MPI_COMM_WORLD, HYPRE_PARCSR, &A ); <filename> = IJ.A.out to read in what has been printed out by -print_system (processor numbers are omitted). A call to HYPRE_IJMatrixRead is an *alternative* to the following sequence of HYPRE_IJMatrix calls: Create, SetObjectType, Initialize, SetValues, and Assemble */ /* Get the parcsr matrix object to use */ HYPRE_IJMatrixGetObject(A, (void**) &parcsr_A); /* Create the rhs and solution */ HYPRE_IJVectorCreate(MPI_COMM_WORLD, ilower, iupper,&b); HYPRE_IJVectorSetObjectType(b, HYPRE_PARCSR); HYPRE_IJVectorInitialize(b); HYPRE_IJVectorCreate(MPI_COMM_WORLD, ilower, iupper,&x); HYPRE_IJVectorSetObjectType(x, HYPRE_PARCSR); HYPRE_IJVectorInitialize(x); /* Set the rhs values to h^2 and the solution to zero */ { double *rhs_values, *x_values; HYPRE_Int *rows; rhs_values = calloc(local_size, sizeof(double)); x_values = calloc(local_size, sizeof(double)); rows = calloc(local_size, sizeof(HYPRE_Int)); for (i=0; i<local_size; i++) { rhs_values[i] = h2; x_values[i] = 0.0; rows[i] = ilower + i; } HYPRE_IJVectorSetValues(b, local_size, rows, rhs_values); HYPRE_IJVectorSetValues(x, local_size, rows, x_values); free(x_values); free(rhs_values); free(rows); } HYPRE_IJVectorAssemble(b); /* As with the matrix, for testing purposes, one may wish to read in a rhs: HYPRE_IJVectorRead( <filename>, MPI_COMM_WORLD, HYPRE_PARCSR, &b ); as an alternative to the following sequence of HYPRE_IJVectors calls: Create, SetObjectType, Initialize, SetValues, and Assemble */ HYPRE_IJVectorGetObject(b, (void **) &par_b); HYPRE_IJVectorAssemble(x); HYPRE_IJVectorGetObject(x, (void **) &par_x); /* Print out the system - files names will be IJ.out.A.XXXXX and IJ.out.b.XXXXX, where XXXXX = processor id */ if (print_system) { HYPRE_IJMatrixPrint(A, "IJ.out.A"); HYPRE_IJVectorPrint(b, "IJ.out.b"); } /* Choose a solver and solve the system */ /* AMG */ if (solver_id == 0) { HYPRE_Int num_iterations; double final_res_norm; /* Create solver */ HYPRE_BoomerAMGCreate(&solver); /* Set some parameters (See Reference Manual for more parameters) */ HYPRE_BoomerAMGSetPrintLevel(solver, 3); /* print solve info + parameters */ HYPRE_BoomerAMGSetCoarsenType(solver, 6); /* Falgout coarsening */ HYPRE_BoomerAMGSetRelaxType(solver, 3); /* G-S/Jacobi hybrid relaxation */ HYPRE_BoomerAMGSetNumSweeps(solver, 1); /* Sweeeps on each level */ HYPRE_BoomerAMGSetMaxLevels(solver, 20); /* maximum number of levels */ HYPRE_BoomerAMGSetTol(solver, 1e-7); /* conv. tolerance */ /* Now setup and solve! 
*/ HYPRE_BoomerAMGSetup(solver, parcsr_A, par_b, par_x); HYPRE_BoomerAMGSolve(solver, parcsr_A, par_b, par_x); /* Run info - needed logging turned on */ HYPRE_BoomerAMGGetNumIterations(solver, &num_iterations); HYPRE_BoomerAMGGetFinalRelativeResidualNorm(solver, &final_res_norm); if (myid == 0) { printf("\n"); printf("Iterations = %lld\n", num_iterations); printf("Final Relative Residual Norm = %e\n", final_res_norm); printf("\n"); } /* Destroy solver */ HYPRE_BoomerAMGDestroy(solver); } /* PCG */ else if (solver_id == 50) { HYPRE_Int num_iterations; double final_res_norm; /* Create solver */ HYPRE_ParCSRPCGCreate(MPI_COMM_WORLD, &solver); /* Set some parameters (See Reference Manual for more parameters) */ HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */ HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */ HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */ HYPRE_PCGSetPrintLevel(solver, 2); /* prints out the iteration info */ HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */ /* Now setup and solve! */ HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x); HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x); /* Run info - needed logging turned on */ HYPRE_PCGGetNumIterations(solver, &num_iterations); HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm); if (myid == 0) { printf("\n"); printf("Iterations = %lld\n", num_iterations); printf("Final Relative Residual Norm = %e\n", final_res_norm); printf("\n"); } /* Destroy solver */ HYPRE_ParCSRPCGDestroy(solver); } /* PCG with AMG preconditioner */ else if (solver_id == 1) { HYPRE_Int num_iterations; double final_res_norm; /* Create solver */ HYPRE_ParCSRPCGCreate(MPI_COMM_WORLD, &solver); /* Set some parameters (See Reference Manual for more parameters) */ HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */ HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */ HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */ HYPRE_PCGSetPrintLevel(solver, 2); /* print solve info */ HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */ /* Now set up the AMG preconditioner and specify any parameters */ HYPRE_BoomerAMGCreate(&precond); HYPRE_BoomerAMGSetPrintLevel(precond, 1); /* print amg solution info */ HYPRE_BoomerAMGSetCoarsenType(precond, 6); HYPRE_BoomerAMGSetRelaxType(precond, 6); /* Sym G.S./Jacobi hybrid */ HYPRE_BoomerAMGSetNumSweeps(precond, 1); HYPRE_BoomerAMGSetTol(precond, 0.0); /* conv. tolerance zero */ HYPRE_BoomerAMGSetMaxIter(precond, 1); /* do only one iteration! */ /* Set the PCG preconditioner */ HYPRE_PCGSetPrecond(solver, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup, precond); /* Now setup and solve! 
*/ HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x); HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x); /* Run info - needed logging turned on */ HYPRE_PCGGetNumIterations(solver, &num_iterations); HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm); if (myid == 0) { printf("\n"); printf("Iterations = %lld\n", num_iterations); printf("Final Relative Residual Norm = %e\n", final_res_norm); printf("\n"); } /* Destroy solver and preconditioner */ HYPRE_ParCSRPCGDestroy(solver); HYPRE_BoomerAMGDestroy(precond); } /* PCG with Parasails Preconditioner */ else if (solver_id == 8) { HYPRE_Int num_iterations; double final_res_norm; int sai_max_levels = 1; double sai_threshold = 0.1; double sai_filter = 0.05; int sai_sym = 1; /* Create solver */ HYPRE_ParCSRPCGCreate(MPI_COMM_WORLD, &solver); /* Set some parameters (See Reference Manual for more parameters) */ HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */ HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */ HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */ HYPRE_PCGSetPrintLevel(solver, 2); /* print solve info */ HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */ /* Now set up the ParaSails preconditioner and specify any parameters */ HYPRE_ParaSailsCreate(MPI_COMM_WORLD, &precond); /* Set some parameters (See Reference Manual for more parameters) */ HYPRE_ParaSailsSetParams(precond, sai_threshold, sai_max_levels); HYPRE_ParaSailsSetFilter(precond, sai_filter); HYPRE_ParaSailsSetSym(precond, sai_sym); HYPRE_ParaSailsSetLogging(precond, 3); /* Set the PCG preconditioner */ HYPRE_PCGSetPrecond(solver, (HYPRE_PtrToSolverFcn) HYPRE_ParaSailsSolve, (HYPRE_PtrToSolverFcn) HYPRE_ParaSailsSetup, precond); /* Now setup and solve! */ HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x); HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x); /* Run info - needed logging turned on */ HYPRE_PCGGetNumIterations(solver, &num_iterations); HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm); if (myid == 0) { printf("\n"); printf("Iterations = %lld\n", num_iterations); printf("Final Relative Residual Norm = %e\n", final_res_norm); printf("\n"); } /* Destory solver and preconditioner */ HYPRE_ParCSRPCGDestroy(solver); HYPRE_ParaSailsDestroy(precond); } /* Flexible GMRES with AMG Preconditioner */ else if (solver_id == 61) { HYPRE_Int num_iterations; double final_res_norm; int restart = 30; int modify = 1; /* Create solver */ HYPRE_ParCSRFlexGMRESCreate(MPI_COMM_WORLD, &solver); /* Set some parameters (See Reference Manual for more parameters) */ HYPRE_FlexGMRESSetKDim(solver, restart); HYPRE_FlexGMRESSetMaxIter(solver, 1000); /* max iterations */ HYPRE_FlexGMRESSetTol(solver, 1e-7); /* conv. tolerance */ HYPRE_FlexGMRESSetPrintLevel(solver, 2); /* print solve info */ HYPRE_FlexGMRESSetLogging(solver, 1); /* needed to get run info later */ /* Now set up the AMG preconditioner and specify any parameters */ HYPRE_BoomerAMGCreate(&precond); HYPRE_BoomerAMGSetPrintLevel(precond, 1); /* print amg solution info */ HYPRE_BoomerAMGSetCoarsenType(precond, 6); HYPRE_BoomerAMGSetRelaxType(precond, 6); /* Sym G.S./Jacobi hybrid */ HYPRE_BoomerAMGSetNumSweeps(precond, 1); HYPRE_BoomerAMGSetTol(precond, 0.0); /* conv. tolerance zero */ HYPRE_BoomerAMGSetMaxIter(precond, 1); /* do only one iteration! 
*/ /* Set the FlexGMRES preconditioner */ HYPRE_FlexGMRESSetPrecond(solver, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup, precond); if (modify) /* this is an optional call - if you don't call it, hypre_FlexGMRESModifyPCDefault is used - which does nothing. Otherwise, you can define your own, similar to the one used here */ HYPRE_FlexGMRESSetModifyPC( solver, (HYPRE_PtrToModifyPCFcn) hypre_FlexGMRESModifyPCAMGExample); /* Now setup and solve! */ HYPRE_ParCSRFlexGMRESSetup(solver, parcsr_A, par_b, par_x); HYPRE_ParCSRFlexGMRESSolve(solver, parcsr_A, par_b, par_x); /* Run info - needed logging turned on */ HYPRE_FlexGMRESGetNumIterations(solver, &num_iterations); HYPRE_FlexGMRESGetFinalRelativeResidualNorm(solver, &final_res_norm); if (myid == 0) { printf("\n"); printf("Iterations = %lld\n", num_iterations); printf("Final Relative Residual Norm = %e\n", final_res_norm); printf("\n"); } /* Destory solver and preconditioner */ HYPRE_ParCSRFlexGMRESDestroy(solver); HYPRE_BoomerAMGDestroy(precond); } else { if (myid ==0) printf("Invalid solver id specified.\n"); } /* Print the solution */ if (print_solution) HYPRE_IJVectorPrint(x, "ij.out.x"); /* Clean up */ HYPRE_IJMatrixDestroy(A); HYPRE_IJVectorDestroy(b); HYPRE_IJVectorDestroy(x); /* Finalize MPI*/ MPI_Finalize(); return(0); }
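/* The FlexGMRES branch above registers hypre_FlexGMRESModifyPCAMGExample, whose body is
 * not shown in this excerpt. In hypre's ex5 that callback retunes the AMG preconditioner
 * from the current relative residual; a sketch along those lines (the 0.1 threshold and
 * the 10/1 sweep counts are illustrative): */
HYPRE_Int hypre_FlexGMRESModifyPCAMGExample(void *precond_data, HYPRE_Int iterations,
                                            double rel_residual_norm)
{
   /* Use heavier smoothing while the residual is still large, then back off. */
   if (rel_residual_norm > 0.1)
      HYPRE_BoomerAMGSetNumSweeps((HYPRE_Solver) precond_data, 10);
   else
      HYPRE_BoomerAMGSetNumSweeps((HYPRE_Solver) precond_data, 1);
   return 0;
}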
int main(int argc, char *argv[]) { realtype dx, reltol, abstol, t, tout, umax; N_Vector u; UserData data; void *cvode_mem; int iout, flag, my_pe, npes; long int nst; HYPRE_Int local_N, nperpe, nrem, my_base; HYPRE_ParVector Upar; /* Declare HYPRE parallel vector */ HYPRE_IJVector Uij; /* Declare "IJ" interface to HYPRE vector */ MPI_Comm comm; u = NULL; data = NULL; cvode_mem = NULL; /* Get processor number, total number of pe's, and my_pe. */ MPI_Init(&argc, &argv); comm = MPI_COMM_WORLD; MPI_Comm_size(comm, &npes); MPI_Comm_rank(comm, &my_pe); /* Set partitioning. */ nperpe = NEQ/npes; nrem = NEQ - npes*nperpe; local_N = (my_pe < nrem) ? nperpe+1 : nperpe; my_base = (my_pe < nrem) ? my_pe*local_N : my_pe*nperpe + nrem; /* Allocate hypre vector */ HYPRE_IJVectorCreate(comm, my_base, my_base + local_N - 1, &Uij); HYPRE_IJVectorSetObjectType(Uij, HYPRE_PARCSR); HYPRE_IJVectorInitialize(Uij); /* Allocate user defined data */ data = (UserData) malloc(sizeof *data); /* Allocate data memory */ if(check_flag((void *)data, "malloc", 2, my_pe)) MPI_Abort(comm, 1); data->comm = comm; data->npes = npes; data->my_pe = my_pe; reltol = ZERO; /* Set the tolerances */ abstol = ATOL; dx = data->dx = XMAX/((realtype)(MX+1)); /* Set grid coefficients in data */ data->hdcoef = RCONST(1.0)/(dx*dx); data->hacoef = RCONST(0.5)/(RCONST(2.0)*dx); /* Initialize solutin vector. */ SetIC(Uij, dx, local_N, my_base); HYPRE_IJVectorAssemble(Uij); HYPRE_IJVectorGetObject(Uij, (void**) &Upar); u = N_VMake_ParHyp(Upar); /* Create wrapper u around hypre vector */ if(check_flag((void *)u, "N_VNew", 0, my_pe)) MPI_Abort(comm, 1); /* Call CVodeCreate to create the solver memory and specify the * Adams-Moulton LMM and the use of a functional iteration */ cvode_mem = CVodeCreate(CV_ADAMS, CV_FUNCTIONAL); if(check_flag((void *)cvode_mem, "CVodeCreate", 0, my_pe)) MPI_Abort(comm, 1); flag = CVodeSetUserData(cvode_mem, data); if(check_flag(&flag, "CVodeSetUserData", 1, my_pe)) MPI_Abort(comm, 1); /* Call CVodeInit to initialize the integrator memory and specify the * user's right hand side function in u'=f(t,u), the inital time T0, and * the initial dependent variable vector u. */ flag = CVodeInit(cvode_mem, f, T0, u); if(check_flag(&flag, "CVodeInit", 1, my_pe)) return(1); /* Call CVodeSStolerances to specify the scalar relative tolerance * and scalar absolute tolerances */ flag = CVodeSStolerances(cvode_mem, reltol, abstol); if (check_flag(&flag, "CVodeSStolerances", 1, my_pe)) return(1); if (my_pe == 0) PrintIntro(npes); umax = N_VMaxNorm(u); if (my_pe == 0) { t = T0; PrintData(t, umax, 0); } /* In loop over output points, call CVode, print results, test for error */ for (iout=1, tout=T1; iout <= NOUT; iout++, tout += DTOUT) { flag = CVode(cvode_mem, tout, u, &t, CV_NORMAL); if(check_flag(&flag, "CVode", 1, my_pe)) break; umax = N_VMaxNorm(u); flag = CVodeGetNumSteps(cvode_mem, &nst); check_flag(&flag, "CVodeGetNumSteps", 1, my_pe); if (my_pe == 0) PrintData(t, umax, nst); } if (my_pe == 0) PrintFinalStats(cvode_mem); /* Print some final statistics */ N_VDestroy(u); /* Free hypre vector wrapper */ HYPRE_IJVectorDestroy(Uij); /* Free the underlying hypre vector */ CVodeFree(&cvode_mem); /* Free the integrator memory */ free(data); /* Free user data */ MPI_Finalize(); return(0); }
/***************************** Main Program ******************************/ int main(int argc, char *argv[]) { realtype abstol, reltol, t, tout; N_Vector u; UserData data; void *arkode_mem; int iout, flag; MPI_Comm comm; HYPRE_Int local_N, npes, my_pe; HYPRE_ParVector Upar; /* Declare HYPRE parallel vector */ HYPRE_IJVector Uij; /* Declare "IJ" interface to HYPRE vector */ u = NULL; data = NULL; arkode_mem = NULL; /* Set problem size neq */ /* neq = NVARS*MX*MY; */ /* Get processor number and total number of pe's */ MPI_Init(&argc, &argv); comm = MPI_COMM_WORLD; MPI_Comm_size(comm, &npes); MPI_Comm_rank(comm, &my_pe); if (npes != NPEX*NPEY) { if (my_pe == 0) fprintf(stderr, "\nMPI_ERROR(0): npes = %d is not equal to NPEX*NPEY = %d\n\n", npes,NPEX*NPEY); MPI_Finalize(); return(1); } /* Set local length */ local_N = NVARS*MXSUB*MYSUB; /* Allocate hypre vector */ HYPRE_IJVectorCreate(comm, my_pe*local_N, (my_pe + 1)*local_N - 1, &Uij); HYPRE_IJVectorSetObjectType(Uij, HYPRE_PARCSR); HYPRE_IJVectorInitialize(Uij); /* Allocate and load user data block; allocate preconditioner block */ data = (UserData) malloc(sizeof *data); if (check_flag((void *)data, "malloc", 2, my_pe)) MPI_Abort(comm, 1); InitUserData(my_pe, comm, data); /* Set initial values and allocate u */ SetInitialProfiles(Uij, data, local_N, my_pe*local_N); HYPRE_IJVectorAssemble(Uij); HYPRE_IJVectorGetObject(Uij, (void**) &Upar); u = N_VMake_ParHyp(Upar); /* Create wrapper u around hypre vector */ if (check_flag((void *)u, "N_VNew", 0, my_pe)) MPI_Abort(comm, 1); /* Set tolerances */ abstol = ATOL; reltol = RTOL; /* Call ARKodeCreate to create the solver memory */ arkode_mem = ARKodeCreate(); if (check_flag((void *)arkode_mem, "ARKodeCreate", 0, my_pe)) MPI_Abort(comm, 1); /* Set the pointer to user-defined data */ flag = ARKodeSetUserData(arkode_mem, data); if (check_flag(&flag, "ARKodeSetUserData", 1, my_pe)) MPI_Abort(comm, 1); /* Call ARKodeInit to initialize the integrator memory and specify the user's right hand side functions in u'=fe(t,u)+fi(t,u) [here fe is NULL], the inital time T0, and the initial dependent variable vector u. 
*/ flag = ARKodeInit(arkode_mem, NULL, f, T0, u); if(check_flag(&flag, "ARKodeInit", 1, my_pe)) return(1); /* Call ARKodeSetMaxNumSteps to increase default */ flag = ARKodeSetMaxNumSteps(arkode_mem, 1000000); if (check_flag(&flag, "ARKodeSetMaxNumSteps", 1, my_pe)) return(1); /* Call ARKodeSStolerances to specify the scalar relative tolerance and scalar absolute tolerances */ flag = ARKodeSStolerances(arkode_mem, reltol, abstol); if (check_flag(&flag, "ARKodeSStolerances", 1, my_pe)) return(1); /* Call ARKSpgmr to specify the linear solver ARKSPGMR with left preconditioning and the default Krylov dimension maxl */ flag = ARKSpgmr(arkode_mem, PREC_LEFT, 0); if (check_flag(&flag, "ARKSpgmr", 1, my_pe)) MPI_Abort(comm, 1); /* Set preconditioner setup and solve routines Precond and PSolve, and the pointer to the user-defined block data */ flag = ARKSpilsSetPreconditioner(arkode_mem, Precond, PSolve); if (check_flag(&flag, "ARKSpilsSetPreconditioner", 1, my_pe)) MPI_Abort(comm, 1); if (my_pe == 0) printf("\n2-species diurnal advection-diffusion problem\n\n"); /* In loop over output points, call ARKode, print results, test for error */ for (iout=1, tout=TWOHR; iout<=NOUT; iout++, tout+=TWOHR) { flag = ARKode(arkode_mem, tout, u, &t, ARK_NORMAL); if (check_flag(&flag, "ARKode", 1, my_pe)) break; PrintOutput(arkode_mem, my_pe, comm, u, t); } /* Print final statistics */ if (my_pe == 0) PrintFinalStats(arkode_mem); /* Free memory */ N_VDestroy(u); /* Free hypre vector wrapper */ HYPRE_IJVectorDestroy(Uij); /* Free the underlying hypre vector */ FreeUserData(data); ARKodeFree(&arkode_mem); MPI_Finalize(); return(0); }
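/* Both SUNDIALS examples above wrap the assembled hypre vector with N_VMake_ParHyp.
 * Inside user routines such as f(), Precond() or PSolve(), the on-process data is
 * typically reached back through the ParHyp accessor; a minimal sketch assuming the
 * standard nvector_parhyp API (LocalArray is an illustrative helper, not part of the
 * examples): */
static realtype *LocalArray(N_Vector u)
{
   hypre_ParVector *uhyp = N_VGetVector_ParHyp(u);             /* underlying hypre vector  */
   return hypre_VectorData(hypre_ParVectorLocalVector(uhyp));  /* pointer to local entries */
}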
HYPRE_Int main (HYPRE_Int argc, char *argv[]) { HYPRE_Int i; HYPRE_Int myid, num_procs; HYPRE_Int N, n; HYPRE_Int ilower, iupper; HYPRE_Int local_size, extra; HYPRE_Int solver_id; HYPRE_Int print_solution; double h, h2; #ifdef HYPRE_FORTRAN hypre_F90_Obj A; hypre_F90_Obj parcsr_A; hypre_F90_Obj b; hypre_F90_Obj par_b; hypre_F90_Obj x; hypre_F90_Obj par_x; hypre_F90_Obj solver, precond; hypre_F90_Obj long_temp_COMM; HYPRE_Int temp_COMM; HYPRE_Int precond_id; HYPRE_Int one = 1; HYPRE_Int two = 2; HYPRE_Int three = 3; HYPRE_Int six = 6; HYPRE_Int twenty = 20; HYPRE_Int thousand = 1000; HYPRE_Int hypre_type = HYPRE_PARCSR; double oo1 = 1.e-3; double tol = 1.e-7; #else HYPRE_IJMatrix A; HYPRE_ParCSRMatrix parcsr_A; HYPRE_IJVector b; HYPRE_ParVector par_b; HYPRE_IJVector x; HYPRE_ParVector par_x; HYPRE_Solver solver, precond; #endif /* Initialize MPI */ hypre_MPI_Init(&argc, &argv); hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid); hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs); /* Default problem parameters */ n = 33; solver_id = 0; print_solution = 0; /* Parse command line */ { HYPRE_Int arg_index = 0; HYPRE_Int print_usage = 0; while (arg_index < argc) { if ( strcmp(argv[arg_index], "-n") == 0 ) { arg_index++; n = atoi(argv[arg_index++]); } else if ( strcmp(argv[arg_index], "-solver") == 0 ) { arg_index++; solver_id = atoi(argv[arg_index++]); } else if ( strcmp(argv[arg_index], "-print_solution") == 0 ) { arg_index++; print_solution = 1; } else if ( strcmp(argv[arg_index], "-help") == 0 ) { print_usage = 1; break; } else { arg_index++; } } if ((print_usage) && (myid == 0)) { hypre_printf("\n"); hypre_printf("Usage: %s [<options>]\n", argv[0]); hypre_printf("\n"); hypre_printf(" -n <n> : problem size in each direction (default: 33)\n"); hypre_printf(" -solver <ID> : solver ID\n"); hypre_printf(" 0 - AMG (default) \n"); hypre_printf(" 1 - AMG-PCG\n"); hypre_printf(" 8 - ParaSails-PCG\n"); hypre_printf(" 50 - PCG\n"); hypre_printf(" -print_solution : print the solution vector\n"); hypre_printf("\n"); } if (print_usage) { hypre_MPI_Finalize(); return (0); } } /* Preliminaries: want at least one processor per row */ if (n*n < num_procs) n = sqrt(num_procs) + 1; N = n*n; /* global number of rows */ h = 1.0/(n+1); /* mesh size*/ h2 = h*h; /* Each processor knows only of its own rows - the range is denoted by ilower and upper. Here we partition the rows. We account for the fact that N may not divide evenly by the number of processors. */ local_size = N/num_procs; extra = N - local_size*num_procs; ilower = local_size*myid; ilower += hypre_min(myid, extra); iupper = local_size*(myid+1); iupper += hypre_min(myid+1, extra); iupper = iupper - 1; /* How many rows do I have? */ local_size = iupper - ilower + 1; /* Create the matrix. 
Note that this is a square matrix, so we indicate the row partition size twice (since number of rows = number of cols) */ #ifdef HYPRE_FORTRAN long_temp_COMM = (hypre_F90_Obj) hypre_MPI_COMM_WORLD; temp_COMM = (HYPRE_Int) hypre_MPI_COMM_WORLD; HYPRE_IJMatrixCreate(&long_temp_COMM, &ilower, &iupper, &ilower, &iupper, &A); #else HYPRE_IJMatrixCreate(hypre_MPI_COMM_WORLD, ilower, iupper, ilower, iupper, &A); #endif /* Choose a parallel csr format storage (see the User's Manual) */ #ifdef HYPRE_FORTRAN HYPRE_IJMatrixSetObjectType(&A, &hypre_type); #else HYPRE_IJMatrixSetObjectType(A, HYPRE_PARCSR); #endif /* Initialize before setting coefficients */ #ifdef HYPRE_FORTRAN HYPRE_IJMatrixInitialize(&A); #else HYPRE_IJMatrixInitialize(A); #endif /* Now go through my local rows and set the matrix entries. Each row has at most 5 entries. For example, if n=3: A = [M -I 0; -I M -I; 0 -I M] M = [4 -1 0; -1 4 -1; 0 -1 4] Note that here we are setting one row at a time, though one could set all the rows together (see the User's Manual). */ { HYPRE_Int nnz; double values[5]; HYPRE_Int cols[5]; for (i = ilower; i <= iupper; i++) { nnz = 0; /* The left identity block:position i-n */ if ((i-n)>=0) { cols[nnz] = i-n; values[nnz] = -1.0; nnz++; } /* The left -1: position i-1 */ if (i%n) { cols[nnz] = i-1; values[nnz] = -1.0; nnz++; } /* Set the diagonal: position i */ cols[nnz] = i; values[nnz] = 4.0; nnz++; /* The right -1: position i+1 */ if ((i+1)%n) { cols[nnz] = i+1; values[nnz] = -1.0; nnz++; } /* The right identity block:position i+n */ if ((i+n)< N) { cols[nnz] = i+n; values[nnz] = -1.0; nnz++; } /* Set the values for row i */ #ifdef HYPRE_FORTRAN HYPRE_IJMatrixSetValues(&A, &one, &nnz, &i, &cols[0], &values[0]); #else HYPRE_IJMatrixSetValues(A, 1, &nnz, &i, cols, values); #endif } } /* Assemble after setting the coefficients */ #ifdef HYPRE_FORTRAN HYPRE_IJMatrixAssemble(&A); #else HYPRE_IJMatrixAssemble(A); #endif /* Get the parcsr matrix object to use */ #ifdef HYPRE_FORTRAN HYPRE_IJMatrixGetObject(&A, &parcsr_A); HYPRE_IJMatrixGetObject(&A, &parcsr_A); #else HYPRE_IJMatrixGetObject(A, (void**) &parcsr_A); HYPRE_IJMatrixGetObject(A, (void**) &parcsr_A); #endif /* Create the rhs and solution */ #ifdef HYPRE_FORTRAN HYPRE_IJVectorCreate(&temp_COMM, &ilower, &iupper, &b); HYPRE_IJVectorSetObjectType(&b, &hypre_type); HYPRE_IJVectorInitialize(&b); #else HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, ilower, iupper,&b); HYPRE_IJVectorSetObjectType(b, HYPRE_PARCSR); HYPRE_IJVectorInitialize(b); #endif #ifdef HYPRE_FORTRAN HYPRE_IJVectorCreate(&temp_COMM, &ilower, &iupper, &x); HYPRE_IJVectorSetObjectType(&x, &hypre_type); HYPRE_IJVectorInitialize(&x); #else HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, ilower, iupper,&x); HYPRE_IJVectorSetObjectType(x, HYPRE_PARCSR); HYPRE_IJVectorInitialize(x); #endif /* Set the rhs values to h^2 and the solution to zero */ { double *rhs_values, *x_values; HYPRE_Int *rows; rhs_values = calloc(local_size, sizeof(double)); x_values = calloc(local_size, sizeof(double)); rows = calloc(local_size, sizeof(HYPRE_Int)); for (i=0; i<local_size; i++) { rhs_values[i] = h2; x_values[i] = 0.0; rows[i] = ilower + i; } #ifdef HYPRE_FORTRAN HYPRE_IJVectorSetValues(&b, &local_size, &rows[0], &rhs_values[0]); HYPRE_IJVectorSetValues(&x, &local_size, &rows[0], &x_values[0]); #else HYPRE_IJVectorSetValues(b, local_size, rows, rhs_values); HYPRE_IJVectorSetValues(x, local_size, rows, x_values); #endif free(x_values); free(rhs_values); free(rows); } #ifdef HYPRE_FORTRAN HYPRE_IJVectorAssemble(&b); 
HYPRE_IJVectorGetObject(&b, &par_b); #else HYPRE_IJVectorAssemble(b); HYPRE_IJVectorGetObject(b, (void **) &par_b); #endif #ifdef HYPRE_FORTRAN HYPRE_IJVectorAssemble(&x); HYPRE_IJVectorGetObject(&x, &par_x); #else HYPRE_IJVectorAssemble(x); HYPRE_IJVectorGetObject(x, (void **) &par_x); #endif /* Choose a solver and solve the system */ /* AMG */ if (solver_id == 0) { HYPRE_Int num_iterations; double final_res_norm; /* Create solver */ #ifdef HYPRE_FORTRAN HYPRE_BoomerAMGCreate(&solver); #else HYPRE_BoomerAMGCreate(&solver); #endif /* Set some parameters (See Reference Manual for more parameters) */ #ifdef HYPRE_FORTRAN HYPRE_BoomerAMGSetPrintLevel(&solver, &three); /* print solve info + parameters */ HYPRE_BoomerAMGSetCoarsenType(&solver, &six); /* Falgout coarsening */ HYPRE_BoomerAMGSetRelaxType(&solver, &three); /* G-S/Jacobi hybrid relaxation */ HYPRE_BoomerAMGSetNumSweeps(&solver, &one); /* Sweeeps on each level */ HYPRE_BoomerAMGSetMaxLevels(&solver, &twenty); /* maximum number of levels */ HYPRE_BoomerAMGSetTol(&solver, &tol); /* conv. tolerance */ #else HYPRE_BoomerAMGSetPrintLevel(solver, 3); /* print solve info + parameters */ HYPRE_BoomerAMGSetCoarsenType(solver, 6); /* Falgout coarsening */ HYPRE_BoomerAMGSetRelaxType(solver, 3); /* G-S/Jacobi hybrid relaxation */ HYPRE_BoomerAMGSetNumSweeps(solver, 1); /* Sweeeps on each level */ HYPRE_BoomerAMGSetMaxLevels(solver, 20); /* maximum number of levels */ HYPRE_BoomerAMGSetTol(solver, 1e-7); /* conv. tolerance */ #endif /* Now setup and solve! */ #ifdef HYPRE_FORTRAN HYPRE_BoomerAMGSetup(&solver, &parcsr_A, &par_b, &par_x); HYPRE_BoomerAMGSolve(&solver, &parcsr_A, &par_b, &par_x); #else HYPRE_BoomerAMGSetup(solver, parcsr_A, par_b, par_x); HYPRE_BoomerAMGSolve(solver, parcsr_A, par_b, par_x); #endif /* Run info - needed logging turned on */ #ifdef HYPRE_FORTRAN HYPRE_BoomerAMGGetNumIterations(&solver, &num_iterations); HYPRE_BoomerAMGGetFinalRelativeResidualNorm(&solver, &final_res_norm); #else HYPRE_BoomerAMGGetNumIterations(solver, &num_iterations); HYPRE_BoomerAMGGetFinalRelativeResidualNorm(solver, &final_res_norm); #endif if (myid == 0) { hypre_printf("\n"); hypre_printf("Iterations = %d\n", num_iterations); hypre_printf("Final Relative Residual Norm = %e\n", final_res_norm); hypre_printf("\n"); } /* Destroy solver */ #ifdef HYPRE_FORTRAN HYPRE_BoomerAMGDestroy(&solver); #else HYPRE_BoomerAMGDestroy(solver); #endif } /* PCG */ else if (solver_id == 50) { HYPRE_Int num_iterations; double final_res_norm; /* Create solver */ #ifdef HYPRE_FORTRAN HYPRE_ParCSRPCGCreate(&temp_COMM, &solver); #else HYPRE_ParCSRPCGCreate(hypre_MPI_COMM_WORLD, &solver); #endif /* Set some parameters (See Reference Manual for more parameters) */ #ifdef HYPRE_FORTRAN HYPRE_ParCSRPCGSetMaxIter(&solver, &thousand); /* max iterations */ HYPRE_ParCSRPCGSetTol(&solver, &tol); /* conv. tolerance */ HYPRE_ParCSRPCGSetTwoNorm(&solver, &one); /* use the two norm as the stopping criteria */ HYPRE_ParCSRPCGSetPrintLevel(&solver, &two); /* prints out the iteration info */ #else HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */ HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */ HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */ HYPRE_PCGSetPrintLevel(solver, 2); /* prints out the iteration info */ HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */ #endif /* Now setup and solve! 
*/ #ifdef HYPRE_FORTRAN HYPRE_ParCSRPCGSetup(&solver, &parcsr_A, &par_b, &par_x); HYPRE_ParCSRPCGSolve(&solver, &parcsr_A, &par_b, &par_x); #else HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x); HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x); #endif /* Run info - needed logging turned on */ #ifdef HYPRE_FORTRAN HYPRE_ParCSRPCGGetNumIterations(&solver, &num_iterations); HYPRE_ParCSRPCGGetFinalRelativeResidualNorm(&solver, &final_res_norm); #else HYPRE_PCGGetNumIterations(solver, &num_iterations); HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm); #endif if (myid == 0) { hypre_printf("\n"); hypre_printf("Iterations = %d\n", num_iterations); hypre_printf("Final Relative Residual Norm = %e\n", final_res_norm); hypre_printf("\n"); } /* Destroy solver */ #ifdef HYPRE_FORTRAN HYPRE_ParCSRPCGDestroy(&solver); #else HYPRE_ParCSRPCGDestroy(solver); #endif } /* PCG with AMG preconditioner */ else if (solver_id == 1) { HYPRE_Int num_iterations; double final_res_norm; /* Create solver */ #ifdef HYPRE_FORTRAN HYPRE_ParCSRPCGCreate(&temp_COMM, &solver); #else HYPRE_ParCSRPCGCreate(hypre_MPI_COMM_WORLD, &solver); #endif /* Set some parameters (See Reference Manual for more parameters) */ #ifdef HYPRE_FORTRAN HYPRE_ParCSRPCGSetMaxIter(&solver, &thousand); /* max iterations */ HYPRE_ParCSRPCGSetTol(&solver, &tol); /* conv. tolerance */ HYPRE_ParCSRPCGSetTwoNorm(&solver, &one); /* use the two norm as the stopping criteria */ HYPRE_ParCSRPCGSetPrintLevel(&solver, &two); /* print solve info */ #else HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */ HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */ HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */ HYPRE_PCGSetPrintLevel(solver, 2); /* print solve info */ HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */ #endif /* Now set up the AMG preconditioner and specify any parameters */ #ifdef HYPRE_FORTRAN HYPRE_BoomerAMGCreate(&precond); HYPRE_BoomerAMGSetPrintLevel(&precond, &one); /* print amg solution info*/ HYPRE_BoomerAMGSetCoarsenType(&precond, &six); HYPRE_BoomerAMGSetRelaxType(&precond, &three); HYPRE_BoomerAMGSetNumSweeps(&precond, &one); HYPRE_BoomerAMGSetTol(&precond, &oo1); #else HYPRE_BoomerAMGCreate(&precond); HYPRE_BoomerAMGSetPrintLevel(precond, 1); /* print amg solution info*/ HYPRE_BoomerAMGSetCoarsenType(precond, 6); HYPRE_BoomerAMGSetRelaxType(precond, 3); HYPRE_BoomerAMGSetNumSweeps(precond, 1); HYPRE_BoomerAMGSetTol(precond, 1e-3); #endif /* Set the PCG preconditioner */ #ifdef HYPRE_FORTRAN precond_id = 2; HYPRE_ParCSRPCGSetPrecond(&solver, &precond_id, &precond); #else HYPRE_PCGSetPrecond(solver, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup, precond); #endif /* Now setup and solve! 
*/ #ifdef HYPRE_FORTRAN HYPRE_ParCSRPCGSetup(&solver, &parcsr_A, &par_b, &par_x); HYPRE_ParCSRPCGSolve(&solver, &parcsr_A, &par_b, &par_x); #else HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x); HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x); #endif /* Run info - needed logging turned on */ #ifdef HYPRE_FORTRAN HYPRE_ParCSRPCGGetNumIterations(&solver, &num_iterations); HYPRE_ParCSRPCGGetFinalRelativeResidualNorm(&solver, &final_res_norm); #else HYPRE_PCGGetNumIterations(solver, &num_iterations); HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm); #endif if (myid == 0) { hypre_printf("\n"); hypre_printf("Iterations = %d\n", num_iterations); hypre_printf("Final Relative Residual Norm = %e\n", final_res_norm); hypre_printf("\n"); } /* Destroy solver and preconditioner */ #ifdef HYPRE_FORTRAN HYPRE_ParCSRPCGDestroy(&solver); HYPRE_BoomerAMGDestroy(&precond); #else HYPRE_ParCSRPCGDestroy(solver); HYPRE_BoomerAMGDestroy(precond); #endif } /* PCG with Parasails Preconditioner */ else if (solver_id == 8) { HYPRE_Int num_iterations; double final_res_norm; HYPRE_Int sai_max_levels = 1; double sai_threshold = 0.1; double sai_filter = 0.05; HYPRE_Int sai_sym = 1; /* Create solver */ #ifdef HYPRE_FORTRAN HYPRE_ParCSRPCGCreate(&temp_COMM, &solver); #else HYPRE_ParCSRPCGCreate(hypre_MPI_COMM_WORLD, &solver); #endif /* Set some parameters (See Reference Manual for more parameters) */ #ifdef HYPRE_FORTRAN HYPRE_ParCSRPCGSetMaxIter(&solver, &thousand); /* max iterations */ HYPRE_ParCSRPCGSetTol(&solver, &tol); /* conv. tolerance */ HYPRE_ParCSRPCGSetTwoNorm(&solver, &one); /* use the two norm as the stopping criteria */ HYPRE_ParCSRPCGSetPrintLevel(&solver, &two); /* print solve info */ #else HYPRE_PCGSetMaxIter(solver, 1000); /* max iterations */ HYPRE_PCGSetTol(solver, 1e-7); /* conv. tolerance */ HYPRE_PCGSetTwoNorm(solver, 1); /* use the two norm as the stopping criteria */ HYPRE_PCGSetPrintLevel(solver, 2); /* print solve info */ HYPRE_PCGSetLogging(solver, 1); /* needed to get run info later */ #endif /* Now set up the ParaSails preconditioner and specify any parameters */ #ifdef HYPRE_FORTRAN HYPRE_ParaSailsCreate(&temp_COMM, &precond); #else HYPRE_ParaSailsCreate(hypre_MPI_COMM_WORLD, &precond); #endif /* Set some parameters (See Reference Manual for more parameters) */ #ifdef HYPRE_FORTRAN HYPRE_ParaSailsSetParams(&precond, &sai_threshold, &sai_max_levels); HYPRE_ParaSailsSetFilter(&precond, &sai_filter); HYPRE_ParaSailsSetSym(&precond, &sai_sym); HYPRE_ParaSailsSetLogging(&precond, &three); #else HYPRE_ParaSailsSetParams(precond, sai_threshold, sai_max_levels); HYPRE_ParaSailsSetFilter(precond, sai_filter); HYPRE_ParaSailsSetSym(precond, sai_sym); HYPRE_ParaSailsSetLogging(precond, 3); #endif /* Set the PCG preconditioner */ #ifdef HYPRE_FORTRAN precond_id = 4; HYPRE_ParCSRPCGSetPrecond(&solver, &precond_id, &precond); #else HYPRE_PCGSetPrecond(solver, (HYPRE_PtrToSolverFcn) HYPRE_ParaSailsSolve, (HYPRE_PtrToSolverFcn) HYPRE_ParaSailsSetup, precond); #endif /* Now setup and solve! 
*/ #ifdef HYPRE_FORTRAN HYPRE_ParCSRPCGSetup(&solver, &parcsr_A, &par_b, &par_x); HYPRE_ParCSRPCGSolve(&solver, &parcsr_A, &par_b, &par_x); #else HYPRE_ParCSRPCGSetup(solver, parcsr_A, par_b, par_x); HYPRE_ParCSRPCGSolve(solver, parcsr_A, par_b, par_x); #endif /* Run info - needed logging turned on */ #ifdef HYPRE_FORTRAN HYPRE_ParCSRPCGGetNumIterations(&solver, &num_iterations); HYPRE_ParCSRPCGGetFinalRelativeResidualNorm(&solver, &final_res_norm); #else HYPRE_PCGGetNumIterations(solver, &num_iterations); HYPRE_PCGGetFinalRelativeResidualNorm(solver, &final_res_norm); #endif if (myid == 0) { hypre_printf("\n"); hypre_printf("Iterations = %d\n", num_iterations); hypre_printf("Final Relative Residual Norm = %e\n", final_res_norm); hypre_printf("\n"); } /* Destory solver and preconditioner */ #ifdef HYPRE_FORTRAN HYPRE_ParCSRPCGDestroy(&solver); HYPRE_ParaSailsDestroy(&precond); #else HYPRE_ParCSRPCGDestroy(solver); HYPRE_ParaSailsDestroy(precond); #endif } else { if (myid ==0) hypre_printf("Invalid solver id specified.\n"); } /* Print the solution */ #ifdef HYPRE_FORTRAN if (print_solution) HYPRE_IJVectorPrint(&x, "ij.out.x"); #else if (print_solution) HYPRE_IJVectorPrint(x, "ij.out.x"); #endif /* Clean up */ #ifdef HYPRE_FORTRAN HYPRE_IJMatrixDestroy(&A); HYPRE_IJVectorDestroy(&b); HYPRE_IJVectorDestroy(&x); #else HYPRE_IJMatrixDestroy(A); HYPRE_IJVectorDestroy(b); HYPRE_IJVectorDestroy(x); #endif /* Finalize MPI*/ hypre_MPI_Finalize(); return(0); }
int main(int argc, char *argv[])
{
    GRID *g;
    DOF *u_h;
    MAT *A, *A0, *B;
    MAP *map;
    INT i;
    size_t nnz, mem, mem_peak;
    VEC *x, *y0, *y1, *y2;
    double t0, t1, dnz, dnz1, mflops, mop;
    char *fn = "../test/cube.dat";
    FLOAT mem_max = 300;
    INT refine = 0;

    phgOptionsRegisterFilename("-mesh_file", "Mesh file", (char **)&fn);
    phgOptionsRegisterInt("-loop_count", "Loop count", &loop_count);
    phgOptionsRegisterInt("-refine", "Refinement level", &refine);
    phgOptionsRegisterFloat("-mem_max", "Maximum memory (MB)", &mem_max);

    phgInit(&argc, &argv);
    g = phgNewGrid(-1);
    if (!phgImport(g, fn, FALSE))
        phgError(1, "can't read file \"%s\".\n", fn);
    phgRefineAllElements(g, refine);
    u_h = phgDofNew(g, DOF_DEFAULT, 1, "u_h", DofNoAction);

    while (TRUE) {
        phgPrintf("\n");
        if (phgBalanceGrid(g, 1.2, 1, NULL, 0.))
            phgPrintf("Repartition mesh, %d submeshes, load imbalance: %lg\n",
                      g->nprocs, (double)g->lif);
        map = phgMapCreate(u_h, NULL);
        A = phgMapCreateMat(map, map);
        A->handle_bdry_eqns = TRUE;
        build_matrix(A, u_h);
        phgMatAssemble(A);
        /* Note: A is unsymmetric (A' != A) if boundary entries not removed */
        phgMatRemoveBoundaryEntries(A);
#if 0   /* test block matrix operation */
        A0 = phgMatCreateBlockMatrix(g->comm, 1, 1, &A, NULL);
#else
        A0 = A;
#endif
        phgPrintf("%d DOF, %d elems, %d submeshes, matrix size: %d, LIF: %lg\n",
                  DofGetDataCountGlobal(u_h), g->nleaf_global, g->nprocs,
                  A->rmap->nglobal, (double)g->lif);

        /* test PHG mat-vec multiply */
        x = phgMapCreateVec(A->cmap, 1);
        y1 = phgMapCreateVec(A->rmap, 1);
        phgVecRandomize(x, 123);
        phgMatVec(MAT_OP_N, 1.0, A0, x, 0.0, &y1);

        phgPerfGetMflops(g, NULL, NULL);        /* reset flops counter */
        t0 = phgGetTime(NULL);
        for (i = 0; i < loop_count; i++) {
            phgMatVec(MAT_OP_N, 1.0, A0, x, 0.0, &y1);
        }
        t1 = phgGetTime(NULL);
        mflops = phgPerfGetMflops(g, NULL, NULL);
        y0 = phgVecCopy(y1, NULL);
        nnz = A->nnz_d + A->nnz_o;
#if USE_MPI
        dnz1 = nnz;
        MPI_Reduce(&dnz1, &dnz, 1, MPI_DOUBLE, MPI_SUM, 0, g->comm);
#else
        dnz = nnz;
#endif
        /* each mat-vec costs about 2*nnz - nrows flops; dnz is the global nnz,
         * so use the global row count here */
        mop = loop_count * (dnz + dnz - A->rmap->nglobal) * 1e-6;
        phgPrintf("\n");
        t1 -= t0;
        phgPrintf(" PHG: time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF)\n",
                  t1, dnz, mop / (t1 == 0 ? 1. : t1), mflops);

        /* test trans(A)*x */
        phgPerfGetMflops(g, NULL, NULL);        /* reset flops counter */
        t0 = phgGetTime(NULL);
        for (i = 0; i < loop_count; i++) {
            phgMatVec(MAT_OP_T, 1.0, A0, x, 0.0, &y1);
        }
        t1 = phgGetTime(NULL);
        mflops = phgPerfGetMflops(g, NULL, NULL);
        t1 -= t0;
        phgPrintf(" A'*x: time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), "
                  "err: %le\n", t1, dnz, mop / (t1 == 0 ? 1. : t1), mflops,
                  (double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL));

        /* time B = A * A */
        phgPerfGetMflops(g, NULL, NULL);        /* reset flops counter */
        t0 = phgGetTime(NULL);
        B = phgMatMat(MAT_OP_N, MAT_OP_N, 1.0, A, A, 0.0, NULL);
        t1 = phgGetTime(NULL);
        mflops = phgPerfGetMflops(g, NULL, NULL);
        nnz = B->nnz_d + B->nnz_o;
#if USE_MPI
        dnz1 = nnz;
        MPI_Reduce(&dnz1, &dnz, 1, MPI_DOUBLE, MPI_SUM, 0, g->comm);
#else
        dnz = nnz;
#endif
        /* compare B*x <--> A*A*x */
        y2 = phgMatVec(MAT_OP_N, 1.0, B, x, 0.0, NULL);
        phgMatVec(MAT_OP_N, 1.0, A0, y0, 0.0, &y1);
        phgMatDestroy(&B);
        t1 -= t0;
        phgPrintf(" A*A: time %0.4lf, nnz %0.16lg, %0.2lfMF, err: %le\n",
                  t1, dnz, mflops,
                  (double)phgVecNorm2(phgVecAXPBY(-1.0, y1, 1.0, &y2), 0, NULL));

#if USE_PETSC
        {
            Mat ma, mb;
            MatInfo info;
            Vec va, vb, vc;
            PetscScalar *vec;

            ma = phgPetscCreateMatAIJ(A);
            MatGetVecs(ma, PETSC_NULL, &va);
            VecDuplicate(va, &vb);
            VecGetArray(va, &vec);
            memcpy(vec, x->data, x->map->nlocal * sizeof(*vec));
            VecRestoreArray(va, &vec);
            MatMult(ma, va, vb);

            phgPerfGetMflops(g, NULL, NULL);    /* reset flops counter */
            t0 = phgGetTime(NULL);
            for (i = 0; i < loop_count; i++) {
                MatMult(ma, va, vb);
            }
            t1 = phgGetTime(NULL);
            mflops = phgPerfGetMflops(g, NULL, NULL);
            VecGetArray(vb, &vec);
            memcpy(y1->data, vec, x->map->nlocal * sizeof(*vec));
            VecRestoreArray(vb, &vec);
            MatGetInfo(ma, MAT_GLOBAL_SUM, &info);
            /*phgPrintf(" --------------------------------------------" "-------------------------\n");*/
            phgPrintf("\n");
            t1 -= t0;
            dnz = info.nz_used;
            phgPrintf(" PETSc: time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), "
                      "err: %le\n", t1, dnz, mop / (t1 == 0 ? 1. : t1), mflops,
                      (double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL));

            phgPerfGetMflops(g, NULL, NULL);    /* reset flops counter */
            t0 = phgGetTime(NULL);
            for (i = 0; i < loop_count; i++) {
                MatMultTranspose(ma, va, vb);
            }
            t1 = phgGetTime(NULL);
            mflops = phgPerfGetMflops(g, NULL, NULL);
            VecGetArray(vb, &vec);
            memcpy(y1->data, vec, x->map->nlocal * sizeof(*vec));
            VecRestoreArray(vb, &vec);
            t1 -= t0;
            phgPrintf(" A'*x: time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), "
                      "err: %le\n", t1, dnz, mop / (t1 == 0 ? 1. : t1), mflops,
                      (double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL));

            phgPerfGetMflops(g, NULL, NULL);    /* reset flops counter */
            t0 = phgGetTime(NULL);
            MatMatMult(ma, ma, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &mb);
            t1 = phgGetTime(NULL);
            mflops = phgPerfGetMflops(g, NULL, NULL);
            t1 -= t0;
            MatGetInfo(mb, MAT_GLOBAL_SUM, &info);
            dnz = info.nz_used;
            VecDuplicate(va, &vc);
            /* compare B*x <--> A*A*x */
            MatMult(ma, vb, vc);
            MatMult(mb, va, vb);
            VecGetArray(vb, &vec);
            memcpy(y1->data, vec, x->map->nlocal * sizeof(*vec));
            VecRestoreArray(vb, &vec);
            VecGetArray(vc, &vec);
            memcpy(y2->data, vec, x->map->nlocal * sizeof(*vec));
            VecRestoreArray(vc, &vec);
            phgPrintf(" A*A: time %0.4lf, nnz %0.16lg, %0.2lfMF, err: %le\n",
                      t1, dnz, mflops,
                      (double)phgVecNorm2(phgVecAXPBY(-1.0, y1, 1.0, &y2), 0, NULL));
            phgPetscMatDestroy(&mb);
            phgPetscMatDestroy(&ma);
            phgPetscVecDestroy(&va);
            phgPetscVecDestroy(&vb);
            phgPetscVecDestroy(&vc);
        }
#endif  /* USE_PETSC */

#if USE_HYPRE
        {
            HYPRE_IJMatrix ma;
            HYPRE_IJVector va, vb, vc;
            HYPRE_ParCSRMatrix par_ma;
            hypre_ParCSRMatrix *par_mb;
            HYPRE_ParVector par_va, par_vb, par_vc;
            HYPRE_Int offset, *ni, start, end;

            assert(sizeof(INT) == sizeof(int) && sizeof(FLOAT) == sizeof(double));
            setup_hypre_mat(A, &ma);
            ni = phgAlloc(2 * A->rmap->nlocal * sizeof(*ni));
            offset = A->cmap->partition[A->cmap->rank];
            for (i = 0; i < A->rmap->nlocal; i++)
                ni[i] = i + offset;
            HYPRE_IJVectorCreate(g->comm, offset, offset + A->rmap->nlocal - 1, &va);
            HYPRE_IJVectorCreate(g->comm, offset, offset + A->rmap->nlocal - 1, &vb);
            HYPRE_IJVectorCreate(g->comm, offset, offset + A->rmap->nlocal - 1, &vc);
            HYPRE_IJVectorSetObjectType(va, HYPRE_PARCSR);
            HYPRE_IJVectorSetObjectType(vb, HYPRE_PARCSR);
            HYPRE_IJVectorSetObjectType(vc, HYPRE_PARCSR);
            HYPRE_IJVectorSetMaxOffProcElmts(va, 0);
            HYPRE_IJVectorSetMaxOffProcElmts(vb, 0);
            HYPRE_IJVectorSetMaxOffProcElmts(vc, 0);
            HYPRE_IJVectorInitialize(va);
            HYPRE_IJVectorInitialize(vb);
            HYPRE_IJVectorInitialize(vc);
            HYPRE_IJMatrixGetObject(ma, (void **)(void *)&par_ma);
            HYPRE_IJVectorGetObject(va, (void **)(void *)&par_va);
            HYPRE_IJVectorGetObject(vb, (void **)(void *)&par_vb);
            HYPRE_IJVectorGetObject(vc, (void **)(void *)&par_vc);
            HYPRE_IJVectorSetValues(va, A->cmap->nlocal, ni, (double *)x->data);
            HYPRE_IJVectorAssemble(va);
            HYPRE_IJVectorAssemble(vb);
            HYPRE_IJVectorAssemble(vc);
            HYPRE_IJMatrixGetRowCounts(ma, A->cmap->nlocal, ni, ni + A->rmap->nlocal);
            for (i = 0, nnz = 0; i < A->rmap->nlocal; i++)
                nnz += ni[A->rmap->nlocal + i];
#if USE_MPI
            dnz1 = nnz;
            MPI_Reduce(&dnz1, &dnz, 1, MPI_DOUBLE, MPI_SUM, 0, g->comm);
#else
            dnz = nnz;
#endif
            HYPRE_ParCSRMatrixMatvec(1.0, par_ma, par_va, 0.0, par_vb);
            phgPerfGetMflops(g, NULL, NULL);    /* reset flops counter */
            t0 = phgGetTime(NULL);
            for (i = 0; i < loop_count; i++) {
                HYPRE_ParCSRMatrixMatvec(1.0, par_ma, par_va, 0.0, par_vb);
            }
            t1 = phgGetTime(NULL);
            mflops = phgPerfGetMflops(g, NULL, NULL);
            HYPRE_IJVectorGetValues(vb, A->rmap->nlocal, ni, (double *)y1->data);
            /*phgPrintf(" --------------------------------------------" "-------------------------\n");*/
            phgPrintf("\n");
            t1 -= t0;
            phgPrintf(" HYPRE: time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), "
                      "err: %le\n", t1, dnz, mop / (t1 == 0 ? 1. : t1), mflops,
                      (double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL));

            phgPerfGetMflops(g, NULL, NULL);    /* reset flops counter */
            t0 = phgGetTime(NULL);
            for (i = 0; i < loop_count; i++) {
                HYPRE_ParCSRMatrixMatvecT(1.0, par_ma, par_va, 0.0, par_vb);
            }
            t1 = phgGetTime(NULL);
            mflops = phgPerfGetMflops(g, NULL, NULL);
            HYPRE_IJVectorGetValues(vb, A->rmap->nlocal, ni, (double *)y1->data);
            t1 -= t0;
            phgPrintf(" A'*x: time %0.4lf, nnz %0.16lg, %0.2lfMF (%0.2lfMF), "
                      "err: %le\n", t1, dnz, mop / (t1 == 0 ? 1. : t1), mflops,
                      (double)phgVecNorm2(phgVecAXPBY(-1.0, y0, 1.0, &y1), 0, NULL));

            phgPerfGetMflops(g, NULL, NULL);    /* reset flops counter */
            t0 = phgGetTime(NULL);
            /* Note: 'HYPRE_ParCSRMatrix' is currently typedef'ed to
             * 'hypre_ParCSRMatrix *' */
            par_mb = hypre_ParMatmul((hypre_ParCSRMatrix *)par_ma,
                                     (hypre_ParCSRMatrix *)par_ma);
            t1 = phgGetTime(NULL);
            mflops = phgPerfGetMflops(g, NULL, NULL);
            start = hypre_ParCSRMatrixFirstRowIndex(par_mb);
            end = hypre_ParCSRMatrixLastRowIndex(par_mb) + 1;
            for (i = start, nnz = 0; i < end; i++) {
                HYPRE_Int ncols;
                hypre_ParCSRMatrixGetRow(par_mb, i, &ncols, NULL, NULL);
                hypre_ParCSRMatrixRestoreRow(par_mb, i, &ncols, NULL, NULL);
                nnz += ncols;
            }
#if USE_MPI
            dnz1 = nnz;
            MPI_Reduce(&dnz1, &dnz, 1, MPI_DOUBLE, MPI_SUM, 0, g->comm);
#else
            dnz = nnz;
#endif
            /* compare B*x <--> A*A*x */
            HYPRE_ParCSRMatrixMatvec(1.0, par_ma, par_vb, 0.0, par_vc);
            HYPRE_ParCSRMatrixMatvec(1.0, (void *)par_mb, par_va, 0.0, par_vb);
            HYPRE_IJVectorGetValues(vb, A->rmap->nlocal, ni, (double *)y1->data);
            HYPRE_IJVectorGetValues(vc, A->rmap->nlocal, ni, (double *)y2->data);
            hypre_ParCSRMatrixDestroy(par_mb);
            t1 -= t0;
            phgPrintf(" A*A: time %0.4lf, nnz %0.16lg, %0.2lfMF, err: %le\n",
                      t1, dnz, mflops,
                      (double)phgVecNorm2(phgVecAXPBY(-1.0, y1, 1.0, &y2), 0, NULL));
            phgFree(ni);
            HYPRE_IJMatrixDestroy(ma);
            HYPRE_IJVectorDestroy(va);
            HYPRE_IJVectorDestroy(vb);
            HYPRE_IJVectorDestroy(vc);
        }
#endif  /* USE_HYPRE */

        if (A0 != A)
            phgMatDestroy(&A0);
#if 0
        if (A->rmap->nglobal > 1000) {
            VEC *v = phgMapCreateVec(A->rmap, 3);
            for (i = 0; i < v->map->nlocal; i++) {
                v->data[i + 0 * v->map->nlocal] = 1 * (i + v->map->partition[g->rank]);
                v->data[i + 1 * v->map->nlocal] = 2 * (i + v->map->partition[g->rank]);
                v->data[i + 2 * v->map->nlocal] = 3 * (i + v->map->partition[g->rank]);
            }
            phgMatDumpMATLAB(A, "A", "A.m");
            phgVecDumpMATLAB(v, "v", "v.m");
            phgFinalize();
            exit(0);
        }
#endif
        phgMatDestroy(&A);
        phgVecDestroy(&x);
        phgVecDestroy(&y0);
        phgVecDestroy(&y1);
        phgVecDestroy(&y2);
        phgMapDestroy(&map);
        mem = phgMemoryUsage(g, &mem_peak);
        dnz = mem / (1024.0 * 1024.0);
        dnz1 = mem_peak / (1024.0 * 1024.0);
        /*phgPrintf(" --------------------------------------------" "-------------------------\n");*/
        phgPrintf("\n");
        phgPrintf(" Memory: current %0.4lgMB, peak %0.4lgMB\n", dnz, dnz1);
#if 0
        {
            static int loop_count = 0;
            if (++loop_count == 4)
                break;
        }
#endif
        if (mem_peak > 1024 * (size_t)1024 * mem_max)
            break;
        phgRefineAllElements(g, 1);
    }

    phgDofFree(&u_h);
    phgFreeGrid(&g);
    phgFinalize();
    return 0;
}
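/*
 * A minimal, self-contained sketch (not part of the PHG test above) of the
 * HYPRE IJ pattern used in the USE_HYPRE branch: assemble a matrix through
 * HYPRE_IJMatrix, wrap vectors as HYPRE_IJVector, then apply
 * HYPRE_ParCSRMatrixMatvec and fetch the result back into user memory.
 * The function name ij_matvec_demo, the 1-D Laplacian stand-in matrix, the
 * parameters N/loop_count and the use of MPI_Wtime are illustrative
 * assumptions only; the real test uses the PHG-assembled matrix and timers.
 * Typical use (after MPI_Init): ij_matvec_demo(MPI_COMM_WORLD, 1000, 100);
 */
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include "HYPRE.h"
#include "HYPRE_IJ_mv.h"
#include "HYPRE_parcsr_mv.h"

void ij_matvec_demo(MPI_Comm comm, HYPRE_Int N, int loop_count)
{
    int rank, nprocs, loop;
    HYPRE_Int i, ilower, iupper, nlocal, row, ncols, cols[3], *indices;
    double values[3], t0, t1, *data;
    HYPRE_IJMatrix ij_A;
    HYPRE_IJVector ij_x, ij_y;
    HYPRE_ParCSRMatrix par_A;
    HYPRE_ParVector par_x, par_y;

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &nprocs);

    /* contiguous block-row partition of the N global rows */
    nlocal = N / nprocs + (rank < N % nprocs ? 1 : 0);
    ilower = rank * (N / nprocs) + (rank < N % nprocs ? rank : N % nprocs);
    iupper = ilower + nlocal - 1;

    /* assemble a 1-D Laplacian [-1 2 -1] through the IJ interface */
    HYPRE_IJMatrixCreate(comm, ilower, iupper, ilower, iupper, &ij_A);
    HYPRE_IJMatrixSetObjectType(ij_A, HYPRE_PARCSR);
    HYPRE_IJMatrixInitialize(ij_A);
    for (row = ilower; row <= iupper; row++) {
        ncols = 0;
        if (row > 0) { cols[ncols] = row - 1; values[ncols++] = -1.0; }
        cols[ncols] = row; values[ncols++] = 2.0;
        if (row < N - 1) { cols[ncols] = row + 1; values[ncols++] = -1.0; }
        HYPRE_IJMatrixSetValues(ij_A, 1, &ncols, &row, cols, values);
    }
    HYPRE_IJMatrixAssemble(ij_A);
    HYPRE_IJMatrixGetObject(ij_A, (void **)&par_A);

    /* x = 1 everywhere, y receives the mat-vec result */
    indices = malloc(nlocal * sizeof(*indices));
    data = malloc(nlocal * sizeof(*data));
    for (i = 0; i < nlocal; i++) { indices[i] = ilower + i; data[i] = 1.0; }

    HYPRE_IJVectorCreate(comm, ilower, iupper, &ij_x);
    HYPRE_IJVectorSetObjectType(ij_x, HYPRE_PARCSR);
    HYPRE_IJVectorInitialize(ij_x);
    HYPRE_IJVectorSetValues(ij_x, nlocal, indices, data);
    HYPRE_IJVectorAssemble(ij_x);
    HYPRE_IJVectorGetObject(ij_x, (void **)&par_x);

    HYPRE_IJVectorCreate(comm, ilower, iupper, &ij_y);
    HYPRE_IJVectorSetObjectType(ij_y, HYPRE_PARCSR);
    HYPRE_IJVectorInitialize(ij_y);
    HYPRE_IJVectorAssemble(ij_y);
    HYPRE_IJVectorGetObject(ij_y, (void **)&par_y);

    /* time loop_count products y = A*x, mirroring the PHG/PETSc/HYPRE tests */
    HYPRE_ParCSRMatrixMatvec(1.0, par_A, par_x, 0.0, par_y);    /* warm up */
    t0 = MPI_Wtime();
    for (loop = 0; loop < loop_count; loop++)
        HYPRE_ParCSRMatrixMatvec(1.0, par_A, par_x, 0.0, par_y);
    t1 = MPI_Wtime();
    if (rank == 0)
        printf("HYPRE matvec: %d products in %g s\n", loop_count, t1 - t0);

    /* fetch the local part of y back into user memory */
    HYPRE_IJVectorGetValues(ij_y, nlocal, indices, data);

    free(indices);
    free(data);
    HYPRE_IJMatrixDestroy(ij_A);
    HYPRE_IJVectorDestroy(ij_x);
    HYPRE_IJVectorDestroy(ij_y);
}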
int32_t
impl_bHYPRE_IJParCSRMatrix_ApplyAdjoint(
  /* in */ bHYPRE_IJParCSRMatrix self,
  /* in */ bHYPRE_Vector b,
  /* inout */ bHYPRE_Vector* x,
  /* out */ sidl_BaseInterface *_ex)
{
  *_ex = 0;
  {
    /* DO-NOT-DELETE splicer.begin(bHYPRE.IJParCSRMatrix.ApplyAdjoint) */
    /* Insert-Code-Here {bHYPRE.IJParCSRMatrix.ApplyAdjoint} (ApplyAdjoint method) */

    /* ApplyAdjoint means to multiply by a vector, y = A'*x, where A' is the
     * adjoint of A (= transpose, since this is a real matrix).  Here we call
     * HYPRE_ParCSRMatrixMatvecT, which performs y = a*A'*x + b*y (we set
     * a=1 and b=0). */

    int ierr = 0;
    void * object;
    struct bHYPRE_IJParCSRMatrix__data * data;
    struct bHYPRE_IJParCSRVector__data * data_x, * data_b;
    bHYPRE_IJParCSRVector bHYPREP_b, bHYPREP_x;
    HYPRE_IJMatrix ij_A;
    HYPRE_IJVector ij_x, ij_b;
    HYPRE_ParVector xx, bb;
    HYPRE_ParCSRMatrix A;

    data = bHYPRE_IJParCSRMatrix__get_data( self );
    ij_A = data -> ij_A;
    ierr += HYPRE_IJMatrixGetObject( ij_A, &object );
    A = (HYPRE_ParCSRMatrix) object;

    /* A bHYPRE_Vector is just an interface, we have no knowledge of its
     * contents.  Check whether it's something we know how to handle.
     * If not, die. */
    bHYPREP_b = (bHYPRE_IJParCSRVector) bHYPRE_Vector__cast2(
       b, "bHYPRE.IJParCSRVector", _ex ); SIDL_CHECK(*_ex);
    hypre_assert( bHYPREP_b != NULL );
    bHYPREP_x = (bHYPRE_IJParCSRVector) bHYPRE_Vector__cast2(
       *x, "bHYPRE.IJParCSRVector", _ex ); SIDL_CHECK(*_ex);
    hypre_assert( bHYPREP_x != NULL );

    data_x = bHYPRE_IJParCSRVector__get_data( bHYPREP_x );
    ij_x = data_x -> ij_b;
    ierr += HYPRE_IJVectorGetObject( ij_x, &object );
    xx = (HYPRE_ParVector) object;

    data_b = bHYPRE_IJParCSRVector__get_data( bHYPREP_b );
    ij_b = data_b -> ij_b;
    ierr += HYPRE_IJVectorGetObject( ij_b, &object );
    bb = (HYPRE_ParVector) object;

    /* xx = A' * bb */
    ierr += HYPRE_ParCSRMatrixMatvecT( 1.0, A, bb, 0.0, xx );

    bHYPRE_IJParCSRVector_deleteRef( bHYPREP_b, _ex ); SIDL_CHECK(*_ex);
    bHYPRE_IJParCSRVector_deleteRef( bHYPREP_x, _ex ); SIDL_CHECK(*_ex);

    return( ierr );

    hypre_babel_exception_return_error(_ex);
    /* DO-NOT-DELETE splicer.end(bHYPRE.IJParCSRMatrix.ApplyAdjoint) */
  }
}
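/*
 * Hedged sketch: a small check of the adjoint identity <A*x, y> == <x, A'*y>
 * that ApplyAdjoint relies on (A real and square, so adjoint == transpose).
 * It uses only plain HYPRE ParCSR calls, not the Babel/SIDL layer; the
 * function name check_adjoint and the arguments tmp1/tmp2/tol are
 * illustrative assumptions, and the caller is assumed to supply
 * already-assembled, compatible ParCSR objects.
 */
#include <math.h>
#include "HYPRE_parcsr_mv.h"

int check_adjoint(HYPRE_ParCSRMatrix A,
                  HYPRE_ParVector x, HYPRE_ParVector y,
                  HYPRE_ParVector tmp1, HYPRE_ParVector tmp2,
                  double tol)
{
    HYPRE_Real axy, xaty;

    HYPRE_ParCSRMatrixMatvec(1.0, A, x, 0.0, tmp1);     /* tmp1 = A*x  */
    HYPRE_ParCSRMatrixMatvecT(1.0, A, y, 0.0, tmp2);    /* tmp2 = A'*y */

    HYPRE_ParVectorInnerProd(tmp1, y, &axy);    /* <A*x, y>  */
    HYPRE_ParVectorInnerProd(x, tmp2, &xaty);   /* <x, A'*y> */

    /* return nonzero if the two inner products agree to relative tolerance */
    return fabs(axy - xaty) <= tol * (fabs(axy) + fabs(xaty) + 1.0);
}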