// Preconditioned conjugate-gradient iteration for the system A*x = b.
//
// Parameters:
//   pX            - in/out: wrapped as the vector x; used as the initial guess
//                   (y = A*x is computed from it) and updated in place.
//   pA            - CSR matrix A; must satisfy num_cols == pB->num_values and
//                   num_rows == pX->num_values.
//   pB            - right-hand side b.
//   M             - preconditioner, invoked as M(r, z, control) to produce z
//                   from the residual r.
//   solverControl - receives nIters and initialResidual; its finished() and
//                   print() members drive and report the iteration.
//   control       - clsparse control object passed to every operation.
//
// Returns clsparseInvalidSystemSize when the dimensions disagree and
// clsparseSuccess once the loop terminates or a shortcut applies. Every
// intermediate status is handed to CLSPARSE_V.
//
// NOTE(review): T and PTYPE are not declared in this block; presumably they
// are template parameters declared immediately above - confirm.
// NOTE(review): in builds without NDEBUG the asserts below fire before the
// graceful clsparseInvalidSystemSize return can be reached.
clsparseStatus
cg(cldenseVectorPrivate *pX,
   const clsparseCsrMatrixPrivate* pA,
   const cldenseVectorPrivate *pB,
   PTYPE& M,
   clSParseSolverControl solverControl,
   clsparseControl control)
{
    assert( pA->num_cols == pB->num_values );
    assert( pA->num_rows == pX->num_values );

    if( ( pA->num_cols != pB->num_values ) || ( pA->num_rows != pX->num_values ) )
    {
        return clsparseInvalidSystemSize;
    }

    // Wrap the opaque input parameters in clsparse::vector objects.
    clsparse::vector<T> x(control, pX->values, pX->num_values);
    clsparse::vector<T> b(control, pB->values, pB->num_values);

    cl_int status;

    // Norm of the right-hand side; calculated once and reused for every
    // relative-residual evaluation.
    clsparse::scalar<T> norm_b(control, 0, CL_MEM_WRITE_ONLY, false);

    status = Norm1<T>(norm_b, b, control);
    CLSPARSE_V(status, "Norm B Failed");

    T h_norm_b = norm_b[0];

#ifndef NDEBUG
    std::cout << "norm_b " << h_norm_b << std::endl;
#endif

    // Special case: b is zero, so the solution is x = 0.
    if (h_norm_b == 0)
    {
        solverControl->nIters = 0;
        solverControl->absoluteTolerance = 0.0;
        solverControl->relativeTolerance = 0.0;

        // b is all zeros here, so copying b into x zero-fills x.
        x = b;
        return clsparseSuccess;
    }

    // Continuing "normal" execution of the cg algorithm.
    const auto N = pA->num_cols;

    // Helper containers, all need to be zeroed.
    clsparse::vector<T> y(control, N, 0, CL_MEM_READ_WRITE, true);
    clsparse::vector<T> z(control, N, 0, CL_MEM_READ_WRITE, true);
    clsparse::vector<T> r(control, N, 0, CL_MEM_READ_WRITE, true);
    clsparse::vector<T> p(control, N, 0, CL_MEM_READ_WRITE, true);

    clsparse::scalar<T> one(control,  1, CL_MEM_READ_ONLY, true);
    clsparse::scalar<T> zero(control, 0, CL_MEM_READ_ONLY, true);

    // y = A*x
    status = csrmv<T>(one, pA, x, zero, y, control);
    CLSPARSE_V(status, "csrmv Failed");

    // r = b - y
    status = r.sub(b, y, control);
    CLSPARSE_V(status, "b - y Failed");

    clsparse::scalar<T> norm_r(control, 0, CL_MEM_WRITE_ONLY, false);
    status = Norm1<T>(norm_r, r, control);
    CLSPARSE_V(status, "norm r Failed");

    // residuum = norm_r / norm_b
    // The status is checked here exactly as it is inside the loop; the
    // original ignored it for this first evaluation.
    clsparse::scalar<T> residuum(control, 0, CL_MEM_WRITE_ONLY, false);
    status = residuum.div(norm_r, norm_b, control);
    CLSPARSE_V(status, "residuum");

    solverControl->initialResidual = residuum[0];

#ifndef NDEBUG
    std::cout << "initial residuum = "
              << solverControl->initialResidual << std::endl;
#endif

    // The initial guess may already satisfy the stopping criterion.
    if (solverControl->finished(solverControl->initialResidual))
    {
        solverControl->nIters = 0;
        return clsparseSuccess;
    }

    // Apply preconditioner: z = M*r
    M(r, z, control);

    // Copy initial z to p.
    p = z;

    // rz = <r, z>; this should be conjugate(r), but complex types are not
    // supported.
    clsparse::scalar<T> rz(control, 0, CL_MEM_WRITE_ONLY, false);
    status = dot<T>(rz, r, z, control);
    CLSPARSE_V(status, "<r, z> Failed");

    int iteration = 0;
    bool converged = false;

    clsparse::scalar<T> alpha (control, 0, CL_MEM_READ_WRITE, false);
    clsparse::scalar<T> beta  (control, 0, CL_MEM_READ_WRITE, false);

    // yp: buffer for the inner product of the y and p vectors.
    clsparse::scalar<T> yp(control, 0, CL_MEM_WRITE_ONLY, false);

    clsparse::scalar<T> rz_old(control, 0, CL_MEM_WRITE_ONLY, false);

    while(!converged)
    {
        // Recorded before the step; finished() is evaluated at the end of
        // the body, after the local counter has been incremented.
        solverControl->nIters = iteration;

        // y = A*p
        status = csrmv<T>(one, pA, p, zero, y, control);
        CLSPARSE_V(status, "csrmv Failed");

        status = dot<T>(yp, y, p, control);
        CLSPARSE_V(status, "<y,p> Failed");

        // alpha = <r,z> / <y,p>
        status = alpha.div(rz, yp, control);
        CLSPARSE_V(status, "alpha = <r,z> / <y,p> Failed");

#ifndef NDEBUG
        std::cout << "alpha = " << alpha[0] << std::endl;
#endif

        // x = x + alpha*p
        status = axpy<T>(x, alpha, p, x, control);
        CLSPARSE_V(status, "x = x + alpha * p Failed");

        // r = r - alpha * y
        status = axpy<T, EW_MINUS>(r, alpha, y, r, control);
        CLSPARSE_V(status, "r = r - alpha * y Failed");

        // Apply preconditioner: z = M*r
        M(r, z, control);

        // Store the old value of rz.
        // TODO: improve that by move or swap.
        rz_old = rz;

        // rz = <r,z>
        status = dot<T>(rz, r, z, control);
        CLSPARSE_V(status, "<r,z> Failed");

        // beta = rz / rz_old: ratio of the dot product in the current
        // iteration to the one from the previous iteration.
        status = beta.div(rz, rz_old, control);
        CLSPARSE_V(status, "beta = <r,z> / <r,z>_old Failed");

#ifndef NDEBUG
        std::cout << "beta = " << beta[0] << std::endl;
#endif

        // p = z + beta*p
        status = axpby<T>(p, one, z, beta, p, control );
        CLSPARSE_V(status, "p = z + beta*p Failed");

        // Calculate norm of r.
        status = Norm1<T>(norm_r, r, control);
        CLSPARSE_V(status, "norm r Failed");

        // residuum = norm_r / norm_b
        status = residuum.div(norm_r, norm_b, control);
        CLSPARSE_V(status, "residuum");

        iteration++;
        converged = solverControl->finished(residuum[0]);
        solverControl->print();
    }

    return clsparseSuccess;
}
int main(int argc, char* argv[]) { // // Get a default output stream from the Teuchos::VerboseObjectBase // Teuchos::RCP<Teuchos::FancyOStream> out = Teuchos::VerboseObjectBase::getDefaultOStream(); // // Set the parameters for the Belos LOWS Factory and create a parameter list. // int blockSize = 2; int maxIterations = 400; int maxRestarts = 25; int gmresKrylovLength = 25; int outputFrequency = 1; bool outputMaxResOnly = true; double maxResid = 1e-6; Teuchos::RCP<Teuchos::ParameterList> belosLOWSFPL = Teuchos::rcp( new Teuchos::ParameterList() ); belosLOWSFPL->set("Solver Type","Block GMRES"); Teuchos::ParameterList& belosLOWSFPL_solver = belosLOWSFPL->sublist("Solver Types"); Teuchos::ParameterList& belosLOWSFPL_gmres = belosLOWSFPL_solver.sublist("Block GMRES"); belosLOWSFPL_gmres.set("Maximum Iterations",int(maxIterations)); belosLOWSFPL_gmres.set("Convergence Tolerance",double(maxResid)); belosLOWSFPL_gmres.set("Maximum Restarts",int(maxRestarts)); belosLOWSFPL_gmres.set("Block Size",int(blockSize)); belosLOWSFPL_gmres.set("Num Blocks",int(gmresKrylovLength)); belosLOWSFPL_gmres.set("Output Frequency",int(outputFrequency)); belosLOWSFPL_gmres.set("Show Maximum Residual Norm Only",bool(outputMaxResOnly)); #ifdef HAVE_BELOS_IFPACK // // Set the parameters for the Ifpack Preconditioner Factory and create parameter list // Teuchos::ParameterList &ifpackPFSL = belosLOWSFPL->sublist("IfpackPreconditionerFactory"); ifpackPFSL.set("Overlap",int(2)); ifpackPFSL.set("Prec Type","ILUT"); #endif // Whether the linear solver succeeded. // (this will be set during the residual check at the end) bool success = true; // Number of random right-hand sides we will be solving for. int numRhs = 5; // Name of input matrix file std::string matrixFile = "orsirr1.hb"; // Read in the matrix file (can be *.mtx, *.hb, etc.) 
Epetra_SerialComm comm; Teuchos::RCP<Epetra_CrsMatrix> epetra_A; EpetraExt::readEpetraLinearSystem( matrixFile, comm, &epetra_A ); // Create a Thyra linear operator (A) using the Epetra_CrsMatrix (epetra_A). Teuchos::RCP<const Thyra::LinearOpBase<double> > A = Thyra::epetraLinearOp(epetra_A); // Get the domain space for the Thyra linear operator Teuchos::RCP<const Thyra::VectorSpaceBase<double> > domain = A->domain(); // Create the Belos LOWS factory. Teuchos::RCP<Thyra::LinearOpWithSolveFactoryBase<double> > belosLOWSFactory = Teuchos::rcp(new Thyra::BelosLinearOpWithSolveFactory<double>()); #ifdef HAVE_BELOS_IFPACK // Set the preconditioner factory for the LOWS factory. belosLOWSFactory->setPreconditionerFactory( Teuchos::rcp(new Thyra::IfpackPreconditionerFactory()) ,"IfpackPreconditionerFactory" ); #endif // Set the parameter list to specify the behavior of the factory. belosLOWSFactory->setParameterList( belosLOWSFPL ); // Set the output stream and the verbosity level (prints to std::cout by defualt) belosLOWSFactory->setVerbLevel(Teuchos::VERB_LOW); // Create a BelosLinearOpWithSolve object from the Belos LOWS factory. Teuchos::RCP<Thyra::LinearOpWithSolveBase<double> > nsA = belosLOWSFactory->createOp(); // Initialize the BelosLinearOpWithSolve object with the Thyra linear operator. Thyra::initializeOp<double>( *belosLOWSFactory, A, &*nsA ); // Create a right-hand side with numRhs vectors in it and randomize it. Teuchos::RCP< Thyra::MultiVectorBase<double> > b = Thyra::createMembers(domain, numRhs); Thyra::seed_randomize<double>(0); Thyra::randomize(-1.0, 1.0, &*b); // Create an initial std::vector with numRhs vectors in it and initialize it to zero. Teuchos::RCP< Thyra::MultiVectorBase<double> > x = Thyra::createMembers(domain, numRhs); Thyra::assign(&*x, 0.0); // Perform solve using the linear operator to get the approximate solution of Ax=b, // where b is the right-hand side and x is the left-hand side. 
Thyra::SolveStatus<double> solveStatus; solveStatus = Thyra::solve( *nsA, Thyra::NONCONJ_ELE, *b, &*x ); // Print out status of solve. *out << "\nBelos LOWS Status: "<< solveStatus << std::endl; // // Compute residual and double check convergence. // std::vector<double> norm_b(numRhs), norm_res(numRhs); Teuchos::RCP< Thyra::MultiVectorBase<double> > y = Thyra::createMembers(domain, numRhs); // Compute the column norms of the right-hand side b. Thyra::norms_2( *b, &norm_b[0] ); // Compute y=A*x, where x is the solution from the linear solver. A->apply( Thyra::NONCONJ_ELE, *x, &*y ); // Compute A*x-b = y-b Thyra::update( -1.0, *b, &*y ); // Compute the column norms of A*x-b. Thyra::norms_2( *y, &norm_res[0] ); // Print out the final relative residual norms. double rel_res = 0.0; *out << "Final relative residual norms" << std::endl; for (int i=0; i<numRhs; ++i) { rel_res = norm_res[i]/norm_b[i]; if (rel_res > maxResid) success = false; *out << "RHS " << i+1 << " : " << std::setw(16) << std::right << rel_res << std::endl; } return ( success ? 0 : 1 ); }
int main(int argc, char* argv[]) { // // Get a default output stream from the Teuchos::VerboseObjectBase // Teuchos::RCP<Teuchos::FancyOStream> out = Teuchos::VerboseObjectBase::getDefaultOStream(); Teuchos::GlobalMPISession mpiSession(&argc,&argv); #ifdef HAVE_COMPLEX typedef std::complex<double> ST; // Scalar-type typedef #elif HAVE_COMPLEX_H typedef std::complex<double> ST; // Scalar-type typedef #else typedef double ST; // Scalar-type typedef #endif typedef Teuchos::ScalarTraits<ST>::magnitudeType MT; // Magnitude-type typedef typedef int OT; // Ordinal-type typedef ST one = Teuchos::ScalarTraits<ST>::one(); ST zero = Teuchos::ScalarTraits<ST>::zero(); #ifdef HAVE_MPI MPI_Comm mpiComm = MPI_COMM_WORLD; const Tpetra::MpiPlatform<OT,OT> ordinalPlatform(mpiComm); const Tpetra::MpiPlatform<OT,ST> scalarPlatform(mpiComm); #else const Tpetra::SerialPlatform<OT,OT> ordinalPlatform; const Tpetra::SerialPlatform<OT,ST> scalarPlatform; #endif // // Get the data from the HB file // // Name of input matrix file std::string matrixFile = "mhd1280b.cua"; int info=0; int dim,dim2,nnz; MT *dvals; int *colptr,*rowind; ST *cvals; nnz = -1; info = readHB_newmat_double(matrixFile.c_str(),&dim,&dim2,&nnz, &colptr,&rowind,&dvals); if (info == 0 || nnz < 0) { *out << "Error reading '" << matrixFile << "'" << std::endl; } #ifdef HAVE_MPI MPI_Finalize(); #endif // Convert interleaved doubles to std::complex values cvals = new ST[nnz]; for (int ii=0; ii<nnz; ii++) { cvals[ii] = ST(dvals[ii*2],dvals[ii*2+1]); } // Declare global dimension of the linear operator OT globalDim = dim; // Create the element space and std::vector space const Tpetra::ElementSpace<OT> elementSpace(globalDim,0,ordinalPlatform); const Tpetra::VectorSpace<OT,ST> vectorSpace(elementSpace,scalarPlatform); // Create my implementation of a Tpetra::Operator RCP<Tpetra::Operator<OT,ST> > tpetra_A = rcp( new MyOperator<OT,ST>(vectorSpace,dim,colptr,nnz,rowind,cvals) ); // Create a Thyra linear operator (A) using the 
Tpetra::CisMatrix (tpetra_A). RCP<Thyra::LinearOpBase<ST> > A = Teuchos::rcp( new Thyra::TpetraLinearOp<OT,ST>(tpetra_A) ); // // Set the parameters for the Belos LOWS Factory and create a parameter list. // int blockSize = 1; int maxIterations = globalDim; int maxRestarts = 15; int gmresKrylovLength = 50; int outputFrequency = 100; bool outputMaxResOnly = true; MT maxResid = 1e-5; Teuchos::RCP<Teuchos::ParameterList> belosLOWSFPL = Teuchos::rcp( new Teuchos::ParameterList() ); belosLOWSFPL->set("Solver Type","Block GMRES"); Teuchos::ParameterList& belosLOWSFPL_solver = belosLOWSFPL->sublist("Solver Types"); Teuchos::ParameterList& belosLOWSFPL_gmres = belosLOWSFPL_solver.sublist("Block GMRES"); belosLOWSFPL_gmres.set("Maximum Iterations",int(maxIterations)); belosLOWSFPL_gmres.set("Convergence Tolerance",MT(maxResid)); belosLOWSFPL_gmres.set("Maximum Restarts",int(maxRestarts)); belosLOWSFPL_gmres.set("Block Size",int(blockSize)); belosLOWSFPL_gmres.set("Num Blocks",int(gmresKrylovLength)); belosLOWSFPL_gmres.set("Output Frequency",int(outputFrequency)); belosLOWSFPL_gmres.set("Show Maximum Residual Norm Only",bool(outputMaxResOnly)); // Whether the linear solver succeeded. // (this will be set during the residual check at the end) bool success = true; // Number of random right-hand sides we will be solving for. int numRhs = 1; // Get the domain space for the Thyra linear operator Teuchos::RCP<const Thyra::VectorSpaceBase<ST> > domain = A->domain(); // Create the Belos LOWS factory. Teuchos::RCP<Thyra::LinearOpWithSolveFactoryBase<ST> > belosLOWSFactory = Teuchos::rcp(new Thyra::BelosLinearOpWithSolveFactory<ST>()); // Set the parameter list to specify the behavior of the factory. belosLOWSFactory->setParameterList( belosLOWSFPL ); // Set the output stream and the verbosity level (prints to std::cout by defualt) // NOTE: Set to VERB_NONE for no output from the solver. 
belosLOWSFactory->setVerbLevel(Teuchos::VERB_LOW); // Create a BelosLinearOpWithSolve object from the Belos LOWS factory. Teuchos::RCP<Thyra::LinearOpWithSolveBase<ST> > nsA = belosLOWSFactory->createOp(); // Initialize the BelosLinearOpWithSolve object with the Thyra linear operator. Thyra::initializeOp<ST>( *belosLOWSFactory, A, &*nsA ); // Create a right-hand side with numRhs vectors in it. Teuchos::RCP< Thyra::MultiVectorBase<ST> > b = Thyra::createMembers(domain, numRhs); // Create an initial std::vector with numRhs vectors in it and initialize it to one. Teuchos::RCP< Thyra::MultiVectorBase<ST> > x = Thyra::createMembers(domain, numRhs); Thyra::assign(&*x, one); // Initialize the right-hand side so that the solution is a std::vector of ones. A->apply( Thyra::NONCONJ_ELE, *x, &*b ); Thyra::assign(&*x, zero); // Perform solve using the linear operator to get the approximate solution of Ax=b, // where b is the right-hand side and x is the left-hand side. Thyra::SolveStatus<ST> solveStatus; solveStatus = Thyra::solve( *nsA, Thyra::NONCONJ_ELE, *b, &*x ); // Print out status of solve. *out << "\nBelos LOWS Status: "<< solveStatus << std::endl; // // Compute residual and ST check convergence. // std::vector<MT> norm_b(numRhs), norm_res(numRhs); Teuchos::RCP< Thyra::MultiVectorBase<ST> > y = Thyra::createMembers(domain, numRhs); // Compute the column norms of the right-hand side b. Thyra::norms_2( *b, &norm_b[0] ); // Compute y=A*x, where x is the solution from the linear solver. A->apply( Thyra::NONCONJ_ELE, *x, &*y ); // Compute A*x-b = y-b Thyra::update( -one, *b, &*y ); // Compute the column norms of A*x-b. Thyra::norms_2( *y, &norm_res[0] ); // Print out the final relative residual norms. 
MT rel_res = 0.0; *out << "Final relative residual norms" << std::endl; for (int i=0; i<numRhs; ++i) { rel_res = norm_res[i]/norm_b[i]; if (rel_res > maxResid) success = false; *out << "RHS " << i+1 << " : " << std::setw(16) << std::right << rel_res << std::endl; } return ( success ? 0 : 1 ); }