int CGSolver::SolveLinearSystemWithoutPreconditioner(double * x, const double * b, double eps, int maxIterations, int verbose) { int iteration=1; multiplicator(multiplicatorData, x, r); //A->MultiplyVector(x,r); for (int i=0; i<numRows; i++) { r[i] = b[i] - r[i]; d[i] = r[i]; } double residualNorm2 = ComputeDotProduct(r, r); double initialResidualNorm2 = residualNorm2; while ((residualNorm2 > eps * eps * initialResidualNorm2) && (iteration <= maxIterations)) { if (verbose) printf("CG iteration %d: current L2 error vs initial error=%G\n", iteration, sqrt(residualNorm2 / initialResidualNorm2)); multiplicator(multiplicatorData, d, q); //A->MultiplyVector(d,q); // q = A * d double dDotq = ComputeDotProduct(d, q); double alpha = residualNorm2 / dDotq; //printf("residualNorm2=%G dDotq=%G alpha=%G\n", residualNorm2, dDotq, alpha); for(int i=0; i<numRows; i++) x[i] += alpha * d[i]; if (iteration % 30 == 0) { // periodically compute the exact residual (Shewchuk, page 8) multiplicator(multiplicatorData, x, r); //A->MultiplyVector(x,r); for (int i=0; i<numRows; i++) r[i] = b[i] - r[i]; } else { for (int i=0; i<numRows; i++) r[i] = r[i] - alpha * q[i]; } double oldResidualNorm2 = residualNorm2; residualNorm2 = ComputeDotProduct(r, r); double beta = residualNorm2 / oldResidualNorm2; for (int i=0; i<numRows; i++) d[i] = r[i] + beta * d[i]; iteration++; } return (iteration-1) * ((residualNorm2 > eps * eps * initialResidualNorm2) ? -1 : 1); }
/*! Routine to compute an approximate solution to Ax = b @param[in] geom The description of the problem's geometry. @param[inout] A The known system matrix @param[inout] data The data structure with all necessary CG vectors preallocated @param[in] b The known right hand side vector @param[inout] x On entry: the initial guess; on exit: the new approximate solution @param[in] max_iter The maximum number of iterations to perform, even if tolerance is not met. @param[in] tolerance The stopping criterion to assert convergence: if norm of residual is <= to tolerance. @param[out] niters The number of iterations actually performed. @param[out] normr The 2-norm of the residual vector after the last iteration. @param[out] normr0 The 2-norm of the residual vector before the first iteration. @param[out] times The 7-element vector of the timing information accumulated during all of the iterations. @param[in] doPreconditioning The flag to indicate whether the preconditioner should be invoked at each iteration. @return Returns zero on success and a non-zero value otherwise. @see CG_ref() */ int CG(const SparseMatrix & A, CGData & data, const Vector & b, Vector & x, const int max_iter, const double tolerance, int & niters, double & normr, double & normr0, double * times, bool doPreconditioning) { double t_begin = mytimer(); // Start timing right away normr = 0.0; double rtz = 0.0, oldrtz = 0.0, alpha = 0.0, beta = 0.0, pAp = 0.0; double t0 = 0.0, t1 = 0.0, t2 = 0.0, t3 = 0.0, t4 = 0.0, t5 = 0.0; //#ifndef HPCG_NOMPI // double t6 = 0.0; //#endif local_int_t nrow = A.localNumberOfRows; Vector & r = data.r; // Residual vector Vector & z = data.z; // Preconditioned residual vector Vector & p = data.p; // Direction vector (in MPI mode ncol>=nrow) Vector & Ap = data.Ap; if (!doPreconditioning && A.geom->rank==0) HPCG_fout << "WARNING: PERFORMING UNPRECONDITIONED ITERATIONS" << std::endl; #ifdef HPCG_DEBUG int print_freq = 1; if (print_freq>50) print_freq=50; if (print_freq<1) print_freq=1; #endif // p is of length ncols, copy x to p for sparse MV operation CopyVector(x, p); //TODO paralel TICK(); ComputeSPMV(A, p, Ap); TOCK(t3); // Ap = A*p TICK(); ComputeWAXPBY(nrow, 1.0, b, -1.0, Ap, r, A.isWaxpbyOptimized); TOCK(t2); // r = b - Ax (x stored in p) TICK(); ComputeDotProduct(nrow, r, r, normr, t4, A.isDotProductOptimized); TOCK(t1); normr = sqrt(normr); #ifdef HPCG_DEBUG if (A.geom->rank==0) HPCG_fout << "Initial Residual = "<< normr << std::endl; #endif // Record initial residual for convergence testing normr0 = normr; // Start iterations for (int k=1; k<=max_iter && normr/normr0 > tolerance; k++ ) { TICK(); if (doPreconditioning) ComputeMG(A, r, z); // Apply preconditioner else CopyVector (r, z); // copy r to z (no preconditioning) TOCK(t5); // Preconditioner apply time if (k == 1) { TICK(); ComputeWAXPBY(nrow, 1.0, z, 0.0, z, p, A.isWaxpbyOptimized); TOCK(t2); // Copy Mr to p TICK(); ComputeDotProduct (nrow, r, z, rtz, t4, A.isDotProductOptimized); TOCK(t1); // rtz = r'*z } else { oldrtz = rtz; TICK(); ComputeDotProduct (nrow, r, z, rtz, t4, A.isDotProductOptimized); TOCK(t1); // rtz = r'*z beta = rtz/oldrtz; TICK(); ComputeWAXPBY (nrow, 1.0, z, beta, p, p, A.isWaxpbyOptimized); TOCK(t2); // p = beta*p + z } TICK(); ComputeSPMV(A, p, Ap); TOCK(t3); // Ap = A*p TICK(); ComputeDotProduct(nrow, p, Ap, pAp, t4, A.isDotProductOptimized); TOCK(t1); // alpha = p'*Ap alpha = rtz/pAp; TICK(); ComputeWAXPBY(nrow, 1.0, x, alpha, p, x, A.isWaxpbyOptimized);// x = x + alpha*p ComputeWAXPBY(nrow, 1.0, r, -alpha, Ap, r, A.isWaxpbyOptimized); TOCK(t2);// r = r - alpha*Ap TICK(); ComputeDotProduct(nrow, r, r, normr, t4, A.isDotProductOptimized); TOCK(t1); normr = sqrt(normr); #ifdef HPCG_DEBUG if (A.geom->rank==0 && (k%print_freq == 0 || k == max_iter)) HPCG_fout << "Iteration = "<< k << " Scaled Residual = "<< normr/normr0 << std::endl; #endif niters = k; } // Store times times[1] += t1; // dot-product time times[2] += t2; // WAXPBY time times[3] += t3; // SPMV time times[4] += t4; // AllReduce time times[5] += t5; // preconditioner apply time //#ifndef HPCG_NOMPI // times[6] += t6; // exchange halo time //#endif times[0] += mytimer() - t_begin; // Total time. All done... return(0); }
int CGSolver::SolveLinearSystemWithJacobiPreconditioner(double * x, const double * b, double eps, int maxIterations, int verbose) { if (invDiagonal == NULL) { // This code will only execute when the class was constructed via the "SparseMatrix * A_" constructor (and only once). // In the "blackBoxProductType callBackFunction_" constructor, invDiagonal would have already been set to non-NULL. // extract diagonal entries A->BuildDiagonalIndices(); // note: if indices are already built, this call will do nothing (you can therefore also call BuildDiagonalIndices() once and for all before calling SolveLinearSystemWithJacobiPreconditioner); in any case, BuildDiagonalIndices() is fast (a single linear traversal of all matrix elements) invDiagonal = (double*) malloc (sizeof(double) * numRows); A->GetDiagonal(invDiagonal); for(int i=0; i<numRows; i++) invDiagonal[i] = 1.0 / invDiagonal[i]; // potential division by zero here (uncommon in practice) } int iteration=1; multiplicator(multiplicatorData, x, r); //A->MultiplyVector(x,r); for (int i=0; i<numRows; i++) { r[i] = b[i] - r[i]; d[i] = invDiagonal[i] * r[i]; } double residualNorm2 = ComputeTriDotProduct(r, r, invDiagonal); double initialResidualNorm2 = residualNorm2; while ((residualNorm2 > eps * eps * initialResidualNorm2) && (iteration <= maxIterations)) { if (verbose) printf("CG iteration %d: current M^{-1}-L2 error vs initial error=%G\n", iteration, sqrt(residualNorm2 / initialResidualNorm2)); multiplicator(multiplicatorData, d, q); //A->MultiplyVector(d,q); // q = A * d double dDotq = ComputeDotProduct(d, q); double alpha = residualNorm2 / dDotq; for(int i=0; i<numRows; i++) x[i] += alpha * d[i]; if (iteration % 30 == 0) { // periodically compute the exact residual (Shewchuk, page 8) multiplicator(multiplicatorData, x, r); //A->MultiplyVector(x,r); for (int i=0; i<numRows; i++) r[i] = b[i] - r[i]; } else { for (int i=0; i<numRows; i++) r[i] = r[i] - alpha * q[i]; } double oldResidualNorm2 = residualNorm2; residualNorm2 = ComputeTriDotProduct(r, r, invDiagonal); double beta = residualNorm2 / oldResidualNorm2; for (int i=0; i<numRows; i++) d[i] = invDiagonal[i] * r[i] + beta * d[i]; iteration++; } if (residualNorm2 < 0) { printf("Warning: residualNorm2=%G is negative. Input matrix might not be SPD. Solution could be incorrect.\n", residualNorm2); } return (iteration-1) * ((residualNorm2 > eps * eps * initialResidualNorm2) ? -1 : 1); }
int TestSymmetry(SparseMatrix & A, Vector & b, Vector & xexact, TestSymmetryData & testsymmetry_data) { local_int_t nrow = A.localNumberOfRows; local_int_t ncol = A.localNumberOfColumns; Vector x_ncol, y_ncol, z_ncol; InitializeVector(x_ncol, ncol); InitializeVector(y_ncol, ncol); InitializeVector(z_ncol, ncol); double t4 = 0.0; // Needed for dot-product call, otherwise unused testsymmetry_data.count_fail = 0; // Test symmetry of matrix // First load vectors with random values FillRandomVector(x_ncol); FillRandomVector(y_ncol); double xNorm2, yNorm2; double ANorm = 2 * 26.0; // Next, compute x'*A*y ComputeDotProduct(nrow, y_ncol, y_ncol, yNorm2, t4, A.isDotProductOptimized); int ierr = ComputeSPMV(A, y_ncol, z_ncol); // z_nrow = A*y_overlap if (ierr) HPCG_fout << "Error in call to SpMV: " << ierr << ".\n" << endl; double xtAy = 0.0; ierr = ComputeDotProduct(nrow, x_ncol, z_ncol, xtAy, t4, A.isDotProductOptimized); // x'*A*y if (ierr) HPCG_fout << "Error in call to dot: " << ierr << ".\n" << endl; // Next, compute y'*A*x ComputeDotProduct(nrow, x_ncol, x_ncol, xNorm2, t4, A.isDotProductOptimized); ierr = ComputeSPMV(A, x_ncol, z_ncol); // b_computed = A*x_overlap if (ierr) HPCG_fout << "Error in call to SpMV: " << ierr << ".\n" << endl; double ytAx = 0.0; ierr = ComputeDotProduct(nrow, y_ncol, z_ncol, ytAx, t4, A.isDotProductOptimized); // y'*A*x if (ierr) HPCG_fout << "Error in call to dot: " << ierr << ".\n" << endl; testsymmetry_data.depsym_spmv = std::fabs((long double) (xtAy - ytAx))/((xNorm2*ANorm*yNorm2 + yNorm2*ANorm*xNorm2) * (DBL_EPSILON)); if (testsymmetry_data.depsym_spmv > 1.0) ++testsymmetry_data.count_fail; // If the difference is > 1, count it wrong if (A.geom->rank==0) HPCG_fout << "Departure from symmetry (scaled) for SpMV abs(x'*A*y - y'*A*x) = " << testsymmetry_data.depsym_spmv << endl; // Test symmetry of symmetric Gauss-Seidel // Compute x'*Minv*y ierr = ComputeMG(A, y_ncol, z_ncol); // z_ncol = Minv*y_ncol if (ierr) HPCG_fout << "Error in call to MG: " << ierr << ".\n" << endl; double xtMinvy = 0.0; ierr = ComputeDotProduct(nrow, x_ncol, z_ncol, xtMinvy, t4, A.isDotProductOptimized); // x'*Minv*y if (ierr) HPCG_fout << "Error in call to dot: " << ierr << ".\n" << endl; // Next, compute z'*Minv*x ierr = ComputeMG(A, x_ncol, z_ncol); // z_ncol = Minv*x_ncol if (ierr) HPCG_fout << "Error in call to MG: " << ierr << ".\n" << endl; double ytMinvx = 0.0; ierr = ComputeDotProduct(nrow, y_ncol, z_ncol, ytMinvx, t4, A.isDotProductOptimized); // y'*Minv*x if (ierr) HPCG_fout << "Error in call to dot: " << ierr << ".\n" << endl; testsymmetry_data.depsym_mg = std::fabs((long double) (xtMinvy - ytMinvx))/((xNorm2*ANorm*yNorm2 + yNorm2*ANorm*xNorm2) * (DBL_EPSILON)); if (testsymmetry_data.depsym_mg > 1.0) ++testsymmetry_data.count_fail; // If the difference is > 1, count it wrong if (A.geom->rank==0) HPCG_fout << "Departure from symmetry (scaled) for MG abs(x'*Minv*y - y'*Minv*x) = " << testsymmetry_data.depsym_mg << endl; CopyVector(xexact, x_ncol); // Copy exact answer into overlap vector int numberOfCalls = 2; double residual = 0.0; for (int i=0; i< numberOfCalls; ++i) { ierr = ComputeSPMV(A, x_ncol, z_ncol); // b_computed = A*x_overlap if (ierr) HPCG_fout << "Error in call to SpMV: " << ierr << ".\n" << endl; if ((ierr = ComputeResidual(A.localNumberOfRows, b, z_ncol, residual))) HPCG_fout << "Error in call to compute_residual: " << ierr << ".\n" << endl; if (A.geom->rank==0) HPCG_fout << "SpMV call [" << i << "] Residual [" << residual << "]" << endl; } DeleteVector(x_ncol); DeleteVector(y_ncol); DeleteVector(z_ncol); return 0; }