int CG(const MMatrix &A, MVector &x, const MVector &b, const Preconditioner &M, int &max_iter, Real &tol) { Real resid; MVector p, z, q; MVector alpha(1), beta(1), rho(1), rho_1(1); MVector r = b - A*x; Real normb = norm(b); if (normb == 0.0) normb = 1; if ((resid = norm(r) / normb) <= tol){ tol = resid; max_iter = 0; return 0; } for (int i = 1; i <= max_iter; i++) { // Assign Z z = M.solve(r); rho.p_[0] = dot(r, z); // Assign P if (i == 1) p = z; else { beta.p_[0] = rho.p_[0] / rho_1.p_[0]; p = z + beta.p_[0] * p; } // Assign Q q = A*p; alpha.p_[0] = rho.p_[0] / dot(p, q); // Change X and R x += alpha.p_[0] * p; r -= alpha.p_[0] * q; // Check tol if ((resid = norm(r) / normb) <= tol) { tol = resid; max_iter = i; return 0; } rho_1.p_[0] = rho.p_[0]; } tol = resid; return 1; }
bool IterativeSolvers::gmres(const IRCMatrix &A, Vector &x, const Vector &b, const Preconditioner &M) { const idx N = x.getLength(); idx i, j = 1, k; Vector s(maxInnerIter + 1); Vector cs(maxInnerIter + 1); Vector sn(maxInnerIter + 1); Vector w(N); real normb = norm(M.solve(b)); Vector r = M.solve(b - A * x); real beta = norm(r); if (normb == 0.0) normb = 1; real res(norm(r) / normb); if (res <= toler) { toler = res; maxIter = 0; return true; } Vector *v = new Vector[maxInnerIter + 1]; for (idx id = 0; id < maxInnerIter + 1; ++id) v[id] = Vector(N); // CREATE HESSENBERG MATRIX NEEDED TO STORE INTERMEDIATES DenseMatrix H(maxInnerIter + 1, maxInnerIter); Vector temp(N); Vector temp2(maxInnerIter + 1); // MAIN LOOP while (j <= maxIter) { v[0] = r * (1.0 / beta); s = 0.0; s(0) = beta; // INNER ITERATIONS for (i = 0; i < maxInnerIter && j <= maxIter; i++, j++) { // CALCULATE w = M^{-1}(A*v[i]) multiply(A, v[i], temp); M.solveMxb(w, temp); // PRE-CALCULATE DOT PRODUCTS IN PARALLEL // H(k,i) = dot( v[k], w) #ifdef USES_OPENMP #pragma omp parallel for #endif for (k = 0; k <= i ; ++k) { register real dp(0); for (idx id = 0 ; id < N ; ++id) dp += w[id] * v[k][id]; H(k, i) = dp; //dot(w,v[k]); } for (k = 0; k <= i; ++k) { // w -= v[k]*H(k,i) without temporaries register real tempr = H(k, i); #ifdef USES_OPENMP #pragma omp parallel for // why is this loop so critical?? #endif for (idx id = 0 ; id < N ; ++id) w[id] -= v[k][id] * tempr; } // BELOW PARALLEL REGION CALCULATES: // H(i+1,i) = norm(w); // v[i+1] = w * (1.0 / H(i+1, i)); H(i + 1, i) = 0; real tempr(0); #ifdef USES_OPENMP #pragma omp parallel shared(tempr) #endif { #ifdef USES_OPENMP #pragma omp for reduction(+:tempr) #endif for (idx id = 0 ; id < N ; ++id) tempr += w[id] * w[id]; //norm(w); #ifdef USES_OPENMP #pragma omp single #endif { H(i + 1, i) = sqrt(tempr); tempr = (1.0 / H(i + 1, i)); } #ifdef USES_OPENMP #pragma omp for #endif for (idx id = 0 ; id < N ; ++id) v[i + 1][id] = w[id] * tempr; }// end for omp parallel for (k = 0; k < i; k++) ApplyPlaneRotation(H(k, i), H(k + 1, i), cs(k), sn(k)); GeneratePlaneRotation(H(i, i), H(i + 1, i), cs(i), sn(i)); ApplyPlaneRotation(H(i, i), H(i + 1, i), cs(i), sn(i)); ApplyPlaneRotation(s(i), s(i + 1), cs(i), sn(i)); res = fabs(s(i + 1)) / normb; if (res < toler) { // COPY S INTO temp WITHOUT RESIZING for (idx id = 0 ; id < maxInnerIter + 1 ; ++id) temp2[id] = s[id]; Update(x, i, H, temp2, v); toler = res; maxIter = j; delete [] v; return true; } }// end for i IINNER ITERATIONS // COPY S INTO temp WITHOUT RESIZING for (idx id = 0 ; id < maxInnerIter + 1 ; ++id) temp2[id] = s[id]; Update(x, maxInnerIter - 1, H, temp2, v); //multiply(A, x, temp); //r = M.solve(b - A * x); M.solveMxb(r, b - A * x); beta = norm(r); res = beta / normb; if (res < toler) { toler = res; maxIter = j; delete [] v; return true; } } toler = res; delete [] v; return false; }