void MultiGrid::solve (MultiFab& _sol, const MultiFab& _rhs, Real _eps_rel, Real _eps_abs, LinOp::BC_Mode bc_mode) { // // Prepare memory for new level, and solve the general boundary // value problem to within relative error _eps_rel. Customized // to solve at level=0. // const int level = 0; prepareForLevel(level); // // Copy the initial guess, which may contain inhomogeneous boundray conditions, // into both "initialsolution" (to be added back later) and into "cor[0]" which // we will only use here to compute the residual, then will set back to 0 below // initialsolution->copy(_sol); cor[level]->copy(_sol); // // Put the problem in residual-correction form: we will now use "rhs[level // the initial residual (rhs[0]) rather than the initial RHS (_rhs) // to begin the solve. // Lp.residual(*rhs[level],_rhs,*cor[level],level,bc_mode); // // Now initialize correction to zero at this level (auto-filled at levels below) // (*cor[level]).setVal(0.0); // // // Elide a reduction by doing these together. // Real tmp[2] = { norm_inf(_rhs,true), norm_inf(*rhs[level],true) }; ParallelDescriptor::ReduceRealMax(tmp,2); if ( ParallelDescriptor::IOProcessor() && verbose > 0) { Spacer(std::cout, level); std::cout << "MultiGrid: Initial rhs = " << tmp[0] << '\n'; std::cout << "MultiGrid: Initial residual = " << tmp[1] << '\n'; } if (tmp[1] == 0.0) return; // // We can now use homogeneous bc's because we have put the problem into residual-correction form. // if ( !solve_(_sol, _eps_rel, _eps_abs, LinOp::Homogeneous_BC, tmp[0], tmp[1]) ) BoxLib::Error("MultiGrid:: failed to converge!"); }
int MultiGrid::solve_ (MultiFab& _sol, Real eps_rel, Real eps_abs, LinOp::BC_Mode bc_mode, Real bnorm, Real resnorm0) { BL_PROFILE("MultiGrid::solve_()"); // // If do_fixed_number_of_iters = 1, then do maxiter iterations without checking for convergence // // If do_fixed_number_of_iters = 0, then relax system maxiter times, // and stop if relative error <= _eps_rel or if absolute err <= _abs_eps // const Real strt_time = ParallelDescriptor::second(); const int level = 0; // // We take the max of the norms of the initial RHS and the initial residual in order to capture both cases // Real norm_to_test_against; bool using_bnorm; if (bnorm >= resnorm0) { norm_to_test_against = bnorm; using_bnorm = true; } else { norm_to_test_against = resnorm0; using_bnorm = false; } int returnVal = 0; Real error = resnorm0; // // Note: if eps_rel, eps_abs < 0 then that test is effectively bypassed // if ( ParallelDescriptor::IOProcessor() && eps_rel < 1.0e-16 && eps_rel > 0 ) { std::cout << "MultiGrid: Tolerance " << eps_rel << " < 1e-16 is probably set too low" << '\n'; } // // We initially define norm_cor based on the initial solution only so we can use it in the very first iteration // to decide whether the problem is already solved (this is relevant if the previous solve used was only solved // according to the Anorm test and not the bnorm test). // Real norm_cor = norm_inf(*initialsolution,true); ParallelDescriptor::ReduceRealMax(norm_cor); int nit = 1; const Real norm_Lp = Lp.norm(0, level); Real cg_time = 0; if ( use_Anorm_for_convergence == 1 ) { // // Don't need to go any further -- no iterations are required // if (error <= eps_abs || error < eps_rel*(norm_Lp*norm_cor+norm_to_test_against)) { if ( ParallelDescriptor::IOProcessor() && (verbose > 0) ) { std::cout << " Problem is already converged -- no iterations required\n"; } return 1; } for ( ; ( (error > eps_abs && error > eps_rel*(norm_Lp*norm_cor+norm_to_test_against)) || (do_fixed_number_of_iters == 1) ) && nit <= maxiter; ++nit) { relax(*cor[level], *rhs[level], level, eps_rel, eps_abs, bc_mode, cg_time); Real tmp[2] = { norm_inf(*cor[level],true), errorEstimate(level,bc_mode,true) }; ParallelDescriptor::ReduceRealMax(tmp,2); norm_cor = tmp[0]; error = tmp[1]; if ( ParallelDescriptor::IOProcessor() && verbose > 1 ) { const Real rel_error = error / norm_to_test_against; Spacer(std::cout, level); if (using_bnorm) { std::cout << "MultiGrid: Iteration " << nit << " resid/bnorm = " << rel_error << '\n'; } else { std::cout << "MultiGrid: Iteration " << nit << " resid/resid0 = " << rel_error << '\n'; } } } } else { // // Don't need to go any further -- no iterations are required // if (error <= eps_abs || error < eps_rel*norm_to_test_against) { if ( ParallelDescriptor::IOProcessor() && (verbose > 0) ) { std::cout << " Problem is already converged -- no iterations required\n"; } return 1; } for ( ; ( (error > eps_abs && error > eps_rel*norm_to_test_against) || (do_fixed_number_of_iters == 1) ) && nit <= maxiter; ++nit) { relax(*cor[level], *rhs[level], level, eps_rel, eps_abs, bc_mode, cg_time); error = errorEstimate(level, bc_mode); if ( ParallelDescriptor::IOProcessor() && verbose > 1 ) { const Real rel_error = error / norm_to_test_against; Spacer(std::cout, level); if (using_bnorm) { std::cout << "MultiGrid: Iteration " << nit << " resid/bnorm = " << rel_error << '\n'; } else { std::cout << "MultiGrid: Iteration " << nit << " resid/resid0 = " << rel_error << '\n'; } } } } Real run_time = (ParallelDescriptor::second() - strt_time); if ( verbose > 0 ) { if ( ParallelDescriptor::IOProcessor() ) { const Real rel_error = error / norm_to_test_against; Spacer(std::cout, level); if (using_bnorm) { std::cout << "MultiGrid: Iteration " << nit-1 << " resid/bnorm = " << rel_error << '\n'; } else { std::cout << "MultiGrid: Iteration " << nit-1 << " resid/resid0 = " << rel_error << '\n'; } } if ( verbose > 1 ) { Real tmp[2] = { run_time, cg_time }; ParallelDescriptor::ReduceRealMax(tmp,2,ParallelDescriptor::IOProcessorNumber()); if ( ParallelDescriptor::IOProcessor() ) std::cout << ", Solve time: " << tmp[0] << ", CG time: " << tmp[1]; } if ( ParallelDescriptor::IOProcessor() ) std::cout << '\n'; } if ( ParallelDescriptor::IOProcessor() && (verbose > 0) ) { if ( do_fixed_number_of_iters == 1) { std::cout << " Did fixed number of iterations: " << maxiter << std::endl; } else if ( error < eps_rel*norm_to_test_against ) { std::cout << " Converged res < eps_rel*max(bnorm,res_norm)\n"; } else if ( (use_Anorm_for_convergence == 1) && (error < eps_rel*norm_Lp*norm_cor) ) { std::cout << " Converged res < eps_rel*Anorm*sol\n"; } else if ( error < eps_abs ) { std::cout << " Converged res < eps_abs\n"; } } // // Omit ghost update since maybe not initialized in calling routine. // Add to boundary values stored in initialsolution. // _sol.copy(*cor[level]); _sol.plus(*initialsolution,0,_sol.nComp(),0); if ( use_Anorm_for_convergence == 1 ) { if ( do_fixed_number_of_iters == 1 || error <= eps_rel*(norm_Lp*norm_cor+norm_to_test_against) || error <= eps_abs ) returnVal = 1; } else { if ( do_fixed_number_of_iters == 1 || error <= eps_rel*(norm_to_test_against) || error <= eps_abs ) returnVal = 1; } // // Otherwise, failed to solve satisfactorily // return returnVal; }
void runGui(OrbGui &gui) { // GUI state vec2i wndSize = gui.input->getWindowSize(); // set up the default projection & modelview matrices glMatrixMode(GL_PROJECTION); glLoadIdentity(); glOrtho(0.0, (double)wndSize.x, (double)wndSize.y, 0.0, 10.0, -10.0); glMatrixMode(GL_MODELVIEW); glLoadIdentity(); ColumnLayout lyt(FixedLayout(10, 10, 200, wndSize.y), 10, 10, 10, 10, 3); Label("Ikarus").run(gui, lyt); Spacer(vec2i(0, 10)).run(gui, lyt); Label("Skeleton:").run(gui, lyt); ComboBox skelSel("skeleton-sel", WidgetID(curSkel)); for (int i = 0; i < (int)skeletons.size(); ++i) skelSel.add(WidgetID(i), skeletons[i].name); curSkel = skelSel.run(gui, lyt).getIndex(); if (Button("reload-btn", "Reload").run(gui, lyt)) skeletons.reset_at(curSkel, new SkeletonItem(skeletons[curSkel].fname, skeletons[curSkel].name)); SkeletonItem &skel = skeletons[curSkel]; Spacer(vec2i(0, 10)).run(gui, lyt); if (Button("reset-btn", "Reset Pose").run(gui, lyt)) { skel.solver->resetPose(); skel.targetPos = skel.solver->getEffectorPos(); targetSpeed = 0.0; } if (Button("reset-all-btn", "Reset All").run(gui, lyt)) { skel.solver->resetAll(); skel.targetPos = skel.solver->getEffectorPos(); targetSpeed = 0.0; } // the grid is always shown // (no point turning it off really) //showGrid = CheckBox("show-grid-chk", "Show grid", showGrid).run(gui, lyt); showJointBasis = CheckBox("show-joint-basis-chk", "Show joint basis vectors", showJointBasis).run(gui, lyt); showConstraints = CheckBox("show-constraints-chk", "Show joint constraints", showConstraints).run(gui, lyt); ikMode = CheckBox("ik-mode-chk", "IK Mode", ikMode).run(gui, lyt); ikEnabled = CheckBox("ik-enabled-chk", "IK Enabled", ikEnabled, ikMode).run(gui, lyt); bool constraintsOn = CheckBox("ik-constrained-chk", "Enable Constraints", skel.solver->areConstraintsEnabled(), ikMode).run(gui, lyt); skel.solver->enableConstraints(constraintsOn); if (Button("solve-btn", "Solve", ikMode && !ikEnabled).run(gui, lyt)) skel.solver->solveIk(30); if (Button("step-btn", "Step IK", ikMode && !ikEnabled).run(gui, lyt)) skel.solver->iterateIk(); if (Button("constraint-btn", "Apply Constraints", ikMode).run(gui, lyt)) { skel.solver->applyAllConstraints(); skel.targetPos = skel.solver->getEffectorPos(); targetSpeed = 0.0; } Label("Root bone:").run(gui, lyt); ComboBox rootSel("root-sel", WidgetID(&skel.solver->getRootBone())); for (int i = 0; i < skel.skeleton.numBones(); ++i) { const Bone &b = skel.skeleton[i]; if (! b.isEffector()) rootSel.add(WidgetID(&b), b.name); } const Bone *newRootBone = rootSel.run(gui, lyt).getData<const Bone>(); skel.solver->setRootBone(*newRootBone); Label("Effector:").run(gui, lyt); ComboBox effectorSel("effector-sel", WidgetID(&skel.solver->getEffector())); for (int i = 0; i < skel.skeleton.numBones(); ++i) { const Bone &b = skel.skeleton[i]; if (b.isEffector()) effectorSel.add(WidgetID(&b), b.name); } const Bone *newEffector = effectorSel.run(gui, lyt).getData<const Bone>(); if (newEffector != &skel.solver->getEffector()) { skel.solver->setEffector(*newEffector); skel.solver->setTargetPos(skel.solver->getEffectorPos()); skel.targetPos = skel.solver->getEffectorPos(); } int leftRightSplit = 250; int topBottomSplit = wndSize.y - 200; int a = leftRightSplit + (wndSize.x - leftRightSplit) / 3; int b = leftRightSplit + ((wndSize.x - leftRightSplit)*2) / 3; FixedLayout mainViewLyt(leftRightSplit, 0, wndSize.x - leftRightSplit, topBottomSplit); FixedLayout ortho0Lyt(leftRightSplit, topBottomSplit, a - leftRightSplit, wndSize.y - topBottomSplit); FixedLayout ortho1Lyt(a, topBottomSplit, b - a, wndSize.y - topBottomSplit); FixedLayout ortho2Lyt(b, topBottomSplit, wndSize.x - b, wndSize.y - topBottomSplit); if (ikMode) { IkSolverDisplay("displayP", &camPerspective, skel.solver.get(), showJointBasis, showConstraints, showGrid ? gridList : 0).run(gui, mainViewLyt); IkSolverDisplay("displayX", &camX, skel.solver.get(), showJointBasis, showConstraints).run(gui, ortho0Lyt); IkSolverDisplay("displayY", &camY, skel.solver.get(), showJointBasis, showConstraints).run(gui, ortho1Lyt); IkSolverDisplay("displayZ", &camZ, skel.solver.get(), showJointBasis, showConstraints).run(gui, ortho2Lyt); } else { SkeletonDisplay("displayP", &camPerspective, &skel.skeleton, showJointBasis, showConstraints, showGrid ? gridList : 0).run(gui, mainViewLyt); SkeletonDisplay("displayX", &camX, &skel.skeleton, showJointBasis, showConstraints).run(gui, ortho0Lyt); SkeletonDisplay("displayY", &camY, &skel.skeleton, showJointBasis, showConstraints).run(gui, ortho1Lyt); SkeletonDisplay("displayZ", &camZ, &skel.skeleton, showJointBasis, showConstraints).run(gui, ortho2Lyt); } }
int CGSolver::solve_bicgstab (MultiFab& sol, const MultiFab& rhs, Real eps_rel, Real eps_abs, LinOp::BC_Mode bc_mode) { BL_PROFILE("CGSolver::solve_bicgstab()"); const int nghost = sol.nGrow(), ncomp = 1; const BoxArray& ba = sol.boxArray(); const DistributionMapping& dm = sol.DistributionMap(); BL_ASSERT(sol.nComp() == ncomp); BL_ASSERT(sol.boxArray() == Lp.boxArray(lev)); BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev)); MultiFab ph(ba, ncomp, nghost, dm); MultiFab sh(ba, ncomp, nghost, dm); MultiFab sorig(ba, ncomp, 0, dm); MultiFab p (ba, ncomp, 0, dm); MultiFab r (ba, ncomp, 0, dm); MultiFab s (ba, ncomp, 0, dm); MultiFab rh (ba, ncomp, 0, dm); MultiFab v (ba, ncomp, 0, dm); MultiFab t (ba, ncomp, 0, dm); Lp.residual(r, rhs, sol, lev, bc_mode); MultiFab::Copy(sorig,sol,0,0,1,0); MultiFab::Copy(rh, r, 0,0,1,0); sol.setVal(0); const LinOp::BC_Mode temp_bc_mode = LinOp::Homogeneous_BC; #ifdef CG_USE_OLD_CONVERGENCE_CRITERIA Real rnorm = norm_inf(r); #else // // Calculate the local values of these norms & reduce their values together. // Real vals[2] = { norm_inf(r, true), Lp.norm(0, lev, true) }; ParallelDescriptor::ReduceRealMax(vals,2,color()); Real rnorm = vals[0]; const Real Lp_norm = vals[1]; Real sol_norm = 0; #endif const Real rnorm0 = rnorm; if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_BiCGStab: Initial error (error0) = " << rnorm0 << '\n'; } int ret = 0, nit = 1; Real rho_1 = 0, alpha = 0, omega = 0; if ( rnorm0 == 0 || rnorm0 < eps_abs ) { if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_BiCGStab: niter = 0," << ", rnorm = " << rnorm << ", eps_abs = " << eps_abs << std::endl; } return ret; } for (; nit <= maxiter; ++nit) { const Real rho = dotxy(rh,r); if ( rho == 0 ) { ret = 1; break; } if ( nit == 1 ) { MultiFab::Copy(p,r,0,0,1,0); } else { const Real beta = (rho/rho_1)*(alpha/omega); sxay(p, p, -omega, v); sxay(p, r, beta, p); } if ( use_mg_precond ) { ph.setVal(0); mg_precond->solve(ph, p, eps_rel, eps_abs, temp_bc_mode); } else if ( use_jacobi_precond ) { ph.setVal(0); Lp.jacobi_smooth(ph, p, lev, temp_bc_mode); } else { MultiFab::Copy(ph,p,0,0,1,0); } Lp.apply(v, ph, lev, temp_bc_mode); if ( Real rhTv = dotxy(rh,v) ) { alpha = rho/rhTv; } else { ret = 2; break; } sxay(sol, sol, alpha, ph); sxay(s, r, -alpha, v); rnorm = norm_inf(s); if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_BiCGStab: Half Iter " << std::setw(11) << nit << " rel. err. " << rnorm/(rnorm0) << '\n'; } #ifdef CG_USE_OLD_CONVERGENCE_CRITERIA if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break; #else sol_norm = norm_inf(sol); if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0 ) || rnorm < eps_abs ) break; #endif if ( use_mg_precond ) { sh.setVal(0); mg_precond->solve(sh, s, eps_rel, eps_abs, temp_bc_mode); } else if ( use_jacobi_precond ) { sh.setVal(0); Lp.jacobi_smooth(sh, s, lev, temp_bc_mode); } else { MultiFab::Copy(sh,s,0,0,1,0); } Lp.apply(t, sh, lev, temp_bc_mode); // // This is a little funky. I want to elide one of the reductions // in the following two dotxy()s. We do that by calculating the "local" // values and then reducing the two local values at the same time. // Real vals[2] = { dotxy(t,t,true), dotxy(t,s,true) }; ParallelDescriptor::ReduceRealSum(vals,2,color()); if ( vals[0] ) { omega = vals[1]/vals[0]; } else { ret = 3; break; } sxay(sol, sol, omega, sh); sxay(r, s, -omega, t); rnorm = norm_inf(r); if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_BiCGStab: Iteration " << std::setw(11) << nit << " rel. err. " << rnorm/(rnorm0) << '\n'; } #ifdef CG_USE_OLD_CONVERGENCE_CRITERIA if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break; #else sol_norm = norm_inf(sol); if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0 ) || rnorm < eps_abs ) break; #endif if ( omega == 0 ) { ret = 4; break; } rho_1 = rho; } if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_BiCGStab: Final: Iteration " << std::setw(4) << nit << " rel. err. " << rnorm/(rnorm0) << '\n'; } #ifdef CG_USE_OLD_CONVERGENCE_CRITERIA if ( ret == 0 && rnorm > eps_rel*rnorm0 && rnorm > eps_abs) #else if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0 ) && rnorm > eps_abs ) #endif { if ( ParallelDescriptor::IOProcessor(color()) ) BoxLib::Warning("CGSolver_BiCGStab:: failed to converge!"); ret = 8; } if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) ) { sol.plus(sorig, 0, 1, 0); } else { sol.setVal(0); sol.plus(sorig, 0, 1, 0); } return ret; }
int CGSolver::solve_cabicgstab (MultiFab& sol, const MultiFab& rhs, Real eps_rel, Real eps_abs, LinOp::BC_Mode bc_mode) { BL_PROFILE("CGSolver::solve_cabicgstab()"); BL_ASSERT(sol.nComp() == 1); BL_ASSERT(sol.boxArray() == Lp.boxArray(lev)); BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev)); Real temp1[4*SSS_MAX+1]; Real temp2[4*SSS_MAX+1]; Real temp3[4*SSS_MAX+1]; Real Tp[4*SSS_MAX+1][4*SSS_MAX+1]; Real Tpp[4*SSS_MAX+1][4*SSS_MAX+1]; Real aj[4*SSS_MAX+1]; Real cj[4*SSS_MAX+1]; Real ej[4*SSS_MAX+1]; Real Tpaj[4*SSS_MAX+1]; Real Tpcj[4*SSS_MAX+1]; Real Tppaj[4*SSS_MAX+1]; Real G[4*SSS_MAX+1][4*SSS_MAX+1]; // Extracted from first 4*SSS+1 columns of Gg[][]. indexed as [row][col] Real g[4*SSS_MAX+1]; // Extracted from last [4*SSS+1] column of Gg[][]. Real Gg[(4*SSS_MAX+1)*(4*SSS_MAX+2)]; // Buffer to hold the Gram-like matrix produced by matmul(). indexed as [row*(4*SSS+2) + col] // // If variable_SSS we "telescope" SSS. // We start with 1 and increase it up to SSS_MAX on the outer iterations. // if (variable_SSS) SSS = 1; zero( aj, 4*SSS_MAX+1); zero( cj, 4*SSS_MAX+1); zero( ej, 4*SSS_MAX+1); zero( Tpaj, 4*SSS_MAX+1); zero( Tpcj, 4*SSS_MAX+1); zero(Tppaj, 4*SSS_MAX+1); zero(temp1, 4*SSS_MAX+1); zero(temp2, 4*SSS_MAX+1); zero(temp3, 4*SSS_MAX+1); SetMonomialBasis(Tp,Tpp,SSS); const int ncomp = 1, nghost = sol.nGrow(); // // Contains the matrix powers of p[] and r[]. // // First 2*SSS+1 components are powers of p[]. // Next 2*SSS components are powers of r[]. // const BoxArray& ba = sol.boxArray(); const DistributionMapping& dm = sol.DistributionMap(); MultiFab PR(ba, 4*SSS_MAX+1, 0, dm); MultiFab p(ba, ncomp, 0, dm); MultiFab r(ba, ncomp, 0, dm); MultiFab rt(ba, ncomp, 0, dm); MultiFab tmp(ba, 4, nghost, dm); Lp.residual(r, rhs, sol, lev, bc_mode); BL_ASSERT(!r.contains_nan()); MultiFab::Copy(rt,r,0,0,1,0); MultiFab::Copy( p,r,0,0,1,0); const Real rnorm0 = norm_inf(r); Real delta = dotxy(r,rt); const Real L2_norm_of_rt = sqrt(delta); const LinOp::BC_Mode temp_bc_mode = LinOp::Homogeneous_BC; if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_CABiCGStab: Initial error (error0) = " << rnorm0 << '\n'; } if ( rnorm0 == 0 || delta == 0 || rnorm0 < eps_abs ) { if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_CABiCGStab: niter = 0," << ", rnorm = " << rnorm0 << ", delta = " << delta << ", eps_abs = " << eps_abs << '\n'; } return 0; } int niters = 0, ret = 0; Real L2_norm_of_resid = 0, atime = 0, gtime = 0; bool BiCGStabFailed = false, BiCGStabConverged = false; for (int m = 0; m < maxiter && !BiCGStabFailed && !BiCGStabConverged; ) { const Real time1 = ParallelDescriptor::second(); // // Compute the matrix powers on p[] & r[] (monomial basis). // The 2*SSS+1 powers of p[] followed by the 2*SSS powers of r[]. // MultiFab::Copy(PR,p,0,0,1,0); MultiFab::Copy(PR,r,0,2*SSS+1,1,0); BL_ASSERT(!PR.contains_nan(0, 1)); BL_ASSERT(!PR.contains_nan(2*SSS+1,1)); // // We use "tmp" to minimize the number of Lp.apply()s. // We do this by doing p & r together in a single call. // MultiFab::Copy(tmp,p,0,0,1,0); MultiFab::Copy(tmp,r,0,1,1,0); for (int n = 1; n < 2*SSS; n++) { Lp.apply(tmp, tmp, lev, temp_bc_mode, false, 0, 2, 2); MultiFab::Copy(tmp,tmp,2,0,2,0); MultiFab::Copy(PR,tmp,0, n,1,0); MultiFab::Copy(PR,tmp,1,2*SSS+n+1,1,0); BL_ASSERT(!PR.contains_nan(n, 1)); BL_ASSERT(!PR.contains_nan(2*SSS+n+1,1)); } MultiFab::Copy(tmp,PR,2*SSS-1,0,1,0); Lp.apply(tmp, tmp, lev, temp_bc_mode, false, 0, 1, 1); MultiFab::Copy(PR,tmp,1,2*SSS,1,0); BL_ASSERT(!PR.contains_nan(2*SSS-1,1)); BL_ASSERT(!PR.contains_nan(2*SSS, 1)); Real time2 = ParallelDescriptor::second(); atime += (time2-time1); BuildGramMatrix(Gg, PR, rt, SSS); const Real time3 = ParallelDescriptor::second(); gtime += (time3-time2); // // Form G[][] and g[] from Gg. // for (int i = 0, k = 0; i < 4*SSS+1; i++) { for (int j = 0; j < 4*SSS+1; j++) // // First 4*SSS+1 elements in each row go to G[][]. // G[i][j] = Gg[k++]; // // Last element in row goes to g[]. // g[i] = Gg[k++]; } zero(aj, 4*SSS+1); aj[0] = 1; zero(cj, 4*SSS+1); cj[2*SSS+1] = 1; zero(ej, 4*SSS+1); for (int nit = 0; nit < SSS; nit++) { gemv( Tpaj, Tp, aj, 4*SSS+1, 4*SSS+1); gemv( Tpcj, Tp, cj, 4*SSS+1, 4*SSS+1); gemv(Tppaj, Tpp, aj, 4*SSS+1, 4*SSS+1); const Real g_dot_Tpaj = dot(g, Tpaj, 4*SSS+1); if ( g_dot_Tpaj == 0 ) { if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) ) std::cout << "CGSolver_CABiCGStab: g_dot_Tpaj == 0, nit = " << nit << '\n'; BiCGStabFailed = true; ret = 1; break; } const Real alpha = delta / g_dot_Tpaj; if ( std::isinf(alpha) ) { if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) ) std::cout << "CGSolver_CABiCGStab: alpha == inf, nit = " << nit << '\n'; BiCGStabFailed = true; ret = 2; break; } axpy(temp1, Tpcj, -alpha, Tppaj, 4*SSS+1); gemv(temp2, G, temp1, 4*SSS+1, 4*SSS+1); axpy(temp3, cj, -alpha, Tpaj, 4*SSS+1); const Real omega_numerator = dot(temp3, temp2, 4*SSS+1); const Real omega_denominator = dot(temp1, temp2, 4*SSS+1); // // NOTE: omega_numerator/omega_denominator can be 0/x or 0/0, but should never be x/0. // // If omega_numerator==0, and ||s||==0, then convergence, x=x+alpha*aj. // If omega_numerator==0, and ||s||!=0, then stabilization breakdown. // // Partial update of ej must happen before the check on omega to ensure forward progress !!! // axpy(ej, ej, alpha, aj, 4*SSS+1); // // ej has been updated so consider that we've done an iteration since // even if we break out of the loop we'll be able to update both sol. // niters++; // // Calculate the norm of Saad's vector 's' to check intra s-step convergence. // axpy(temp1, cj,-alpha, Tpaj, 4*SSS+1); gemv(temp2, G, temp1, 4*SSS+1, 4*SSS+1); const Real L2_norm_of_s = dot(temp1,temp2,4*SSS+1); L2_norm_of_resid = (L2_norm_of_s < 0 ? 0 : sqrt(L2_norm_of_s)); if ( L2_norm_of_resid < eps_rel*L2_norm_of_rt ) { if ( verbose > 1 && L2_norm_of_resid == 0 && ParallelDescriptor::IOProcessor(color()) ) std::cout << "CGSolver_CABiCGStab: L2 norm of s: " << L2_norm_of_s << '\n'; BiCGStabConverged = true; break; } if ( omega_denominator == 0 ) { if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) ) std::cout << "CGSolver_CABiCGStab: omega_denominator == 0, nit = " << nit << '\n'; BiCGStabFailed = true; ret = 3; break; } const Real omega = omega_numerator / omega_denominator; if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) ) { if ( omega == 0 ) std::cout << "CGSolver_CABiCGStab: omega == 0, nit = " << nit << '\n'; if ( std::isinf(omega) ) std::cout << "CGSolver_CABiCGStab: omega == inf, nit = " << nit << '\n'; } if ( omega == 0 ) { BiCGStabFailed = true; ret = 4; break; } if ( std::isinf(omega) ) { BiCGStabFailed = true; ret = 4; break; } // // Complete the update of ej & cj now that omega is known to be ok. // axpy(ej, ej, omega, cj, 4*SSS+1); axpy(ej, ej,-omega*alpha, Tpaj, 4*SSS+1); axpy(cj, cj, -omega, Tpcj, 4*SSS+1); axpy(cj, cj, -alpha, Tpaj, 4*SSS+1); axpy(cj, cj, omega*alpha, Tppaj, 4*SSS+1); // // Do an early check of the residual to determine convergence. // gemv(temp1, G, cj, 4*SSS+1, 4*SSS+1); // // sqrt( (cj,Gcj) ) == L2 norm of the intermediate residual in exact arithmetic. // However, finite precision can lead to the norm^2 being < 0 (Jim Demmel). // If cj_dot_Gcj < 0 we flush to zero and consider ourselves converged. // const Real L2_norm_of_r = dot(cj, temp1, 4*SSS+1); L2_norm_of_resid = (L2_norm_of_r > 0 ? sqrt(L2_norm_of_r) : 0); if ( L2_norm_of_resid < eps_rel*L2_norm_of_rt ) { if ( verbose > 1 && L2_norm_of_resid == 0 && ParallelDescriptor::IOProcessor(color()) ) std::cout << "CGSolver_CABiCGStab: L2_norm_of_r: " << L2_norm_of_r << '\n'; BiCGStabConverged = true; break; } const Real delta_next = dot(g, cj, 4*SSS+1); if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) ) { if ( delta_next == 0 ) std::cout << "CGSolver_CABiCGStab: delta == 0, nit = " << nit << '\n'; if ( std::isinf(delta_next) ) std::cout << "CGSolver_CABiCGStab: delta == inf, nit = " << nit << '\n'; } if ( std::isinf(delta_next) ) { BiCGStabFailed = true; ret = 5; break; } // delta = inf? if ( delta_next == 0 ) { BiCGStabFailed = true; ret = 5; break; } // Lanczos breakdown... const Real beta = (delta_next/delta)*(alpha/omega); if ( verbose > 1 && ParallelDescriptor::IOProcessor(color()) ) { if ( beta == 0 ) std::cout << "CGSolver_CABiCGStab: beta == 0, nit = " << nit << '\n'; if ( std::isinf(beta) ) std::cout << "CGSolver_CABiCGStab: beta == inf, nit = " << nit << '\n'; } if ( std::isinf(beta) ) { BiCGStabFailed = true; ret = 6; break; } // beta = inf? if ( beta == 0 ) { BiCGStabFailed = true; ret = 6; break; } // beta = 0? can't make further progress(?) axpy(aj, cj, beta, aj, 4*SSS+1); axpy(aj, aj, -omega*beta, Tpaj, 4*SSS+1); delta = delta_next; } // // Update iterates. // for (int i = 0; i < 4*SSS+1; i++) sxay(sol,sol,ej[i],PR,i); MultiFab::Copy(p,PR,0,0,1,0); p.mult(aj[0],0,1); for (int i = 1; i < 4*SSS+1; i++) sxay(p,p,aj[i],PR,i); MultiFab::Copy(r,PR,0,0,1,0); r.mult(cj[0],0,1); for (int i = 1; i < 4*SSS+1; i++) sxay(r,r,cj[i],PR,i); if ( !BiCGStabFailed && !BiCGStabConverged ) { m += SSS; if ( variable_SSS && SSS < SSS_MAX ) { SSS++; SetMonomialBasis(Tp,Tpp,SSS); } } } if ( verbose > 0 ) { if ( ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_CABiCGStab: Final: Iteration " << std::setw(4) << niters << " rel. err. " << L2_norm_of_resid << '\n'; } if ( verbose > 1 ) { Real tmp[2] = { atime, gtime }; ParallelDescriptor::ReduceRealMax(tmp,2,color()); if ( ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_CABiCGStab apply time: " << tmp[0] << ", gram time: " << tmp[1] << '\n'; } } } if ( niters >= maxiter && !BiCGStabFailed && !BiCGStabConverged) { if ( L2_norm_of_resid > L2_norm_of_rt ) { if ( ParallelDescriptor::IOProcessor(color()) ) BoxLib::Warning("CGSolver_CABiCGStab: failed to converge!"); // // Return code 8 tells the MultiGrid driver to zero out the solution! // ret = 8; } else { // // Return codes 1-7 tells the MultiGrid driver to smooth the solution! // ret = 7; } } return ret; }
int CGSolver::jbb_precond (MultiFab& sol, const MultiFab& rhs, int lev, LinOp& Lp) { // // This is a local routine. No parallel is allowed to happen here. // int lev_loc = lev; const Real eps_rel = 1.e-2; const Real eps_abs = 1.e-16; const int nghost = sol.nGrow(); const int ncomp = sol.nComp(); const bool local = true; const LinOp::BC_Mode bc_mode = LinOp::Homogeneous_BC; BL_ASSERT(ncomp == 1 ); BL_ASSERT(sol.boxArray() == Lp.boxArray(lev_loc)); BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev_loc)); const BoxArray& ba = sol.boxArray(); const DistributionMapping& dm = sol.DistributionMap(); MultiFab sorig(ba, ncomp, nghost, dm); MultiFab r(ba, ncomp, nghost, dm); MultiFab z(ba, ncomp, nghost, dm); MultiFab q(ba, ncomp, nghost, dm); MultiFab p(ba, ncomp, nghost, dm); sorig.copy(sol); Lp.residual(r, rhs, sorig, lev_loc, LinOp::Homogeneous_BC, local); sol.setVal(0); Real rnorm = norm_inf(r,local); const Real rnorm0 = rnorm; Real minrnorm = rnorm; if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev_loc); std::cout << " jbb_precond: Initial error : " << rnorm0 << '\n'; } const Real Lp_norm = Lp.norm(0, lev_loc, local); Real sol_norm = 0; int ret = 0; // will return this value if all goes well Real rho_1 = 0; int nit = 1; if ( rnorm0 == 0 || rnorm0 < eps_abs ) { if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev_loc); std::cout << "jbb_precond: niter = 0," << ", rnorm = " << rnorm << ", eps_abs = " << eps_abs << std::endl; } return 0; } for (; nit <= maxiter; ++nit) { z.copy(r); Real rho = dotxy(z,r,local); if (nit == 1) { p.copy(z); } else { Real beta = rho/rho_1; sxay(p, z, beta, p); } Lp.apply(q, p, lev_loc, bc_mode, local); Real alpha; if ( Real pw = dotxy(p,q,local) ) { alpha = rho/pw; } else { ret = 1; break; } if ( verbose > 3 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev_loc); std::cout << "jbb_precond:" << " nit " << nit << " rho " << rho << " alpha " << alpha << '\n'; } sxay(sol, sol, alpha, p); sxay( r, r,-alpha, q); rnorm = norm_inf(r, local); sol_norm = norm_inf(sol, local); if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev_loc); std::cout << "jbb_precond: Iteration" << std::setw(4) << nit << " rel. err. " << rnorm/(rnorm0) << '\n'; } if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0) || rnorm < eps_abs ) { break; } if ( rnorm > def_unstable_criterion*minrnorm ) { ret = 2; break; } else if ( rnorm < minrnorm ) { minrnorm = rnorm; } rho_1 = rho; } if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev_loc); std::cout << "jbb_precond: Final Iteration" << std::setw(4) << nit << " rel. err. " << rnorm/(rnorm0) << '\n'; } if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0) && rnorm > eps_abs ) { if ( ParallelDescriptor::IOProcessor(color()) ) { BoxLib::Warning("jbb_precond:: failed to converge!"); } ret = 8; } if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) ) { sol.plus(sorig, 0, 1, 0); } else { sol.setVal(0); sol.plus(sorig, 0, 1, 0); } return ret; }
int CGSolver::solve_cg (MultiFab& sol, const MultiFab& rhs, Real eps_rel, Real eps_abs, LinOp::BC_Mode bc_mode) { BL_PROFILE("CGSolver::solve_cg()"); const int nghost = sol.nGrow(), ncomp = 1; const BoxArray& ba = sol.boxArray(); const DistributionMapping& dm = sol.DistributionMap(); BL_ASSERT(sol.nComp() == ncomp); BL_ASSERT(sol.boxArray() == Lp.boxArray(lev)); BL_ASSERT(rhs.boxArray() == Lp.boxArray(lev)); MultiFab sorig(ba, ncomp, nghost, dm); MultiFab r(ba, ncomp, nghost, dm); MultiFab z(ba, ncomp, nghost, dm); MultiFab q(ba, ncomp, nghost, dm); MultiFab p(ba, ncomp, nghost, dm); MultiFab r1(ba, ncomp, nghost, dm); MultiFab z1(ba, ncomp, nghost, dm); MultiFab r2(ba, ncomp, nghost, dm); MultiFab z2(ba, ncomp, nghost, dm); MultiFab::Copy(sorig,sol,0,0,1,0); Lp.residual(r, rhs, sorig, lev, bc_mode); sol.setVal(0); const LinOp::BC_Mode temp_bc_mode=LinOp::Homogeneous_BC; Real rnorm = norm_inf(r); const Real rnorm0 = rnorm; Real minrnorm = rnorm; if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << " CG: Initial error : " << rnorm0 << '\n'; } const Real Lp_norm = Lp.norm(0, lev); Real sol_norm = 0; Real rho_1 = 0; int ret = 0; int nit = 1; if ( rnorm == 0 || rnorm < eps_abs ) { if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << " CG: niter = 0," << ", rnorm = " << rnorm << ", eps_rel*(Lp_norm*sol_norm + rnorm0 )" << eps_rel*(Lp_norm*sol_norm + rnorm0 ) << ", eps_abs = " << eps_abs << std::endl; } return 0; } for (; nit <= maxiter; ++nit) { if (use_jbb_precond && ParallelDescriptor::NProcs(color()) > 1) { z.setVal(0); jbb_precond(z,r,lev,Lp); } else { MultiFab::Copy(z,r,0,0,1,0); } Real rho = dotxy(z,r); if (nit == 1) { MultiFab::Copy(p,z,0,0,1,0); } else { Real beta = rho/rho_1; sxay(p, z, beta, p); } Lp.apply(q, p, lev, temp_bc_mode); Real alpha; if ( Real pw = dotxy(p,q) ) { alpha = rho/pw; } else { ret = 1; break; } if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << "CGSolver_cg:" << " nit " << nit << " rho " << rho << " alpha " << alpha << '\n'; } sxay(sol, sol, alpha, p); sxay( r, r,-alpha, q); rnorm = norm_inf(r); sol_norm = norm_inf(sol); if ( verbose > 2 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << " CG: Iteration" << std::setw(4) << nit << " rel. err. " << rnorm/(rnorm0) << '\n'; } #ifdef CG_USE_OLD_CONVERGENCE_CRITERIA if ( rnorm < eps_rel*rnorm0 || rnorm < eps_abs ) break; #else if ( rnorm < eps_rel*(Lp_norm*sol_norm + rnorm0) || rnorm < eps_abs ) break; #endif if ( rnorm > def_unstable_criterion*minrnorm ) { ret = 2; break; } else if ( rnorm < minrnorm ) { minrnorm = rnorm; } rho_1 = rho; } if ( verbose > 0 && ParallelDescriptor::IOProcessor(color()) ) { Spacer(std::cout, lev); std::cout << " CG: Final Iteration" << std::setw(4) << nit << " rel. err. " << rnorm/(rnorm0) << '\n'; } #ifdef CG_USE_OLD_CONVERGENCE_CRITERIA if ( ret == 0 && rnorm > eps_rel*rnorm0 && rnorm > eps_abs ) #else if ( ret == 0 && rnorm > eps_rel*(Lp_norm*sol_norm + rnorm0) && rnorm > eps_abs ) #endif { if ( ParallelDescriptor::IOProcessor(color()) ) BoxLib::Warning("CGSolver_cg: failed to converge!"); ret = 8; } if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) ) { sol.plus(sorig, 0, 1, 0); } else { sol.setVal(0); sol.plus(sorig, 0, 1, 0); } return ret; }
int MCMultiGrid::solve_ (MultiFab& _sol, Real eps_rel, Real eps_abs, MCBC_Mode bc_mode, int level) { // // Relax system maxiter times, stop if relative error <= _eps_rel or // if absolute err <= _abs_eps // const Real strt_time = ParallelDescriptor::second(); // // Elide a reduction by doing these together. // Real tmp[2] = { norm_inf(*rhs[level],true), errorEstimate(level,bc_mode,true) }; ParallelDescriptor::ReduceRealMax(tmp,2); const Real norm_rhs = tmp[0]; const Real error0 = tmp[1]; int returnVal = 0; Real error = error0; if ( ParallelDescriptor::IOProcessor() && (verbose > 0) ) { Spacer(std::cout, level); std::cout << "MCMultiGrid: Initial rhs = " << norm_rhs << '\n'; std::cout << "MCMultiGrid: Initial error (error0) = " << error0 << '\n'; } if ( ParallelDescriptor::IOProcessor() && eps_rel < 1.0e-16 && eps_rel > 0 ) { std::cout << "MCMultiGrid: Tolerance " << eps_rel << " < 1e-16 is probably set too low" << '\n'; } // // Initialize correction to zero at this level (auto-filled at levels below) // (*cor[level]).setVal(0.0); // // Note: if eps_rel, eps_abs < 0 then that test is effectively bypassed. // int nit = 1; const Real new_error_0 = norm_rhs; //const Real norm_Lp = Lp.norm(0, level); for ( ; error > eps_abs && error > eps_rel*norm_rhs && nit <= maxiter; ++nit) { relax(*cor[level], *rhs[level], level, eps_rel, eps_abs, bc_mode); error = errorEstimate(level,bc_mode); if ( ParallelDescriptor::IOProcessor() && verbose > 1 ) { const Real rel_error = (error0 != 0) ? error/new_error_0 : 0; Spacer(std::cout, level); std::cout << "MCMultiGrid: Iteration " << nit << " error/error0 = " << rel_error << '\n'; } } Real run_time = (ParallelDescriptor::second() - strt_time); if ( verbose > 0 ) { if ( ParallelDescriptor::IOProcessor() ) { const Real rel_error = (error0 != 0) ? error/error0 : 0; Spacer(std::cout, level); std::cout << "MCMultiGrid: Final Iter. " << nit-1 << " error/error0 = " << rel_error; } if ( verbose > 1 ) { ParallelDescriptor::ReduceRealMax(run_time); if ( ParallelDescriptor::IOProcessor() ) std::cout << ", Solve time: " << run_time << '\n'; } } if ( ParallelDescriptor::IOProcessor() && (verbose > 0) ) { if ( error < eps_rel*norm_rhs ) { std::cout << " Converged res < eps_rel*bnorm\n"; } else if ( error < eps_abs ) { std::cout << " Converged res < eps_abs\n"; } } // // Omit ghost update since maybe not initialized in calling routine. // Add to boundary values stored in initialsolution. // _sol.copy(*cor[level]); _sol.plus(*initialsolution,0,_sol.nComp(),0); if ( error <= eps_rel*(norm_rhs) || error <= eps_abs ) returnVal = 1; // // Otherwise, failed to solve satisfactorily // return returnVal; }