/*
 * Emit TCG code for the operation implemented by gen_helper_vsldoi().
 *
 * When ctx->altivec_enabled is clear, only a POWERPC_EXCP_VPU exception is
 * generated.  Otherwise pointers to the vector registers selected by the
 * rA, rB and rD opcode fields, plus the VSH opcode field as a 32-bit
 * constant, are passed to the helper in the order (rD, rA, rB, VSH).
 */
static void gen_vsldoi(DisasContext *ctx)
{
    TCGv_ptr src_a, src_b, dest;
    TCGv_i32 shift;

    if (unlikely(!ctx->altivec_enabled)) {
        gen_exception(ctx, POWERPC_EXCP_VPU);
        return;
    }

    src_a = gen_avr_ptr(rA(ctx->opcode));
    src_b = gen_avr_ptr(rB(ctx->opcode));
    dest = gen_avr_ptr(rD(ctx->opcode));
    shift = tcg_const_i32(VSH(ctx->opcode));

    gen_helper_vsldoi(dest, src_a, src_b, shift);

    /* Release every temporary created above. */
    tcg_temp_free_ptr(src_a);
    tcg_temp_free_ptr(src_b);
    tcg_temp_free_ptr(dest);
    tcg_temp_free_i32(shift);
}
Foam::coupledSolverPerformance Foam::coupledCgSolver::solve ( FieldField<Field, scalar>& x, const FieldField<Field, scalar>& b, const direction cmpt ) const { // Prepare solver performance coupledSolverPerformance solverPerf(typeName, fieldName()); FieldField<Field, scalar> wA(x.size()); FieldField<Field, scalar> rA(x.size()); forAll (x, rowI) { wA.set(rowI, new scalarField(x[rowI].size(), 0)); rA.set(rowI, new scalarField(x[rowI].size(), 0)); }
/*
 * Emit TCG code for vmrgew.
 *
 * When ctx->altivec_enabled is clear, only a POWERPC_EXCP_VPU exception is
 * generated.  Otherwise, for each 64-bit half (cpu_avrh, then cpu_avrl) of
 * the registers selected by the rD/rA/rB opcode fields: the rB half is
 * shifted right by 32 bits and deposited (offset 0, length 32) into the rA
 * half, and the result is written to the rD half.
 */
static void gen_vmrgew(DisasContext *ctx)
{
    TCGv_i64 shifted;
    int vt, va, vb;

    if (unlikely(!ctx->altivec_enabled)) {
        gen_exception(ctx, POWERPC_EXCP_VPU);
        return;
    }

    vt = rD(ctx->opcode);
    va = rA(ctx->opcode);
    vb = rB(ctx->opcode);

    shifted = tcg_temp_new_i64();

    /* High doubleword. */
    tcg_gen_shri_i64(shifted, cpu_avrh[vb], 32);
    tcg_gen_deposit_i64(cpu_avrh[vt], cpu_avrh[va], shifted, 0, 32);

    /* Low doubleword. */
    tcg_gen_shri_i64(shifted, cpu_avrl[vb], 32);
    tcg_gen_deposit_i64(cpu_avrl[vt], cpu_avrl[va], shifted, 0, 32);

    tcg_temp_free_i64(shifted);
}
// Preconditioned conjugate-gradient style iteration for the matrix equation
// held by this solver.
//
//   psi    : solution field; holds the initial guess on entry and is
//            updated in place
//   source : right-hand side
//   cmpt   : component index forwarded to Amul() and the preconditioner
//
// Returns the solver performance record (initial residual, final residual
// and iteration count, all filled in below).
//
// The obsolete 'register' storage class (removed in C++17) is deliberately
// not used for the loop counters.
Foam::lduMatrix::solverPerformance Foam::PCG::solve
(
    scalarField& psi,
    const scalarField& source,
    const direction cmpt
) const
{
    // --- Setup class containing solver performance data
    lduMatrix::solverPerformance solverPerf
    (
        lduMatrix::preconditioner::getName(controlDict_) + typeName,
        fieldName_
    );

    const label nCells = psi.size();

    scalar* __restrict__ psiPtr = psi.begin();

    scalarField pA(nCells);
    scalar* __restrict__ pAPtr = pA.begin();

    scalarField wA(nCells);
    scalar* __restrict__ wAPtr = wA.begin();

    scalar wArA = matrix_.great_;
    scalar wArAold = wArA;

    // --- Calculate A.psi
    matrix_.Amul(wA, psi, interfaceBouCoeffs_, interfaces_, cmpt);

    // --- Calculate initial residual field
    scalarField rA(source - wA);
    scalar* __restrict__ rAPtr = rA.begin();

    // --- Calculate normalisation factor
    scalar normFactor = this->normFactor(psi, source, wA, pA);

    if (lduMatrix::debug >= 2)
    {
        Info<< " Normalisation factor = " << normFactor << endl;
    }

    // --- Calculate normalised residual norm
    solverPerf.initialResidual() = gSumMag(rA)/normFactor;
    solverPerf.finalResidual() = solverPerf.initialResidual();

    // --- Check convergence, solve if not converged
    if (!solverPerf.checkConvergence(tolerance_, relTol_))
    {
        // --- Select and construct the preconditioner
        autoPtr<lduMatrix::preconditioner> preconPtr =
            lduMatrix::preconditioner::New
            (
                *this,
                controlDict_
            );

        // --- Solver iteration
        do
        {
            // --- Store previous wArA
            wArAold = wArA;

            // --- Precondition residual
            preconPtr->precondition(wA, rA, cmpt);

            // --- Update search directions:
            wArA = gSumProd(wA, rA);

            if (solverPerf.nIterations() == 0)
            {
                // First pass: the search direction is the preconditioned
                // residual itself
                for (label cell=0; cell<nCells; cell++)
                {
                    pAPtr[cell] = wAPtr[cell];
                }
            }
            else
            {
                scalar beta = wArA/wArAold;

                for (label cell=0; cell<nCells; cell++)
                {
                    pAPtr[cell] = wAPtr[cell] + beta*pAPtr[cell];
                }
            }

            // --- Update preconditioned residual
            matrix_.Amul(wA, pA, interfaceBouCoeffs_, interfaces_, cmpt);

            scalar wApA = gSumProd(wA, pA);

            // --- Test for singularity
            //     (leaves the loop without incrementing the iteration count)
            if (solverPerf.checkSingularity(mag(wApA)/normFactor))
            {
                break;
            }

            // --- Update solution and residual:
            scalar alpha = wArA/wApA;

            for (label cell=0; cell<nCells; cell++)
            {
                psiPtr[cell] += alpha*pAPtr[cell];
                rAPtr[cell] -= alpha*wAPtr[cell];
            }

            solverPerf.finalResidual() = gSumMag(rA)/normFactor;

        } while
        (
            solverPerf.nIterations()++ < maxIter_
        && !(solverPerf.checkConvergence(tolerance_, relTol_))
        );
    }

    return solverPerf;
}
// Component-wise preconditioned iteration for the LduMatrix held by this
// solver: every scalar product, quotient and update below is formed per
// component via the cmpt*/gSumCmpt* functions.
//
//   psi : solution field; initial guess on entry, updated in place
//
// Returns the SolverPerformance record filled with the initial and final
// normalised residuals and the iteration count.
typename Foam::SolverPerformance<Type>
Foam::PCICG<Type, DType, LUType>::solve(Field<Type>& psi) const
{
    word preconditionerName(this->controlDict_.lookup("preconditioner"));

    // --- Solver performance data container
    SolverPerformance<Type> solverPerf
    (
        preconditionerName + typeName,
        this->fieldName_
    );

    label nCells = psi.size();

    // --- Work fields and raw pointers into them
    Type* __restrict__ psiPtr = psi.begin();

    Field<Type> pA(nCells);
    Type* __restrict__ pAPtr = pA.begin();

    Field<Type> wA(nCells);
    Type* __restrict__ wAPtr = wA.begin();

    Type wArA = solverPerf.great_*pTraits<Type>::one;
    Type wArAold = wArA;

    // --- wA = A.psi
    this->matrix_.Amul(wA, psi);

    // --- Initial residual field
    Field<Type> rA(this->matrix_.source() - wA);
    Type* __restrict__ rAPtr = rA.begin();

    // --- Normalisation factor
    Type normFactor = this->normFactor(psi, wA, pA);

    if (LduMatrix<Type, DType, LUType>::debug >= 2)
    {
        Info<< " Normalisation factor = " << normFactor << endl;
    }

    // --- Normalised residual norm
    solverPerf.initialResidual() = cmptDivide(gSumCmptMag(rA), normFactor);
    solverPerf.finalResidual() = solverPerf.initialResidual();

    // --- Iterate when a minimum iteration count is requested or the
    //     initial residual does not already satisfy the tolerances
    if
    (
        this->minIter_ > 0
     || !solverPerf.checkConvergence(this->tolerance_, this->relTol_)
    )
    {
        // --- Select and construct the preconditioner
        autoPtr<typename LduMatrix<Type, DType, LUType>::preconditioner>
        preconPtr = LduMatrix<Type, DType, LUType>::preconditioner::New
        (
            *this,
            this->controlDict_
        );

        // --- Solver iteration
        do
        {
            // --- Remember the previous wArA
            wArAold = wArA;

            // --- Precondition residual
            preconPtr->precondition(wA, rA);

            // --- Update search directions
            wArA = gSumCmptProd(wA, rA);

            if (solverPerf.nIterations() != 0)
            {
                Type beta = cmptDivide
                (
                    wArA,
                    stabilise(wArAold, solverPerf.vsmall_)
                );

                for (label celli = 0; celli < nCells; celli++)
                {
                    pAPtr[celli] =
                        wAPtr[celli] + cmptMultiply(beta, pAPtr[celli]);
                }
            }
            else
            {
                // First pass: start from the preconditioned residual
                for (label celli = 0; celli < nCells; celli++)
                {
                    pAPtr[celli] = wAPtr[celli];
                }
            }

            // --- Update preconditioned residual
            this->matrix_.Amul(wA, pA);

            Type wApA = gSumCmptProd(wA, pA);

            // --- Test for singularity
            //     (exits without incrementing the iteration count)
            if
            (
                solverPerf.checkSingularity
                (
                    cmptDivide(cmptMag(wApA), normFactor)
                )
            )
            {
                break;
            }

            // --- Update solution and residual
            Type alpha = cmptDivide
            (
                wArA,
                stabilise(wApA, solverPerf.vsmall_)
            );

            for (label celli = 0; celli < nCells; celli++)
            {
                psiPtr[celli] += cmptMultiply(alpha, pAPtr[celli]);
                rAPtr[celli] -= cmptMultiply(alpha, wAPtr[celli]);
            }

            solverPerf.finalResidual() =
                cmptDivide(gSumCmptMag(rA), normFactor);

        } while
        (
            (
                solverPerf.nIterations()++ < this->maxIter_
            && !solverPerf.checkConvergence(this->tolerance_, this->relTol_)
            )
         || solverPerf.nIterations() < this->minIter_
        );
    }

    return solverPerf;
}
// Solve the matrix equation with PARALUTION's FGMRES solver.
//
//   psi    : solution field; initial guess on entry, overwritten with the
//            PARALUTION result when a solve is performed
//   source : right-hand side
//   cmpt   : component index forwarded to Amul()
//
// All tuning parameters are read from controlDict_ with the defaults shown
// below.  The initial residual is computed on the OpenFOAM side; the
// PARALUTION solve only runs when that residual does not already satisfy
// tolerance_/relTol_.
//
// Returns the solver performance record.
//
// The obsolete 'register' storage class (removed in C++17) is not used.
Foam::lduMatrix::solverPerformance Foam::paralution_PFGMRES::solve
(
    scalarField& psi,
    const scalarField& source,
    const direction cmpt
) const
{
    typedef paralution::LocalMatrix<double> pMatrix;
    typedef paralution::LocalVector<double> pVector;

    word precond_name = lduMatrix::preconditioner::getName(controlDict_);
    double div = controlDict_.lookupOrDefault<double>("div", 1e+08);
    int basis = controlDict_.lookupOrDefault<int>("BasisSize", 30);
    bool accel = controlDict_.lookupOrDefault<bool>("useAccelerator", true);
    word mformat = controlDict_.lookupOrDefault<word>("MatrixFormat", "CSR");
    word pformat = controlDict_.lookupOrDefault<word>("PrecondFormat", "CSR");
    int ILUp = controlDict_.lookupOrDefault<int>("ILUp", 0);
    int ILUq = controlDict_.lookupOrDefault<int>("ILUq", 1);
    int MEp = controlDict_.lookupOrDefault<int>("MEp", 1);
    word LBPre = controlDict_.lookupOrDefault<word>
    (
        "LastBlockPrecond",
        "paralution_Jacobi"
    );

    lduMatrix::solverPerformance solverPerf
    (
        typeName + '(' + precond_name + ')',
        fieldName_
    );

    const label nCells = psi.size();

    scalarField pA(nCells);
    scalarField wA(nCells);

    // --- Calculate A.psi
    matrix_.Amul(wA, psi, interfaceBouCoeffs_, interfaces_, cmpt);

    // --- Calculate initial residual field
    scalarField rA(source - wA);

    // --- Calculate normalisation factor
    scalar normFactor = this->normFactor(psi, source, wA, pA);

    // --- Calculate normalised residual norm
    solverPerf.initialResidual() = gSumMag(rA)/normFactor;
    solverPerf.finalResidual() = solverPerf.initialResidual();

    // TODO check why we cannot skip 1 iteration when initial residual
    // < relTol_ or why initial residual actually does not drop below relTol_
    if (!solverPerf.checkConvergence(tolerance_, relTol_))
    {
        // Map the dictionary string onto the PARALUTION matrix format;
        // unrecognised names keep the CSR default
        paralution::_matrix_format mf = paralution::CSR;

        if (mformat == "CSR") mf = paralution::CSR;
        else if (mformat == "DIA") mf = paralution::DIA;
        else if (mformat == "HYB") mf = paralution::HYB;
        else if (mformat == "ELL") mf = paralution::ELL;
        else if (mformat == "MCSR") mf = paralution::MCSR;
        else if (mformat == "BCSR") mf = paralution::BCSR;
        else if (mformat == "COO") mf = paralution::COO;
        else if (mformat == "DENSE") mf = paralution::DENSE;

        paralution::init_paralution();

        pVector x;
        pVector rhs;
        pMatrix mat;

        paralution::FGMRES<pMatrix, pVector, double> ls;

        import_openfoam_matrix(matrix(), &mat);
        import_openfoam_vector(source, &rhs);
        import_openfoam_vector(psi, &x);

        ls.Clear();

        if (accel)
        {
            mat.MoveToAccelerator();
            rhs.MoveToAccelerator();
            x.MoveToAccelerator();
        }

        // The preconditioner is owned by this function and deleted below;
        // the factory result is checked for NULL before use
        paralution::Preconditioner<pMatrix, pVector, double>* precond =
            GetPreconditioner<double>
            (
                precond_name,
                LBPre,
                pformat,
                ILUp,
                ILUq,
                MEp
            );

        if (precond != NULL)
        {
            ls.SetPreconditioner(*precond);
        }

        ls.SetOperator(mat);
        ls.SetBasisSize(basis);
        ls.Verbose(0);

        ls.Init
        (
            tolerance_*normFactor,  // abs
            relTol_,                // rel
            div,                    // div
            maxIter_                // max iter
        );

        ls.Build();

        switch (mf)
        {
            case paralution::DENSE:
                mat.ConvertToDENSE();
                break;
            case paralution::CSR:
                mat.ConvertToCSR();
                break;
            case paralution::MCSR:
                mat.ConvertToMCSR();
                break;
            case paralution::BCSR:
                mat.ConvertToBCSR();
                break;
            case paralution::COO:
                mat.ConvertToCOO();
                break;
            case paralution::DIA:
                mat.ConvertToDIA();
                break;
            case paralution::ELL:
                mat.ConvertToELL();
                break;
            case paralution::HYB:
                mat.ConvertToHYB();
                break;
        }

        // mat.info();

        ls.Solve(rhs, &x);

        export_openfoam_vector(x, &psi);

        // divide by normFactor, see lduMatrixSolver.C
        solverPerf.finalResidual() = ls.GetCurrentResidual()/normFactor;
        solverPerf.nIterations() = ls.GetIterationCount();
        solverPerf.checkConvergence(tolerance_, relTol_);

        ls.Clear();

        if (precond != NULL)
        {
            precond->Clear();
            delete precond;
        }

        paralution::stop_paralution();
    }

    return solverPerf;
}
// Apply nSweeps smoothing sweeps to psi.
//
//   psi     : field being smoothed; updated in place after every sweep
//   source  : right-hand side passed to matrix_.residual()
//   cmpt    : component index passed to matrix_.residual()
//   nSweeps : number of sweeps
//
// Each sweep computes the residual into a temporary field, scales it by
// rD_, runs a forward and a backward pass over the faces using the upper
// coefficients, and adds the result to psi.
//
// The Foam::Time::enterSec/leaveSec and commProfiler_ calls are profiling
// hooks (added by Xiaow) bracketing the whole call and each sweep.
void Foam::DICSmoother::smooth
(
    scalarField& psi,
    const scalarField& source,
    const direction cmpt,
    const label nSweeps
) const
{
    // Profiling: open the "DICSmoother" section
    Foam::Time::enterSec("DICSmoother");

    const scalar* const __restrict__ rDPtr = rD_.begin();
    const scalar* const __restrict__ upperPtr = matrix_.upper().begin();

    const label* const __restrict__ uPtr =
        matrix_.lduAddr().upperAddr().begin();
    const label* const __restrict__ lPtr =
        matrix_.lduAddr().lowerAddr().begin();

    // Temporary storage for the residual
    scalarField rA(rD_.size());
    scalar* __restrict__ rAPtr = rA.begin();

    // The number of faces does not change between sweeps
    const label nFaces = matrix_.upper().size();

    // Profiling: open the iteration section.  The returned id is not needed
    // here, so it is discarded explicitly.
    static_cast<void>(Foam::Time::commProfiler_.enterIterSec());

    for (label sweep=0; sweep<nSweeps; sweep++)
    {
        matrix_.residual
        (
            rA,
            psi,
            source,
            interfaceBouCoeffs_,
            interfaces_,
            cmpt
        );

        rA *= rD_;

        // Forward pass over the faces
        for (label face=0; face<nFaces; face++)
        {
            const label u = uPtr[face];
            rAPtr[u] -= rDPtr[u]*upperPtr[face]*rAPtr[lPtr[face]];
        }

        // Backward pass over the faces
        for (label face=nFaces-1; face>=0; face--)
        {
            const label l = lPtr[face];
            rAPtr[l] -= rDPtr[l]*upperPtr[face]*rAPtr[uPtr[face]];
        }

        psi += rA;

        // Profiling: mark the end of this sweep
        Foam::Time::commProfiler_.endSingleIter();
    }

    // Profiling: close the "DICSmoother" section
    Foam::Time::leaveSec();
}
// Preconditioned stabilised bi-conjugate-gradient style iteration for the
// matrix equation held by this solver.
//
//   psi    : solution field; initial guess on entry, updated in place
//   source : right-hand side
//   cmpt   : component index forwarded to Amul() and the preconditioner
//
// Returns the solver performance record.  Note the early return inside the
// loop when the intermediate residual sA already satisfies the tolerances.
Foam::solverPerformance Foam::PBiCGStab::solve
(
    scalarField& psi,
    const scalarField& source,
    const direction cmpt
) const
{
    // --- Solver performance data container
    solverPerformance solverPerf
    (
        lduMatrix::preconditioner::getName(controlDict_) + typeName,
        fieldName_
    );

    const label nCells = psi.size();

    scalar* __restrict__ psiPtr = psi.begin();

    scalarField pA(nCells);
    scalar* __restrict__ pAPtr = pA.begin();

    scalarField yA(nCells);
    scalar* __restrict__ yAPtr = yA.begin();

    // --- yA = A.psi
    matrix_.Amul(yA, psi, interfaceBouCoeffs_, interfaces_, cmpt);

    // --- Initial residual field
    scalarField rA(source - yA);
    scalar* __restrict__ rAPtr = rA.begin();

    // --- Normalisation factor
    const scalar normFactor = this->normFactor(psi, source, yA, pA);

    if (lduMatrix::debug >= 2)
    {
        Info<< " Normalisation factor = " << normFactor << endl;
    }

    // --- Normalised residual norm
    solverPerf.initialResidual() =
        gSumMag(rA, matrix().mesh().comm())
       /normFactor;
    solverPerf.finalResidual() = solverPerf.initialResidual();

    // --- Iterate when a minimum iteration count is requested or the
    //     initial residual does not already satisfy the tolerances
    if
    (
        minIter_ > 0
     || !solverPerf.checkConvergence(tolerance_, relTol_)
    )
    {
        scalarField AyA(nCells);
        scalar* __restrict__ AyAPtr = AyA.begin();

        scalarField sA(nCells);
        scalar* __restrict__ sAPtr = sA.begin();

        scalarField zA(nCells);
        scalar* __restrict__ zAPtr = zA.begin();

        scalarField tA(nCells);
        scalar* __restrict__ tAPtr = tA.begin();

        // --- Keep a copy of the initial residual
        const scalarField rA0(rA);

        // --- Initial values not used
        scalar rA0rA = 0;
        scalar alpha = 0;
        scalar omega = 0;

        // --- Select and construct the preconditioner
        autoPtr<lduMatrix::preconditioner> preconPtr =
            lduMatrix::preconditioner::New
            (
                *this,
                controlDict_
            );

        // --- Solver iteration
        do
        {
            // --- Remember the previous rA0rA
            const scalar rA0rAold = rA0rA;

            rA0rA = gSumProd(rA0, rA, matrix().mesh().comm());

            // --- Test for singularity
            if (solverPerf.checkSingularity(mag(rA0rA)))
            {
                break;
            }

            // --- Update pA
            if (solverPerf.nIterations() != 0)
            {
                // --- Test for singularity
                if (solverPerf.checkSingularity(mag(omega)))
                {
                    break;
                }

                const scalar beta = (rA0rA/rA0rAold)*(alpha/omega);

                for (label celli = 0; celli < nCells; celli++)
                {
                    pAPtr[celli] =
                        rAPtr[celli]
                      + beta*(pAPtr[celli] - omega*AyAPtr[celli]);
                }
            }
            else
            {
                // First pass: start from the residual itself
                for (label celli = 0; celli < nCells; celli++)
                {
                    pAPtr[celli] = rAPtr[celli];
                }
            }

            // --- Precondition pA
            preconPtr->precondition(yA, pA, cmpt);

            // --- AyA = A.yA
            matrix_.Amul(AyA, yA, interfaceBouCoeffs_, interfaces_, cmpt);

            const scalar rA0AyA =
                gSumProd(rA0, AyA, matrix().mesh().comm());

            alpha = rA0rA/rA0AyA;

            // --- Calculate sA
            for (label celli = 0; celli < nCells; celli++)
            {
                sAPtr[celli] = rAPtr[celli] - alpha*AyAPtr[celli];
            }

            // --- Test sA for convergence
            solverPerf.finalResidual() =
                gSumMag(sA, matrix().mesh().comm())/normFactor;

            if (solverPerf.checkConvergence(tolerance_, relTol_))
            {
                // Converged on the half step: apply the alpha update only,
                // count this iteration and leave
                for (label celli = 0; celli < nCells; celli++)
                {
                    psiPtr[celli] += alpha*yAPtr[celli];
                }

                solverPerf.nIterations()++;

                return solverPerf;
            }

            // --- Precondition sA
            preconPtr->precondition(zA, sA, cmpt);

            // --- tA = A.zA
            matrix_.Amul(tA, zA, interfaceBouCoeffs_, interfaces_, cmpt);

            const scalar tAtA = gSumSqr(tA, matrix().mesh().comm());

            // --- Calculate omega from tA and sA
            //     (cheaper than using zA with preconditioned tA)
            omega = gSumProd(tA, sA, matrix().mesh().comm())/tAtA;

            // --- Update solution and residual
            for (label celli = 0; celli < nCells; celli++)
            {
                psiPtr[celli] += alpha*yAPtr[celli] + omega*zAPtr[celli];
                rAPtr[celli] = sAPtr[celli] - omega*tAPtr[celli];
            }

            solverPerf.finalResidual() =
                gSumMag(rA, matrix().mesh().comm())
               /normFactor;

        } while
        (
            (
                solverPerf.nIterations()++ < maxIter_
            && !solverPerf.checkConvergence(tolerance_, relTol_)
            )
         || solverPerf.nIterations() < minIter_
        );
    }

    return solverPerf;
}
// Preconditioned bi-conjugate-gradient style iteration that advances the
// residual of A and the residual of its transpose side by side, using
// scalar (not per-component) coefficients alpha and beta.
//
//   psi : solution field; initial guess on entry, updated in place
//
// Returns the SolverPerformance record filled with the initial and final
// normalised residuals and the iteration count.
Foam::SolverPerformance<Type> Foam::PBiCCCG<Type, DType, LUType>::solve
(
    Field<Type>& psi
) const
{
    word preconditionerName(this->controlDict_.lookup("preconditioner"));

    // --- Solver performance data container
    SolverPerformance<Type> solverPerf
    (
        preconditionerName + typeName,
        this->fieldName_
    );

    label nCells = psi.size();

    // --- Work fields and raw pointers into them
    Type* __restrict__ psiPtr = psi.begin();

    Field<Type> pA(nCells);
    Type* __restrict__ pAPtr = pA.begin();

    Field<Type> pT(nCells, Zero);
    Type* __restrict__ pTPtr = pT.begin();

    Field<Type> wA(nCells);
    Type* __restrict__ wAPtr = wA.begin();

    Field<Type> wT(nCells);
    Type* __restrict__ wTPtr = wT.begin();

    scalar wArT = 1e15; //this->matrix_.great_;
    scalar wArTold = wArT;

    // --- wA = A.psi and wT = T.psi
    this->matrix_.Amul(wA, psi);
    this->matrix_.Tmul(wT, psi);

    // --- Initial residual and transpose residual fields
    Field<Type> rA(this->matrix_.source() - wA);
    Field<Type> rT(this->matrix_.source() - wT);
    Type* __restrict__ rAPtr = rA.begin();
    Type* __restrict__ rTPtr = rT.begin();

    // --- Normalisation factor
    Type normFactor = this->normFactor(psi, wA, pA);

    if (LduMatrix<Type, DType, LUType>::debug >= 2)
    {
        Info<< " Normalisation factor = " << normFactor << endl;
    }

    // --- Normalised residual norm
    solverPerf.initialResidual() = cmptDivide(gSumCmptMag(rA), normFactor);
    solverPerf.finalResidual() = solverPerf.initialResidual();

    // --- Iterate when a minimum iteration count is requested or the
    //     initial residual does not already satisfy the tolerances
    if
    (
        this->minIter_ > 0
     || !solverPerf.checkConvergence(this->tolerance_, this->relTol_)
    )
    {
        // --- Select and construct the preconditioner
        autoPtr<typename LduMatrix<Type, DType, LUType>::preconditioner>
        preconPtr = LduMatrix<Type, DType, LUType>::preconditioner::New
        (
            *this,
            this->controlDict_
        );

        // --- Solver iteration
        do
        {
            // --- Remember the previous wArT
            wArTold = wArT;

            // --- Precondition residuals
            preconPtr->precondition(wA, rA);
            preconPtr->preconditionT(wT, rT);

            // --- Update search directions
            wArT = gSumProd(wA, rT);

            if (solverPerf.nIterations() != 0)
            {
                scalar beta = wArT/wArTold;

                for (label celli = 0; celli < nCells; celli++)
                {
                    pAPtr[celli] = wAPtr[celli] + (beta* pAPtr[celli]);
                    pTPtr[celli] = wTPtr[celli] + (beta* pTPtr[celli]);
                }
            }
            else
            {
                // First pass: start from the preconditioned residuals
                for (label celli = 0; celli < nCells; celli++)
                {
                    pAPtr[celli] = wAPtr[celli];
                    pTPtr[celli] = wTPtr[celli];
                }
            }

            // --- Update preconditioned residuals
            this->matrix_.Amul(wA, pA);
            this->matrix_.Tmul(wT, pT);

            scalar wApT = gSumProd(wA, pT);

            // --- Test for singularity
            //     (exits without incrementing the iteration count)
            if
            (
                solverPerf.checkSingularity
                (
                    cmptDivide(pTraits<Type>::one*mag(wApT), normFactor)
                )
            )
            {
                break;
            }

            // --- Update solution and residuals
            scalar alpha = wArT/wApT;

            for (label celli = 0; celli < nCells; celli++)
            {
                psiPtr[celli] += (alpha* pAPtr[celli]);
                rAPtr[celli] -= (alpha* wAPtr[celli]);
                rTPtr[celli] -= (alpha* wTPtr[celli]);
            }

            solverPerf.finalResidual() =
                cmptDivide(gSumCmptMag(rA), normFactor);

        } while
        (
            (
                solverPerf.nIterations()++ < this->maxIter_
            && !solverPerf.checkConvergence(this->tolerance_, this->relTol_)
            )
         || solverPerf.nIterations() < this->minIter_
        );
    }

    return solverPerf;
}
// Preconditioned bi-conjugate-gradient style iteration that advances the
// residual of A and the residual of its transpose side by side.
//
//   x    : solution field; initial guess on entry, updated in place
//   b    : right-hand side
//   cmpt : component index forwarded to Amul()/Tmul(), normFactor() and
//          the preconditioner
//
// Returns the solver performance record.  Termination is decided by
// stop(solverPerf).
//
// The obsolete 'register' storage class (removed in C++17) is not used.
Foam::lduSolverPerformance Foam::PBiCG::solve
(
    scalarField& x,
    const scalarField& b,
    const direction cmpt
) const
{
    // --- Setup class containing solver performance data
    lduSolverPerformance solverPerf
    (
        lduMatrix::preconditioner::getName(dict()) + typeName,
        fieldName()
    );

    const label nCells = x.size();

    scalar* __restrict__ xPtr = x.begin();

    scalarField pA(nCells);
    scalar* __restrict__ pAPtr = pA.begin();

    scalarField pT(nCells, 0.0);
    scalar* __restrict__ pTPtr = pT.begin();

    scalarField wA(nCells);
    scalar* __restrict__ wAPtr = wA.begin();

    scalarField wT(nCells);
    scalar* __restrict__ wTPtr = wT.begin();

    scalar wArT = matrix_.great_;
    scalar wArTold = wArT;

    // Calculate A.x and T.x
    matrix_.Amul(wA, x, coupleBouCoeffs_, interfaces_, cmpt);
    matrix_.Tmul(wT, x, coupleIntCoeffs_, interfaces_, cmpt);

    // Calculate initial residual and transpose residual fields
    scalarField rA(b - wA);
    scalarField rT(b - wT);
    scalar* __restrict__ rAPtr = rA.begin();
    scalar* __restrict__ rTPtr = rT.begin();

    // Calculate normalisation factor
    scalar normFactor = this->normFactor(x, b, wA, pA, cmpt);

    if (lduMatrix::debug >= 2)
    {
        Info<< " Normalisation factor = " << normFactor << endl;
    }

    // Calculate normalised residual norm
    solverPerf.initialResidual() = gSumMag(rA)/normFactor;
    solverPerf.finalResidual() = solverPerf.initialResidual();

    // Check convergence, solve if not converged
    if (!stop(solverPerf))
    {
        // Select and construct the preconditioner
        autoPtr<lduPreconditioner> preconPtr;

        preconPtr =
            lduPreconditioner::New
            (
                matrix_,
                coupleBouCoeffs_,
                coupleIntCoeffs_,
                interfaces_,
                dict()
            );

        // Solver iteration
        do
        {
            // Store previous wArT
            wArTold = wArT;

            // Precondition residuals
            preconPtr->precondition(wA, rA, cmpt);
            preconPtr->preconditionT(wT, rT, cmpt);

            // Update search directions:
            wArT = gSumProd(wA, rT);

            if (solverPerf.nIterations() == 0)
            {
                // First pass: start from the preconditioned residuals
                for (label cell=0; cell<nCells; cell++)
                {
                    pAPtr[cell] = wAPtr[cell];
                    pTPtr[cell] = wTPtr[cell];
                }
            }
            else
            {
                scalar beta = wArT/wArTold;

                for (label cell=0; cell<nCells; cell++)
                {
                    pAPtr[cell] = wAPtr[cell] + beta*pAPtr[cell];
                    pTPtr[cell] = wTPtr[cell] + beta*pTPtr[cell];
                }
            }

            // Update preconditioned residuals
            matrix_.Amul(wA, pA, coupleBouCoeffs_, interfaces_, cmpt);
            matrix_.Tmul(wT, pT, coupleIntCoeffs_, interfaces_, cmpt);

            scalar wApT = gSumProd(wA, pT);

            // Test for singularity
            // (leaves the loop without incrementing the iteration count)
            if (solverPerf.checkSingularity(mag(wApT)/normFactor))
            {
                break;
            }

            // Update solution and residual:
            scalar alpha = wArT/wApT;

            for (label cell=0; cell<nCells; cell++)
            {
                xPtr[cell] += alpha*pAPtr[cell];
                rAPtr[cell] -= alpha*wAPtr[cell];
                rTPtr[cell] -= alpha*wTPtr[cell];
            }

            solverPerf.finalResidual() = gSumMag(rA)/normFactor;
            solverPerf.nIterations()++;

        } while (!stop(solverPerf));
    }

    return solverPerf;
}
// Preconditioned bi-conjugate-gradient style iteration that advances the
// residual of A and the residual of its transpose side by side.
//
//   psi    : solution field; initial guess on entry, updated in place
//   source : right-hand side
//   cmpt   : component index forwarded to Amul()/Tmul() and the
//            preconditioner
//
// Returns the solver performance record filled with the initial and final
// normalised residuals and the iteration count.
Foam::solverPerformance Foam::PBiCG::solve
(
    scalarField& psi,
    const scalarField& source,
    const direction cmpt
) const
{
    // --- Solver performance data container
    solverPerformance solverPerf
    (
        lduMatrix::preconditioner::getName(controlDict_) + typeName,
        fieldName_
    );

    label nCells = psi.size();

    // --- Work fields and raw pointers into them
    scalar* __restrict__ psiPtr = psi.begin();

    scalarField pA(nCells);
    scalar* __restrict__ pAPtr = pA.begin();

    scalarField pT(nCells, 0.0);
    scalar* __restrict__ pTPtr = pT.begin();

    scalarField wA(nCells);
    scalar* __restrict__ wAPtr = wA.begin();

    scalarField wT(nCells);
    scalar* __restrict__ wTPtr = wT.begin();

    scalar wArT = solverPerf.great_;
    scalar wArTold = wArT;

    // --- wA = A.psi and wT = T.psi
    matrix_.Amul(wA, psi, interfaceBouCoeffs_, interfaces_, cmpt);
    matrix_.Tmul(wT, psi, interfaceIntCoeffs_, interfaces_, cmpt);

    // --- Initial residual and transpose residual fields
    scalarField rA(source - wA);
    scalarField rT(source - wT);
    scalar* __restrict__ rAPtr = rA.begin();
    scalar* __restrict__ rTPtr = rT.begin();

    // --- Normalisation factor
    scalar normFactor = this->normFactor(psi, source, wA, pA);

    if (lduMatrix::debug >= 2)
    {
        Info<< " Normalisation factor = " << normFactor << endl;
    }

    // --- Normalised residual norm
    solverPerf.initialResidual() =
        gSumMag(rA, matrix().mesh().comm())
       /normFactor;
    solverPerf.finalResidual() = solverPerf.initialResidual();

    // --- Iterate when a minimum iteration count is requested or the
    //     initial residual does not already satisfy the tolerances
    if
    (
        minIter_ > 0
     || !solverPerf.checkConvergence(tolerance_, relTol_)
    )
    {
        // --- Select and construct the preconditioner
        autoPtr<lduMatrix::preconditioner> preconPtr =
            lduMatrix::preconditioner::New
            (
                *this,
                controlDict_
            );

        // --- Solver iteration
        do
        {
            // --- Remember the previous wArT
            wArTold = wArT;

            // --- Precondition residuals
            preconPtr->precondition(wA, rA, cmpt);
            preconPtr->preconditionT(wT, rT, cmpt);

            // --- Update search directions
            wArT = gSumProd(wA, rT, matrix().mesh().comm());

            if (solverPerf.nIterations() != 0)
            {
                scalar beta = wArT/wArTold;

                for (label celli = 0; celli < nCells; celli++)
                {
                    pAPtr[celli] = wAPtr[celli] + beta*pAPtr[celli];
                    pTPtr[celli] = wTPtr[celli] + beta*pTPtr[celli];
                }
            }
            else
            {
                // First pass: start from the preconditioned residuals
                for (label celli = 0; celli < nCells; celli++)
                {
                    pAPtr[celli] = wAPtr[celli];
                    pTPtr[celli] = wTPtr[celli];
                }
            }

            // --- Update preconditioned residuals
            matrix_.Amul(wA, pA, interfaceBouCoeffs_, interfaces_, cmpt);
            matrix_.Tmul(wT, pT, interfaceIntCoeffs_, interfaces_, cmpt);

            scalar wApT = gSumProd(wA, pT, matrix().mesh().comm());

            // --- Test for singularity
            //     (exits without incrementing the iteration count)
            if (solverPerf.checkSingularity(mag(wApT)/normFactor))
            {
                break;
            }

            // --- Update solution and residuals
            scalar alpha = wArT/wApT;

            for (label celli = 0; celli < nCells; celli++)
            {
                psiPtr[celli] += alpha*pAPtr[celli];
                rAPtr[celli] -= alpha*wAPtr[celli];
                rTPtr[celli] -= alpha*wTPtr[celli];
            }

            solverPerf.finalResidual() =
                gSumMag(rA, matrix().mesh().comm())
               /normFactor;

        } while
        (
            (
                solverPerf.nIterations()++ < maxIter_
            && !solverPerf.checkConvergence(tolerance_, relTol_)
            )
         || solverPerf.nIterations() < minIter_
        );
    }

    return solverPerf;
}
// Solve the matrix equation with PARALUTION's AMG solver, using manually
// supplied per-level smoothers and a manually supplied coarse-grid solver.
//
//   psi    : solution field; initial guess on entry, overwritten with the
//            PARALUTION result when a solve is performed
//   source : right-hand side
//   cmpt   : component index forwarded to Amul()
//
// All tuning parameters are read from controlDict_ with the defaults shown
// below.  The initial residual is computed on the OpenFOAM side; the
// PARALUTION solve only runs when that residual does not already satisfy
// tolerance_/relTol_.
//
// Returns the solver performance record.
//
// Notes:
//  - a smoother preconditioner is only attached when the factory actually
//    returned one, mirroring the NULL check done for the coarse-grid
//    preconditioner;
//  - the obsolete 'register' storage class (removed in C++17) is not used.
Foam::solverPerformance Foam::paralution_AMG::solve
(
    scalarField& psi,
    const scalarField& source,
    const direction cmpt
) const
{
    typedef paralution::LocalMatrix<double> pMatrix;
    typedef paralution::LocalVector<double> pVector;
    typedef paralution::IterativeLinearSolver<pMatrix, pVector, double>
        pIterSolver;
    typedef paralution::Preconditioner<pMatrix, pVector, double> pPrecond;

    word precond_name = lduMatrix::preconditioner::getName(controlDict_);
    double div = controlDict_.lookupOrDefault<double>("div", 1e+08);
    bool accel = controlDict_.lookupOrDefault<bool>("useAccelerator", true);
    word mformat = controlDict_.lookupOrDefault<word>("MatrixFormat", "CSR");
    word pformat = controlDict_.lookupOrDefault<word>("PrecondFormat", "CSR");
    word sformat = controlDict_.lookupOrDefault<word>("SmootherFormat", "CSR");
    word solver_name =
        controlDict_.lookupOrDefault<word>("CoarseGridSolver", "CG");
    word smoother_name = controlDict_.lookupOrDefault<word>
    (
        "smoother",
        "paralution_MultiColoredGS"
    );
    int MEp = controlDict_.lookupOrDefault<int>("MEp", 1);
    word LBPre = controlDict_.lookupOrDefault<word>
    (
        "LastBlockPrecond",
        "paralution_Jacobi"
    );
    int iterPreSmooth = controlDict_.lookupOrDefault<int>("nPreSweeps", 1);
    int iterPostSmooth = controlDict_.lookupOrDefault<int>("nPostSweeps", 2);
    double epsCoupling =
        controlDict_.lookupOrDefault<double>("couplingStrength", 0.01);
    int coarsestCells =
        controlDict_.lookupOrDefault<int>("nCellsInCoarsestLevel", 300);
    int ILUp = controlDict_.lookupOrDefault<int>("ILUp", 0);
    int ILUq = controlDict_.lookupOrDefault<int>("ILUq", 1);
    double relax = controlDict_.lookupOrDefault<double>("Relaxation", 1.0);
    double aggrrelax =
        controlDict_.lookupOrDefault<double>("AggrRelax", 2./3.);
    bool scaling =
        controlDict_.lookupOrDefault<bool>("scaleCorrection", true);
    word interp_name = controlDict_.lookupOrDefault<word>
    (
        "InterpolationType",
        "SmoothedAggregation"
    );

    solverPerformance solverPerf
    (
        typeName + '(' + precond_name + ')',
        fieldName_
    );

    const label nCells = psi.size();

    scalarField pA(nCells);
    scalarField wA(nCells);

    // --- Calculate A.psi
    matrix_.Amul(wA, psi, interfaceBouCoeffs_, interfaces_, cmpt);

    // --- Calculate initial residual field
    scalarField rA(source - wA);

    // --- Calculate normalisation factor
    scalar normFactor = this->normFactor(psi, source, wA, pA);

    // --- Calculate normalised residual norm
    solverPerf.initialResidual() = gSumMag(rA)/normFactor;
    solverPerf.finalResidual() = solverPerf.initialResidual();

    if (!solverPerf.checkConvergence(tolerance_, relTol_))
    {
        // Map the dictionary string onto the PARALUTION matrix format;
        // unrecognised names keep the CSR default
        paralution::_matrix_format mf = paralution::CSR;

        if (mformat == "CSR") mf = paralution::CSR;
        else if (mformat == "DIA") mf = paralution::DIA;
        else if (mformat == "HYB") mf = paralution::HYB;
        else if (mformat == "ELL") mf = paralution::ELL;
        else if (mformat == "MCSR") mf = paralution::MCSR;
        else if (mformat == "BCSR") mf = paralution::BCSR;
        else if (mformat == "COO") mf = paralution::COO;
        else if (mformat == "DENSE") mf = paralution::DENSE;

        // Interpolation type; unrecognised names keep SmoothedAggregation
        paralution::_interp ip = paralution::SmoothedAggregation;

        if (interp_name == "SmoothedAggregation")
        {
            ip = paralution::SmoothedAggregation;
        }
        else if (interp_name == "Aggregation")
        {
            ip = paralution::Aggregation;
        }

        pVector x;
        pVector rhs;
        pMatrix mat;

        paralution::AMG<pMatrix, pVector, double> ls;

        paralution::import_openfoam_matrix(matrix(), &mat);
        paralution::import_openfoam_vector(source, &rhs);
        paralution::import_openfoam_vector(psi, &x);

        ls.SetOperator(mat);

        // coupling strength
        ls.SetCouplingStrength(epsCoupling);
        // number of unknowns on coarsest level
        ls.SetCoarsestLevel(coarsestCells);
        // interpolation type for grid transfer operators
        ls.SetInterpolation(ip);
        // Relaxation parameter for smoothed interpolation aggregation
        ls.SetInterpRelax(aggrrelax);
        // Manual smoothers
        ls.SetManualSmoothers(true);
        // Manual coarse grid solver
        ls.SetManualSolver(true);
        // grid transfer scaling
        ls.SetScaling(scaling);
        // operator format
        ls.SetOperatorFormat(mf);
        ls.SetSmootherPreIter(iterPreSmooth);
        ls.SetSmootherPostIter(iterPostSmooth);

        ls.BuildHierarchy();

        int levels = ls.GetNumLevels();

        // Smoother via preconditioned FixedPoint iteration:
        // one solver/preconditioner pair for each of the levels-1 levels
        pIterSolver** fp = new pIterSolver*[levels-1];
        pPrecond** sm = new pPrecond*[levels-1];

        for (int i=0; i<levels-1; ++i)
        {
            fp[i] = paralution::GetIterativeLinearSolver<double>
            (
                "paralution_FixedPoint",
                relax
            );
            sm[i] = paralution::GetPreconditioner<double>
            (
                smoother_name,
                LBPre,
                sformat,
                ILUp,
                ILUq,
                MEp
            );

            // Only attach a smoother preconditioner that was actually
            // created; dereferencing a NULL result would be undefined
            if (sm[i] != NULL)
            {
                fp[i]->SetPreconditioner(*sm[i]);
            }

            fp[i]->Verbose(0);
        }

        // Coarse Grid Solver and its Preconditioner
        pIterSolver* cgs =
            paralution::GetIterativeLinearSolver<double>(solver_name, relax);
        cgs->Verbose(0);

        pPrecond* cgp = paralution::GetPreconditioner<double>
        (
            precond_name,
            LBPre,
            pformat,
            ILUp,
            ILUq,
            MEp
        );

        if (cgp != NULL)
        {
            cgs->SetPreconditioner(*cgp);
        }

        ls.SetSmoother(fp);
        ls.SetSolver(*cgs);

        // Switch to L1 norm to be consistent with OpenFOAM solvers
        ls.SetResidualNorm(1);

        ls.Init
        (
            tolerance_*normFactor,  // abs
            relTol_,                // rel
            div,                    // div
            maxIter_                // max iter
        );

        ls.Build();

        if (accel)
        {
            mat.MoveToAccelerator();
            rhs.MoveToAccelerator();
            x.MoveToAccelerator();
            ls.MoveToAccelerator();
        }

        switch (mf)
        {
            case paralution::DENSE:
                mat.ConvertToDENSE();
                break;
            case paralution::CSR:
                mat.ConvertToCSR();
                break;
            case paralution::MCSR:
                mat.ConvertToMCSR();
                break;
            case paralution::BCSR:
                mat.ConvertToBCSR();
                break;
            case paralution::COO:
                mat.ConvertToCOO();
                break;
            case paralution::DIA:
                mat.ConvertToDIA();
                break;
            case paralution::ELL:
                mat.ConvertToELL();
                break;
            case paralution::HYB:
                mat.ConvertToHYB();
                break;
        }

        ls.Verbose(0);

        // Solve linear system
        ls.Solve(rhs, &x);

        paralution::export_openfoam_vector(x, &psi);

        // divide by normFactor, see lduMatrixSolver.C
        solverPerf.finalResidual() = ls.GetCurrentResidual()/normFactor;
        solverPerf.nIterations() = ls.GetIterationCount();
        solverPerf.checkConvergence(tolerance_, relTol_);

        // Clear MultiGrid object
        ls.Clear();

        // Free all structures (delete on a NULL pointer is a no-op)
        for (int i=0; i<levels-1; ++i)
        {
            delete fp[i];
            delete sm[i];
        }

        cgs->Clear();

        delete cgp;

        delete[] fp;
        delete[] sm;

        delete cgs;
    }

    return solverPerf;
}
// Solve linear system using Gaussian elimination // This method changes the content of the matrix mpA Vector LinearSystem::Solve() { Vector m(mSize); //See description in Appendix A Vector solution(mSize); // We introduce references to make the syntax readable Matrix& rA = *mpA; //NB not reference in published version Vector& rb = *mpb; //NB not reference in published version // forward sweep of Gaussian elimination for (int k=0; k<mSize-1; k++) { // see if pivoting is necessary double max = 0.0; int row = -1; for (int i=k; i<mSize; i++) { if (fabs(rA(i+1,k+1)) > max) { row = i; max=fabs(rA(i+1,k+1)); //NB bug in published version } } assert(row >= 0); //NB bug in published version // pivot if necessary if (row != k) { // swap matrix rows k+1 with row+1 for (int i=0; i<mSize; i++) { double temp = rA(k+1,i+1); rA(k+1,i+1) = rA(row+1,i+1); rA(row+1,i+1) = temp; } // swap vector entries k+1 with row+1 double temp = rb(k+1); rb(k+1) = rb(row+1); rb(row+1) = temp; } // create zeros in lower part of column k for (int i=k+1; i<mSize; i++) { m(i+1) = rA(i+1,k+1)/rA(k+1,k+1); for (int j=k; j<mSize; j++) { rA(i+1,j+1) -= rA(k+1,j+1)*m(i+1); } rb(i+1) -= rb(k+1)*m(i+1); } } // back substitution for (int i=mSize-1; i>-1; i--) { solution(i+1) = rb(i+1); for (int j=i+1; j<mSize; j++) { solution(i+1) -= rA(i+1,j+1)*solution(j+1); } solution(i+1) /= rA(i+1,i+1); } return solution; }
// gpuField variant of the preconditioned bi-conjugate-gradient style
// iteration: the residual of A and the residual of its transpose are
// advanced side by side, with the element-wise updates expressed as
// thrust::copy / thrust::transform calls over the fields.
//
//   psi : solution field; initial guess on entry, updated in place
//
// Returns the SolverPerformance record filled with the initial and final
// normalised residuals and the iteration count.
//
// The obsolete 'register' storage class (removed in C++17) is not used.
Foam::SolverPerformance<Type> Foam::PBiCCCG<Type, DType, LUType>::solve
(
    gpuField<Type>& psi
) const
{
    word preconditionerName(this->controlDict_.lookup("preconditioner"));

    // --- Setup class containing solver performance data
    SolverPerformance<Type> solverPerf
    (
        preconditionerName + typeName,
        this->fieldName_
    );

    const label nCells = psi.size();

    gpuField<Type> pA(nCells);
    gpuField<Type> pT(nCells, pTraits<Type>::zero);

    gpuField<Type> wA(nCells);
    gpuField<Type> wT(nCells);

    scalar wArT = 1e15; //this->matrix_.great_;
    scalar wArTold = wArT;

    // --- Calculate A.psi and T.psi
    this->matrix_.Amul(wA, psi);
    this->matrix_.Tmul(wT, psi);

    // --- Calculate initial residual and transpose residual fields
    gpuField<Type> rA(this->matrix_.source() - wA);
    gpuField<Type> rT(this->matrix_.source() - wT);

    // --- Calculate normalisation factor
    Type normFactor = this->normFactor(psi, wA, pA);

    if (LduMatrix<Type, DType, LUType>::debug >= 2)
    {
        Info<< " Normalisation factor = " << normFactor << endl;
    }

    // --- Calculate normalised residual norm
    solverPerf.initialResidual() = cmptDivide(gSumCmptMag(rA), normFactor);
    solverPerf.finalResidual() = solverPerf.initialResidual();

    // --- Check convergence, solve if not converged
    if
    (
        this->minIter_ > 0
     || !solverPerf.checkConvergence(this->tolerance_, this->relTol_)
    )
    {
        // --- Select and construct the preconditioner
        autoPtr<typename LduMatrix<Type, DType, LUType>::preconditioner>
        preconPtr = LduMatrix<Type, DType, LUType>::preconditioner::New
        (
            *this,
            this->controlDict_
        );

        // --- Solver iteration
        do
        {
            // --- Store previous wArT
            wArTold = wArT;

            // --- Precondition residuals
            preconPtr->precondition(wA, rA);
            preconPtr->preconditionT(wT, rT);

            // --- Update search directions:
            wArT = gSumProd(wA, rT);

            if (solverPerf.nIterations() == 0)
            {
                // First pass: pA = wA, pT = wT
                thrust::copy(wA.begin(), wA.end(), pA.begin());
                thrust::copy(wT.begin(), wT.end(), pT.begin());
            }
            else
            {
                scalar beta = wArT/wArTold;

                // pA = wA + beta*pA
                thrust::transform
                (
                    wA.begin(),
                    wA.end(),
                    thrust::make_transform_iterator
                    (
                        pA.begin(),
                        multiplyOperatorSFFunctor<scalar,Type,Type>(beta)
                    ),
                    pA.begin(),
                    addOperatorFunctor<Type,Type,Type>()
                );

                // pT = wT + beta*pT
                thrust::transform
                (
                    wT.begin(),
                    wT.end(),
                    thrust::make_transform_iterator
                    (
                        pT.begin(),
                        multiplyOperatorSFFunctor<scalar,Type,Type>(beta)
                    ),
                    pT.begin(),
                    addOperatorFunctor<Type,Type,Type>()
                );
            }

            // --- Update preconditioned residuals
            this->matrix_.Amul(wA, pA);
            this->matrix_.Tmul(wT, pT);

            scalar wApT = gSumProd(wA, pT);

            // --- Test for singularity
            //     (leaves the loop without incrementing the iteration count)
            if
            (
                solverPerf.checkSingularity
                (
                    cmptDivide(pTraits<Type>::one*mag(wApT), normFactor)
                )
            )
            {
                break;
            }

            // --- Update solution and residual:
            scalar alpha = wArT/wApT;

            // psi = psi + alpha*pA
            thrust::transform
            (
                psi.begin(),
                psi.end(),
                thrust::make_transform_iterator
                (
                    pA.begin(),
                    multiplyOperatorSFFunctor<scalar,Type,Type>(alpha)
                ),
                psi.begin(),
                addOperatorFunctor<Type,Type,Type>()
            );

            // rA = rA - alpha*wA
            thrust::transform
            (
                rA.begin(),
                rA.end(),
                thrust::make_transform_iterator
                (
                    wA.begin(),
                    multiplyOperatorSFFunctor<scalar,Type,Type>(alpha)
                ),
                rA.begin(),
                subtractOperatorFunctor<Type,Type,Type>()
            );

            // rT = rT - alpha*wT
            thrust::transform
            (
                rT.begin(),
                rT.end(),
                thrust::make_transform_iterator
                (
                    wT.begin(),
                    multiplyOperatorSFFunctor<scalar,Type,Type>(alpha)
                ),
                rT.begin(),
                subtractOperatorFunctor<Type,Type,Type>()
            );

            solverPerf.finalResidual() =
                cmptDivide(gSumCmptMag(rA), normFactor);

        } while
        (
            (
                solverPerf.nIterations()++ < this->maxIter_
            && !solverPerf.checkConvergence(this->tolerance_, this->relTol_)
            )
         || solverPerf.nIterations() < this->minIter_
        );
    }

    return solverPerf;
}