void sLinsysRoot::dumpRhs(int proc, const char* nameToken, SimpleVector& rhs) { int n = rhs.length(); char szNumber[30]; string strBuffer=""; int iter = g_iterNumber; if(iter!=0 && iter!=2 && iter!=20 && iter!=25 && iter!=55) return; char ipmPhase[4]; if(g_iterNumber-iter>0) strcpy(ipmPhase, "co"); else strcpy(ipmPhase, "pr"); char szFilename[256]; sprintf(szFilename, "%s_%s_%d__%d.mat", nameToken,ipmPhase, n, iter); for(int i=0; i<n; i++) { sprintf(szNumber, "%22.16f ", rhs[i]); strBuffer += szNumber; } FILE* file = fopen(szFilename, "w"); assert(file); fwrite(strBuffer.c_str(), 1, strBuffer.length(), file); fclose(file); }
/**
 * Adds this node's term to a Schur-complement residual:
 *
 *   w   = [ R ; A ; C ] * x        (stacked: locnx, locmy, locmz rows)
 *   w   = result of solver->solve(w)
 *   res = res + R^T*w_1 + A^T*w_2 + C^T*w_3
 *
 * where R, A and C are the local cross-Hessian, A and C matrices of 'prob'.
 *
 * @param prob data of this node (provides R, A and C)
 * @param res  residual that is updated in place
 * @param x    vector multiplied by [R;A;C]; its length must equal the number
 *             of columns of R (asserted)
 */
void sLinsys::addTermToSchurResidual(sData* prob,
				     SimpleVector& res,
				     SimpleVector& x)
{
  assert(gOuterSolve<3 );

  SparseGenMatrix& A = prob->getLocalA();
  SparseGenMatrix& C = prob->getLocalC();
  SparseGenMatrix& R = prob->getLocalCrossHessian();

  // row counts must agree with the local block sizes
  int nRows, nColsParent;
  A.getSize(nRows, nColsParent); assert(nRows==locmy);
  C.getSize(nRows, nColsParent); assert(nRows==locmz);
  R.getSize(nRows, nColsParent); assert(nRows==locnx);
  assert(nColsParent==x.length());

  // offsets of the A- and C-parts inside the stacked work vector
  const int offsetA = locnx;
  const int offsetC = locnx+locmy;

  // NOTE(review): 'work' is not cleared before being used as the output of
  // mult(...) with beta=0.0 (the original had setToZero() commented out).
  // This relies on mult ignoring the previous content -- confirm.
  SimpleVector work(offsetC+locmz);

  // work = [R;A;C] * x
  R.mult( 0.0,&work[0],1,       1.0,&x[0],1);
  A.mult( 0.0,&work[offsetA],1, 1.0,&x[0],1);
  C.mult( 0.0,&work[offsetC],1, 1.0,&x[0],1);

  solver->solve(work);

  // res += [R^T A^T C^T] * work
  R.transMult(1.0,&res[0],1, 1.0,&work[0],1);
  A.transMult(1.0,&res[0],1, 1.0,&work[offsetA],1);
  C.transMult(1.0,&res[0],1, 1.0,&work[offsetC],1);
}
/**
 * Saves 'v' to the file "rhsDump-<g_iterNumber>-<rhsCount>.dat".
 *
 * rhsCount is incremented first, so every call writes to a new file name.
 * The file holds the length of the vector on the first line, followed by
 * all entries in scientific notation (precision 16) separated by blanks.
 * Progress messages are printed to cout.
 *
 * @param v vector to be saved
 * @return always 0
 */
int dumpRhs(SimpleVector& v)
{
  ++rhsCount;

  char path[1024];
  sprintf(path, "rhsDump-%g-%d.dat", g_iterNumber, rhsCount);
  cout << "saving to: " << path << " ...";

  ofstream out(path);
  out << scientific;
  out.precision(16);

  // header line: number of entries
  out << v.length() << endl;

  // all entries on a single line
  const int count = v.length();
  for(int k=0; k<count; ++k) {
    out << v[k] << " ";
  }
  out << endl;

  cout << "done!" << endl;
  return 0;
}
// solve aggregation system void sLinsysRootAggregation::solveReduced( sData *prob, SimpleVector& sol_0) { int myRank; MPI_Comm_rank(mpiComm, &myRank); SimpleVector& aggRHS = (*redRhs); assert(locmz==0); // no inequalities assert(locnx+locmy==sol_0.length()); assert(aggRHS.length() >= sol_0.length() && aggRHS.length()>0); // build aggregation rhs calcPreCondKKTResids(prob); computeReducedRhs(); _joinRedRHS( aggRHS, *temp_rhs_x, *temp_rhs_y); // solve aggregation system solver->Dsolve(aggRHS); // reset 1st stage var _separateVars( aggRHS, *temp_rhs_x, *temp_rhs_y); _set1stStVar(aggRHS, sol_0); }
/* * y = alpha*Lni^T x + beta*y * * ( [ R 0 0 ] ) * y = beta*y + Di\Li\ ( [ A 0 0 ] * x ) * ( [ C 0 0 ] ) */ void sLinsys::LniTransMult(sData *prob, SimpleVector& y, double alpha, SimpleVector& x) { SparseGenMatrix& A = prob->getLocalA(); SparseGenMatrix& C = prob->getLocalC(); SparseGenMatrix& R = prob->getLocalCrossHessian(); int N, nx0; //get nx(parent) from the number of cols of A (or C). Use N as dummy A.getSize(N,nx0); // a mild assert assert(nx0 <= x.length()); N = locnx+locmy+locmz; assert( y.length() == N); //!memopt SimpleVector LniTx(N); // shortcuts SimpleVector x1(&x[0], nx0); SimpleVector LniTx1(&LniTx[0], locnx); SimpleVector LniTx2(&LniTx[locnx], locmy); SimpleVector LniTx3(&LniTx[locnx+locmy], locmz); LniTx1.setToZero(); R.mult(0.0, LniTx1, 1.0, x1); A.mult(0.0, LniTx2, 1.0, x1); C.mult(0.0, LniTx3, 1.0, x1); solver->Lsolve(LniTx); solver->Dsolve(LniTx); y.axpy(alpha,LniTx); }
void sLinsysRootAug::solveReduced( sData *prob, SimpleVector& b) { assert(locmz==0||gOuterSolve<3); int myRank; MPI_Comm_rank(mpiComm, &myRank); #ifdef TIMING t_start=MPI_Wtime(); troot_total=tchild_total=tcomm_total=0.0; #endif assert(locnx+locmy+locmz==b.length()); SimpleVector& r = (*redRhs); assert(r.length() <= b.length()); SparseGenMatrix& C = prob->getLocalD(); /////////////////////////////////////////////////////////////////////// // LOCAL SOLVE /////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////// // b=[b1;b2;b3] is a locnx+locmy+locmz vector // the new rhs should be // r = [b1-C^T*(zDiag)^{-1}*b3; b2] /////////////////////////////////////////////////////////////////////// r.copyFromArray(b.elements()); //will copy only as many elems as r has // aliases to parts (no mem allocations) SimpleVector r3(&r[locnx+locmy], locmz); //r3 is used as a temp //buffer for b3 SimpleVector r2(&r[locnx], locmy); SimpleVector r1(&r[0], locnx); /////////////////////////////////////////////////////////////////////// // compute r1 = b1 - C^T*(zDiag)^{-1}*b3 /////////////////////////////////////////////////////////////////////// if(locmz>0) { assert(r3.length() == zDiag->length()); r3.componentDiv(*zDiag);//r3 is a copy of b3 C.transMult(1.0, r1, -1.0, r3); } /////////////////////////////////////////////////////////////////////// // r contains all the stuff -> solve for it /////////////////////////////////////////////////////////////////////// if(gInnerSCsolve==0) { // Option 1. - solve with the factors solver->Dsolve(r); } else if(gInnerSCsolve==1) { // Option 2 - solve with the factors and perform iter. ref. 
solveWithIterRef(prob, r); } else { assert(gInnerSCsolve==2); // Option 3 - use the factors as preconditioner and apply BiCGStab solveWithBiCGStab(prob, r); } /////////////////////////////////////////////////////////////////////// // r is the sln to the reduced system // the sln to the aug system should be // x = [r1; r2; (zDiag)^{-1} * (b3-C*r1); /////////////////////////////////////////////////////////////////////// SimpleVector b1(&b[0], locnx); SimpleVector b2(&b[locnx], locmy); SimpleVector b3(&b[locnx+locmy], locmz); b1.copyFrom(r1); b2.copyFrom(r2); if(locmz>0) { C.mult(1.0, b3, -1.0, r1); b3.componentDiv(*zDiag); } #ifdef TIMING if(myRank==0 && gInnerSCsolve>=1) cout << "Root - Refin times: child=" << tchild_total << " root=" << troot_total << " comm=" << tcomm_total << " total=" << MPI_Wtime()-t_start << endl; #endif }
void sLinsysRootAug::solveWithBiCGStab( sData *prob, SimpleVector& b) { int n = b.length(); const int maxit=500; const double tol=1e-12, EPS=2e-16; double iter=0.0; int myRank; MPI_Comm_rank(mpiComm, &myRank); SimpleVector r(n); //residual SimpleVector s(n); //residual associated with half iterate SimpleVector rt(n); //shadow residual SimpleVector xmin(n); //minimal residual iterate SimpleVector x(n); //iterate SimpleVector xhalf(n); // half iterate of BiCG SimpleVector p(n),paux(n); SimpleVector v(n), t(n); int flag; double imin; double n2b; //norm of b double normr, normrmin; //norm of the residual and norm of residual at min-resid iterate double normr_act; double tolb; //relative tolerance double rho, omega, alpha; int stag, maxmsteps, maxstagsteps, moresteps; double relres; //maxit = n/2+1; ////////////////////////////////////////////////////////////////// // Problem Setup and intialization ////////////////////////////////////////////////////////////////// n2b = b.twonorm(); tolb = n2b*tol; #ifdef TIMING taux = MPI_Wtime(); #endif //initial guess x.copyFrom(b); solver->Dsolve(x); //initial residual r.copyFrom(b); #ifdef TIMING troot_total += (MPI_Wtime()-taux); taux = MPI_Wtime(); #endif //applyA(1.0, r, -1.0, x); SCmult(1.0,r, -1.0,x, prob); #ifdef TIMING tchild_total += (MPI_Wtime()-taux); #endif normr=r.twonorm(); #ifdef TIMING if(myRank==0) cout << "BiCG: initial rel resid: " << normr/n2b << endl; #endif if(normr<tolb) { //initial guess is good enough b.copyFrom(x); flag=0; return; } rt.copyFrom(r); //Shadow residual double* resvec = new double[2*maxit+1]; resvec[0] = normr; normrmin=normr; rho=1.0; omega=1.0; stag=0; maxmsteps=min(min(n/50, 5), n-maxit); maxstagsteps=3; moresteps=0; ////////////////////////////////////////////////////////////////// // loop over maxit iterations ////////////////////////////////////////////////////////////////// int ii=0; while(ii<maxit) { //cout << ii << " "; flag=-1; /////////////////////////////// // First half of the 
iterate /////////////////////////////// double rho1=rho; double beta; rho = rt.dotProductWith(r); //printf("rho=%g\n", rho); if(0.0==rho) { flag=4; break; } if(ii==0) p.copyFrom(r); else { beta = (rho/rho1)*(alpha/omega); if(beta==0.0) { flag=4; break; } //-------- p = r + beta*(p - omega*v) -------- p.axpy(-omega, v); p.scale(beta); p.axpy(1.0, r); } #ifdef TIMING taux = MPI_Wtime(); #endif //------ v = A*(M2inv*(M1inv*p)) and ph=M2inv*(M1inv*p) //first use v as temp storage //applyM1(0.0, v, 1.0, p); //applyM2(0.0, paux, 1.0, v); //applyA (0.0, v, 1.0, paux); paux.copyFrom(p); solver->solve(paux); #ifdef TIMING troot_total += (MPI_Wtime()-taux); #endif SCmult(0.0,v, 1.0,paux, prob); SimpleVector& ph = paux; double rtv = rt.dotProductWith(v); if(rtv==0.0) { flag=4; break; } alpha = rho/rtv; if(fabs(alpha)*ph.twonorm()<EPS*x.twonorm()) stag++; else stag=0; // xhalf = x + alpha*ph and the associated residual xhalf.copyFrom(x); xhalf.axpy( alpha, ph); s. copyFrom(r); s.axpy(-alpha, v); normr = s.twonorm(); normr_act = normr; resvec[2*ii] = normr; //printf("iter %g normr=%g\n", ii+0.5, normr); //-------- check for convergence in the middle of the iterate. 
-------- if(normr<=tolb || stag>=maxstagsteps || moresteps) { s.copyFrom(b); //applyA(1.0, s, -1.0, xhalf); // s=b-Ax SCmult(1.0,s, -1.0,xhalf, prob); normr_act = s.twonorm(); if(normr<=tolb) { //converged x.copyFrom(xhalf); flag = 0; iter = 0.5+ii; break; } else { if(stag>=maxstagsteps && moresteps==0) { stag=0; } moresteps++; if(moresteps>=maxmsteps) { //method stagnated flag=3; x.copyFrom(xhalf); break; } } } if(stag>=maxstagsteps) { flag=3; break;} //stagnation //update quantities related to minimal norm iterate if(normr_act<normrmin) { xmin.copyFrom(xhalf); normrmin=normr_act; imin=0.5+ii; } #ifdef TIMING taux = MPI_Wtime(); #endif /////////////////////////////// // Second half of the iterate ////////////////////////////// //applyM1(0.0, t, 1.0, s); //applyM1(s, stemp); //applyM2(0.0, paux, 1.0, t); //applyM2(stemp, sh); //applyA (0.0, t, 1.0, paux); //applyA (sh, t); //kkt->mult(0.0,paux, 1.0,s); paux.copyFrom(s); solver->solve(paux); #ifdef TIMING troot_total += (MPI_Wtime()-taux); #endif SCmult(0.0,t, 1.0,paux, prob); SimpleVector& sh = paux; double tt = t.dotProductWith(t); if(tt==0.0) { flag=4; break;} omega=t.dotProductWith(s); omega /= tt; if(fabs(omega)*sh.twonorm() < EPS*xhalf.twonorm()) stag++; else stag=0; x.copyFrom(xhalf); x.axpy( omega, sh); // x=xhalf+omega*sh r.copyFrom(s); r.axpy(-omega, t ); // r=s-omega*t normr = r.twonorm(); normr_act = normr; resvec[2*ii+1] = normr; //printf("stag=%d maxstagsteps=%d moresteps=%d normr=%g\n", // stag, maxstagsteps, moresteps, normr); //-------- check for convergence at the end of the iterate. 
-------- if(normr<=tolb || stag>=maxstagsteps || moresteps) { r.copyFrom(b); //applyA(1.0, r, -1.0, x); //r=b-Ax SCmult(1.0,r, -1.0,x, prob); normr_act=r.twonorm(); if(normr<=tolb) { flag = 0; iter = 1.0+ii; break; } else { if(stag>=maxstagsteps && moresteps==0) { stag = 0; } moresteps++; if(moresteps>=maxmsteps) { //method stagnated flag=3; break; } } } // end convergence check if(stag>=maxstagsteps) { flag=3; break;} //stagnation //update quantities related to minimal norm iterate if(normr_act<normrmin) { xmin.copyFrom(x); normrmin=normr_act; imin=1.5+ii; } //printf("iter %g normr=%g\n", ii+1.0, normr); /////////////////////////////// // Next iterate /////////////////////////////// ii++; }//end while if(ii>=maxit) { iter=ii; flag=10; } if(flag==0 || flag==-1) { relres = normr_act/n2b; #ifdef TIMING if(myRank==0) { printf("BiCGStab converged: normResid=%g relResid=%g iter=%g\n", normr_act, relres, iter); } #endif } else { if(ii==maxit) flag=10;//aaa //FAILURE -> return minimum resid-norm iterate r.copyFrom(b); //applyA(1.0, r, -1.0, xmin); SCmult(1.0,r, -1.0,xmin, prob); normr=r.twonorm(); if(normr >= normr_act) { x.copyFrom(xmin); //iter=imin; relres=normr/n2b; } else { iter=1.0+ii; relres = normr/n2b; } #ifdef TIMING if(myRank==0) { printf("BiCGStab did not NOT converged after %g[%d] iterations.\n", iter,ii); printf("\t - Error code %d\n\t - Act res=%g\n\t - Rel res=%g %g\n\n", flag, normr, relres, normrmin); } #endif } b.copyFrom(x); delete[] resvec; }
/** rxy = beta*rxy + alpha * SC * x */ void sLinsysRootAug::SCmult( double beta, SimpleVector& rxy, double alpha, SimpleVector& x, sData* prob) { //if (iAmDistrib) { //only one process substracts [ (Q+Dx0+C'*Dz0*C)*xx + A'*xy ] from r // [ A*xx ] int myRank; MPI_Comm_rank(mpiComm, &myRank); if(myRank==0) { //only this proc substracts from rxy rxy.scalarMult(beta); SparseSymMatrix& Q = prob->getLocalQ(); Q.mult(1.0,&rxy[0],1, alpha,&x[0],1); if(locmz>0) { SparseSymMatrix* CtDC_sp = dynamic_cast<SparseSymMatrix*>(CtDC); CtDC_sp->mult(1.0,&rxy[0],1, alpha,&x[0],1); } SimpleVector& xDiagv = dynamic_cast<SimpleVector&>(*xDiag); assert(xDiagv.length() == locnx); for(int i=0; i<xDiagv.length(); i++) rxy[i] += alpha*xDiagv[i]*x[i]; SparseGenMatrix& A=prob->getLocalB(); A.transMult(1.0,&rxy[0],1, alpha,&x[locnx],1); A.mult(1.0,&rxy[locnx],1, alpha,&x[0],1); } else { //other processes set r to zero since they will get this portion from process 0 rxy.setToZero(); } #ifdef TIMING taux=MPI_Wtime(); #endif // now children add [0 A^T C^T ]*inv(KKT)*[0;A;C] x SimpleVector xx(locnx); xx.copyFromArray(x.elements()); xx.scalarMult(-alpha); for(size_t it=0; it<children.size(); it++) { children[it]->addTermToSchurResidual(prob->children[it],rxy,xx); } #ifdef TIMING tchild_total += (MPI_Wtime()-taux); #endif //~done computing residual #ifdef TIMING taux=MPI_Wtime(); #endif //all-reduce residual if(iAmDistrib) { SimpleVector buf(rxy.length()); buf.setToZero(); //we use dx as the recv buffer MPI_Allreduce(rxy.elements(), buf.elements(), locnx+locmy, MPI_DOUBLE, MPI_SUM, mpiComm); rxy.copyFrom(buf); } #ifdef TIMING tcomm_total += (MPI_Wtime()-taux); #endif }