/** evaluate everything before optimization */ void QMCCostFunctionSingle::checkConfigurations() { dG.resize(W.getTotalNum()); dL.resize(W.getTotalNum()); int numLocWalkers=W.getActiveWalkers(); Records.resize(numLocWalkers,6); typedef MCWalkerConfiguration::Walker_t Walker_t; MCWalkerConfiguration::iterator it(W.begin()); MCWalkerConfiguration::iterator it_end(W.end()); int nat = W.getTotalNum(); int iw=0; int totalElements=W.getTotalNum()*OHMMS_DIM; Etarget=0.0; while(it != it_end) { Walker_t& thisWalker(**it); //clean-up DataSet to save re-used values thisWalker.DataSet.clear(); //rewind the counter thisWalker.DataSet.rewind(); //MCWalkerConfiguraton::registerData add distance-table data W.registerData(thisWalker,thisWalker.DataSet); Return_t* saved=Records[iw]; #if defined(QMC_COMPLEX) app_error() << " Optimization is not working with complex wavefunctions yet" << endl; app_error() << " Needs to fix TrialWaveFunction::evaluateDeltaLog " << endl; Psi.evaluateDeltaLog(W, saved[LOGPSI_FIXED], saved[LOGPSI_FREE], dG, dL); thisWalker.DataSet.add(&(dG[0][0]),&(dG[0][0])+totalElements); #else Psi.evaluateDeltaLog(W, saved[LOGPSI_FIXED], saved[LOGPSI_FREE], thisWalker.Drift, dL); #endif thisWalker.DataSet.add(dL.first_address(),dL.last_address()); Etarget += saved[ENERGY_TOT] = H.evaluate(W); saved[ENERGY_FIXED] = H.getInvariantEnergy(); ++it; ++iw; } //Need to sum over the processors vector<Return_t> etemp(2); etemp[0]=Etarget; etemp[1]=static_cast<Return_t>(numLocWalkers); myComm->allreduce(etemp); Etarget = static_cast<Return_t>(etemp[0]/etemp[1]); NumSamples = static_cast<int>(etemp[1]); setTargetEnergy(Etarget); ReportCounter=0; }
/** evaluate everything before optimization */ void QMCCostFunctionOMP::checkConfigurations() { /* mmorales: Since there are cases when memory is an issue (too many dets), the use of a buffer is decoupled from the use of includeNonlocalH in the cost function. Without a buffer, everything is recalculated. Options: - "yes" or "all" : store everything - "minimum" : store orbitals and inverses only, recalculate dets FIX FIX FIX: right now, there is no way to include the nonlocalH in the cost function without using a buffer because evaluateLog needs to be called to get the inverse of psiM This implies that isOptimizable must be set to true, which is risky. Fix this somehow */ StoreDerivInfo=false; DerivStorageLevel=-1; if(usebuffer == "yes" || usebuffer == "all") { StoreDerivInfo=true; if(includeNonlocalH!="no") DerivStorageLevel=0; else DerivStorageLevel=1; app_log() <<"Using buffers for temporary storage in QMCCostFunction.\n" <<endl; } else if (usebuffer == "minimum") { StoreDerivInfo=true; // in this case the use of nonlocalH is irrelevant, since the same inf is enough for both cases DerivStorageLevel=2; app_log() <<"Using minimum storage for determinant evaluation. \n"; } else { if(includeNonlocalH!="no") { APP_ABORT("Need to enable the use of includeNonlocalH=='name' without a buffer."); } } int numW = 0; for(int i=0; i<wClones.size(); i++) numW += wClones[i]->getActiveWalkers(); app_log() <<"Memory usage: " <<endl; app_log() <<"Linear method (approx matrix usage: 4*N^2): " <<NumParams()*NumParams()*sizeof(QMCTraits::RealType)*4.0/1.0e6 <<" MB" <<endl; // assuming 4 matrices app_log() <<"Deriv,HDerivRecord: " <<numW*NumOptimizables*sizeof(QMCTraits::RealType)*3.0/1.0e6 <<" MB" <<endl; if(StoreDerivInfo) { MCWalkerConfiguration& dummy(*wClones[0]); long memorb=0,meminv=0,memdets=0,memorbs_only=0; Psi.memoryUsage_DataForDerivatives(dummy,memorbs_only,memorb,meminv,memdets); memorbs_only*=sizeof(QMCTraits::RealType); memorb*=sizeof(QMCTraits::RealType); meminv*=sizeof(QMCTraits::RealType); memdets*=sizeof(QMCTraits::RealType); app_log() <<"Buffer memory cost: MB/walker MB/total " <<endl; app_log() <<"Orbitals only: " <<memorbs_only/1.0e6 <<" " <<memorbs_only*numW/1.0e6 <<endl; app_log() <<"Orbitals + dervs: " <<memorb/1.0e6 <<" " <<memorb*numW/1.0e6 <<endl; app_log() <<"Inverse: " <<meminv/1.0e6 <<" " <<meminv*numW/1.0e6 <<endl; app_log() <<"Determinants: " <<memdets/1.0e6 <<" " <<memdets*numW/1.0e6 <<endl; } app_log().flush(); RealType et_tot=0.0; RealType e2_tot=0.0; #pragma omp parallel reduction(+:et_tot,e2_tot) { int ip = omp_get_thread_num(); MCWalkerConfiguration& wRef(*wClones[ip]); if (RecordsOnNode[ip] ==0) { RecordsOnNode[ip]=new Matrix<Return_t>; RecordsOnNode[ip]->resize(wRef.getActiveWalkers(),SUM_INDEX_SIZE); if (needGrads) { DerivRecords[ip]=new Matrix<Return_t>; DerivRecords[ip]->resize(wRef.getActiveWalkers(),NumOptimizables); HDerivRecords[ip]=new Matrix<Return_t>; HDerivRecords[ip]->resize(wRef.getActiveWalkers(),NumOptimizables); } } else if (RecordsOnNode[ip]->size1()!=wRef.getActiveWalkers()) { RecordsOnNode[ip]->resize(wRef.getActiveWalkers(),SUM_INDEX_SIZE); if (needGrads) { DerivRecords[ip]->resize(wRef.getActiveWalkers(),NumOptimizables); HDerivRecords[ip]->resize(wRef.getActiveWalkers(),NumOptimizables); } } QMCHamiltonianBase* nlpp = (includeNonlocalH =="no")? 0: hClones[ip]->getHamiltonian(includeNonlocalH.c_str()); //set the optimization mode for the trial wavefunction psiClones[ip]->startOptimization(); // synchronize the random number generator with the node (*MoverRng[ip]) = (*RngSaved[ip]); hClones[ip]->setRandomGenerator(MoverRng[ip]); //int nat = wRef.getTotalNum(); //int totalElements=W.getTotalNum()*OHMMS_DIM; typedef MCWalkerConfiguration::Walker_t Walker_t; Return_t e0=0.0; // Return_t ef=0.0; Return_t e2=0.0; for (int iw=0, iwg=wPerNode[ip]; iw<wRef.getActiveWalkers(); ++iw,++iwg) { ParticleSet::Walker_t& thisWalker(*wRef[iw]); wRef.R=thisWalker.R; wRef.update(); Return_t* restrict saved=(*RecordsOnNode[ip])[iw]; // Return_t logpsi(0); // psiClones[ip]->evaluateDeltaLog(wRef, saved[LOGPSI_FIXED], saved[LOGPSI_FREE], *dLogPsi[iwg],*d2LogPsi[iwg]); // buffer for MultiSlaterDet data // if((usebuffer=="yes")||(includeNonlocalH=="yes")) if(StoreDerivInfo) { psiClones[ip]->registerDataForDerivatives(wRef, thisWalker.DataSetForDerivatives,DerivStorageLevel); psiClones[ip]->evaluateDeltaLog(wRef, saved[LOGPSI_FIXED], saved[LOGPSI_FREE], *dLogPsi[iwg], *d2LogPsi[iwg], thisWalker.DataSetForDerivatives); // logpsi = saved[LOGPSI_FIXED] + saved[LOGPSI_FREE]; } else { psiClones[ip]->evaluateDeltaLog(wRef, saved[LOGPSI_FIXED], saved[LOGPSI_FREE], *dLogPsi[iwg], *d2LogPsi[iwg]); // logpsi = psiClones[ip]->evaluateLog(wRef); } // if(includeNonlocalH!="no") logpsi = saved[LOGPSI_FIXED] + saved[LOGPSI_FREE]; Return_t x= hClones[ip]->evaluate(wRef); e0 += saved[ENERGY_TOT] = x; e2 += x*x; saved[ENERGY_FIXED] = hClones[ip]->getLocalPotential(); if(nlpp) saved[ENERGY_FIXED] -= nlpp->Value; //if (includeNonlocalH!="no") // saved[ENERGY_FIXED] = hClones[ip]->getLocalPotential() - (*(hClones[ip]->getHamiltonian(includeNonlocalH.c_str()))).Value; //else // saved[ENERGY_FIXED] = hClones[ip]->getLocalPotential(); // ef += saved[ENERGY_FIXED]; saved[REWEIGHT]=thisWalker.Weight=1.0; // thisWalker.resetProperty(logpsi,psiClones[ip]->getPhase(),x); if (needGrads) { //allocate vector vector<Return_t> Dsaved(NumOptimizables,0.0); vector<Return_t> HDsaved(NumOptimizables,0.0); psiClones[ip]->evaluateDerivatives(wRef, OptVariablesForPsi, Dsaved, HDsaved); std::copy(Dsaved.begin(),Dsaved.end(),(*DerivRecords[ip])[iw]); std::copy(HDsaved.begin(),HDsaved.end(),(*HDerivRecords[ip])[iw]); } } //add them all using reduction et_tot+=e0; e2_tot+=e2; // #pragma omp atomic // eft_tot+=ef; } OptVariablesForPsi.setComputed(); // app_log() << " VMC Efavg = " << eft_tot/static_cast<Return_t>(wPerNode[NumThreads]) << endl; //Need to sum over the processors vector<Return_t> etemp(3); etemp[0]=et_tot; etemp[1]=static_cast<Return_t>(wPerNode[NumThreads]); etemp[2]=e2_tot; myComm->allreduce(etemp); Etarget = static_cast<Return_t>(etemp[0]/etemp[1]); NumSamples = static_cast<int>(etemp[1]); app_log() << " VMC Eavg = " << Etarget << endl; app_log() << " VMC Evar = " << etemp[2]/etemp[1]-Etarget*Etarget << endl; app_log() << " Total weights = " << etemp[1] << endl; app_log().flush(); setTargetEnergy(Etarget); ReportCounter=0; }
/************************************************************************* Internal working subroutine for bidiagonal decomposition *************************************************************************/ bool bidiagonalsvddecompositioninternal(ap::real_1d_array& d, ap::real_1d_array e, int n, bool isupper, bool isfractionalaccuracyrequired, ap::real_2d_array& u, int ustart, int nru, ap::real_2d_array& c, int cstart, int ncc, ap::real_2d_array& vt, int vstart, int ncvt) { bool result; int i; int idir; int isub; int iter; int j; int ll = 0; // Eliminate compiler warning. int lll; int m; int maxit; int oldll; int oldm; double abse; double abss; double cosl; double cosr; double cs; double eps; double f; double g; double h; double mu; double oldcs; double oldsn = 0.; // Eliminate compiler warning. double r; double shift; double sigmn; double sigmx; double sinl; double sinr; double sll; double smax; double smin; double sminl; double sminoa; double sn; double thresh; double tol; double tolmul; double unfl; ap::real_1d_array work0; ap::real_1d_array work1; ap::real_1d_array work2; ap::real_1d_array work3; int maxitr; bool matrixsplitflag; bool iterflag; ap::real_1d_array utemp; ap::real_1d_array vttemp; ap::real_1d_array ctemp; ap::real_1d_array etemp; bool fwddir; double tmp; int mm1; int mm0; bool bchangedir; int uend; int cend; int vend; result = true; if( n==0 ) { return result; } if( n==1 ) { if( d(1)<0 ) { d(1) = -d(1); if( ncvt>0 ) { ap::vmul(&vt(vstart, vstart), ap::vlen(vstart,vstart+ncvt-1), -1); } } return result; } // // init // work0.setbounds(1, n-1); work1.setbounds(1, n-1); work2.setbounds(1, n-1); work3.setbounds(1, n-1); uend = ustart+ap::maxint(nru-1, 0); vend = vstart+ap::maxint(ncvt-1, 0); cend = cstart+ap::maxint(ncc-1, 0); utemp.setbounds(ustart, uend); vttemp.setbounds(vstart, vend); ctemp.setbounds(cstart, cend); maxitr = 12; fwddir = true; // // resize E from N-1 to N // etemp.setbounds(1, n); for(i = 1; i <= n-1; i++) { etemp(i) = e(i); } e.setbounds(1, n); for(i = 1; i <= n-1; i++) { e(i) = etemp(i); } e(n) = 0; idir = 0; // // Get machine constants // eps = ap::machineepsilon; unfl = ap::minrealnumber; // // If matrix lower bidiagonal, rotate to be upper bidiagonal // by applying Givens rotations on the left // if( !isupper ) { for(i = 1; i <= n-1; i++) { generaterotation(d(i), e(i), cs, sn, r); d(i) = r; e(i) = sn*d(i+1); d(i+1) = cs*d(i+1); work0(i) = cs; work1(i) = sn; } // // Update singular vectors if desired // if( nru>0 ) { applyrotationsfromtheright(fwddir, ustart, uend, 1+ustart-1, n+ustart-1, work0, work1, u, utemp); } if( ncc>0 ) { applyrotationsfromtheleft(fwddir, 1+cstart-1, n+cstart-1, cstart, cend, work0, work1, c, ctemp); } } // // Compute singular values to relative accuracy TOL // (By setting TOL to be negative, algorithm will compute // singular values to absolute accuracy ABS(TOL)*norm(input matrix)) // tolmul = ap::maxreal(double(10), ap::minreal(double(100), pow(eps, -0.125))); tol = tolmul*eps; if( !isfractionalaccuracyrequired ) { tol = -tol; } // // Compute approximate maximum, minimum singular values // smax = 0; for(i = 1; i <= n; i++) { smax = ap::maxreal(smax, fabs(d(i))); } for(i = 1; i <= n-1; i++) { smax = ap::maxreal(smax, fabs(e(i))); } sminl = 0; if( tol>=0 ) { // // Relative accuracy desired // sminoa = fabs(d(1)); if( sminoa!=0 ) { mu = sminoa; for(i = 2; i <= n; i++) { mu = fabs(d(i))*(mu/(mu+fabs(e(i-1)))); sminoa = ap::minreal(sminoa, mu); if( sminoa==0 ) { break; } } } sminoa = sminoa/sqrt(double(n)); thresh = ap::maxreal(tol*sminoa, maxitr*n*n*unfl); } else { // // Absolute accuracy desired // thresh = ap::maxreal(fabs(tol)*smax, maxitr*n*n*unfl); } // // Prepare for main iteration loop for the singular values // (MAXIT is the maximum number of passes through the inner // loop permitted before nonconvergence signalled.) // maxit = maxitr*n*n; iter = 0; oldll = -1; oldm = -1; // // M points to last element of unconverged part of matrix // m = n; // // Begin main iteration loop // while(true) { // // Check for convergence or exceeding iteration count // if( m<=1 ) { break; } if( iter>maxit ) { result = false; return result; } // // Find diagonal block of matrix to work on // if( tol<0&&fabs(d(m))<=thresh ) { d(m) = 0; } smax = fabs(d(m)); smin = smax; matrixsplitflag = false; for(lll = 1; lll <= m-1; lll++) { ll = m-lll; abss = fabs(d(ll)); abse = fabs(e(ll)); if( tol<0&&abss<=thresh ) { d(ll) = 0; } if( abse<=thresh ) { matrixsplitflag = true; break; } smin = ap::minreal(smin, abss); smax = ap::maxreal(smax, ap::maxreal(abss, abse)); } if( !matrixsplitflag ) { ll = 0; } else { // // Matrix splits since E(LL) = 0 // e(ll) = 0; if( ll==m-1 ) { // // Convergence of bottom singular value, return to top of loop // m = m-1; continue; } } ll = ll+1; // // E(LL) through E(M-1) are nonzero, E(LL-1) is zero // if( ll==m-1 ) { // // 2 by 2 block, handle separately // svdv2x2(d(m-1), e(m-1), d(m), sigmn, sigmx, sinr, cosr, sinl, cosl); d(m-1) = sigmx; e(m-1) = 0; d(m) = sigmn; // // Compute singular vectors, if desired // if( ncvt>0 ) { mm0 = m+(vstart-1); mm1 = m-1+(vstart-1); ap::vmove(&vttemp(vstart), &vt(mm1, vstart), ap::vlen(vstart,vend), cosr); ap::vadd(&vttemp(vstart), &vt(mm0, vstart), ap::vlen(vstart,vend), sinr); ap::vmul(&vt(mm0, vstart), ap::vlen(vstart,vend), cosr); ap::vsub(&vt(mm0, vstart), &vt(mm1, vstart), ap::vlen(vstart,vend), sinr); ap::vmove(&vt(mm1, vstart), &vttemp(vstart), ap::vlen(vstart,vend)); } if( nru>0 ) { mm0 = m+ustart-1; mm1 = m-1+ustart-1; ap::vmove(utemp.getvector(ustart, uend), u.getcolumn(mm1, ustart, uend), cosl); ap::vadd(utemp.getvector(ustart, uend), u.getcolumn(mm0, ustart, uend), sinl); ap::vmul(u.getcolumn(mm0, ustart, uend), cosl); ap::vsub(u.getcolumn(mm0, ustart, uend), u.getcolumn(mm1, ustart, uend), sinl); ap::vmove(u.getcolumn(mm1, ustart, uend), utemp.getvector(ustart, uend)); } if( ncc>0 ) { mm0 = m+cstart-1; mm1 = m-1+cstart-1; ap::vmove(&ctemp(cstart), &c(mm1, cstart), ap::vlen(cstart,cend), cosl); ap::vadd(&ctemp(cstart), &c(mm0, cstart), ap::vlen(cstart,cend), sinl); ap::vmul(&c(mm0, cstart), ap::vlen(cstart,cend), cosl); ap::vsub(&c(mm0, cstart), &c(mm1, cstart), ap::vlen(cstart,cend), sinl); ap::vmove(&c(mm1, cstart), &ctemp(cstart), ap::vlen(cstart,cend)); } m = m-2; continue; } // // If working on new submatrix, choose shift direction // (from larger end diagonal element towards smaller) // // Previously was // "if (LL>OLDM) or (M<OLDLL) then" // fixed thanks to Michael Rolle < *****@*****.** > // Very strange that LAPACK still contains it. // bchangedir = false; if( idir==1&&fabs(d(ll))<1.0E-3*fabs(d(m)) ) { bchangedir = true; } if( idir==2&&fabs(d(m))<1.0E-3*fabs(d(ll)) ) { bchangedir = true; } if( ll!=oldll||m!=oldm||bchangedir ) { if( fabs(d(ll))>=fabs(d(m)) ) { // // Chase bulge from top (big end) to bottom (small end) // idir = 1; } else { // // Chase bulge from bottom (big end) to top (small end) // idir = 2; } } // // Apply convergence tests // if( idir==1 ) { // // Run convergence test in forward direction // First apply standard test to bottom of matrix // if( (fabs(e(m-1))<=fabs(tol)*fabs(d(m)))||(tol<0&&fabs(e(m-1))<=thresh) ) { e(m-1) = 0; continue; } if( tol>=0 ) { // // If relative accuracy desired, // apply convergence criterion forward // mu = fabs(d(ll)); sminl = mu; iterflag = false; for(lll = ll; lll <= m-1; lll++) { if( fabs(e(lll))<=tol*mu ) { e(lll) = 0; iterflag = true; break; } mu = fabs(d(lll+1))*(mu/(mu+fabs(e(lll)))); sminl = ap::minreal(sminl, mu); } if( iterflag ) { continue; } } } else { // // Run convergence test in backward direction // First apply standard test to top of matrix // if( (fabs(e(ll))<=fabs(tol)*fabs(d(ll)))||(tol<0&&fabs(e(ll))<=thresh) ) { e(ll) = 0; continue; } if( tol>=0 ) { // // If relative accuracy desired, // apply convergence criterion backward // mu = fabs(d(m)); sminl = mu; iterflag = false; for(lll = m-1; lll >= ll; lll--) { if( fabs(e(lll))<=tol*mu ) { e(lll) = 0; iterflag = true; break; } mu = fabs(d(lll))*(mu/(mu+fabs(e(lll)))); sminl = ap::minreal(sminl, mu); } if( iterflag ) { continue; } } } oldll = ll; oldm = m; // // Compute shift. First, test if shifting would ruin relative // accuracy, and if so set the shift to zero. // if( tol>=0&&n*tol*(sminl/smax)<=ap::maxreal(eps, 0.01*tol) ) { // // Use a zero shift to avoid loss of relative accuracy // shift = 0; } else { // // Compute the shift from 2-by-2 block at end of matrix // if( idir==1 ) { sll = fabs(d(ll)); svd2x2(d(m-1), e(m-1), d(m), shift, r); } else { sll = fabs(d(m)); svd2x2(d(ll), e(ll), d(ll+1), shift, r); } // // Test if shift negligible, and if so set to zero // if( sll>0 ) { if( ap::sqr(shift/sll)<eps ) { shift = 0; } } } // // Increment iteration count // iter = iter+m-ll; // // If SHIFT = 0, do simplified QR iteration // if( shift==0 ) { if( idir==1 ) { // // Chase bulge from top to bottom // Save cosines and sines for later singular vector updates // cs = 1; oldcs = 1; for(i = ll; i <= m-1; i++) { generaterotation(d(i)*cs, e(i), cs, sn, r); if( i>ll ) { e(i-1) = oldsn*r; } generaterotation(oldcs*r, d(i+1)*sn, oldcs, oldsn, tmp); d(i) = tmp; work0(i-ll+1) = cs; work1(i-ll+1) = sn; work2(i-ll+1) = oldcs; work3(i-ll+1) = oldsn; } h = d(m)*cs; d(m) = h*oldcs; e(m-1) = h*oldsn; // // Update singular vectors // if( ncvt>0 ) { applyrotationsfromtheleft(fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work0, work1, vt, vttemp); } if( nru>0 ) { applyrotationsfromtheright(fwddir, ustart, uend, ll+ustart-1, m+ustart-1, work2, work3, u, utemp); } if( ncc>0 ) { applyrotationsfromtheleft(fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work2, work3, c, ctemp); } // // Test convergence // if( fabs(e(m-1))<=thresh ) { e(m-1) = 0; } } else { // // Chase bulge from bottom to top // Save cosines and sines for later singular vector updates // cs = 1; oldcs = 1; for(i = m; i >= ll+1; i--) { generaterotation(d(i)*cs, e(i-1), cs, sn, r); if( i<m ) { e(i) = oldsn*r; } generaterotation(oldcs*r, d(i-1)*sn, oldcs, oldsn, tmp); d(i) = tmp; work0(i-ll) = cs; work1(i-ll) = -sn; work2(i-ll) = oldcs; work3(i-ll) = -oldsn; } h = d(ll)*cs; d(ll) = h*oldcs; e(ll) = h*oldsn; // // Update singular vectors // if( ncvt>0 ) { applyrotationsfromtheleft(!fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work2, work3, vt, vttemp); } if( nru>0 ) { applyrotationsfromtheright(!fwddir, ustart, uend, ll+ustart-1, m+ustart-1, work0, work1, u, utemp); } if( ncc>0 ) { applyrotationsfromtheleft(!fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work0, work1, c, ctemp); } // // Test convergence // if( fabs(e(ll))<=thresh ) { e(ll) = 0; } } } else { // // Use nonzero shift // if( idir==1 ) { // // Chase bulge from top to bottom // Save cosines and sines for later singular vector updates // f = (fabs(d(ll))-shift)*(extsignbdsqr(double(1), d(ll))+shift/d(ll)); g = e(ll); for(i = ll; i <= m-1; i++) { generaterotation(f, g, cosr, sinr, r); if( i>ll ) { e(i-1) = r; } f = cosr*d(i)+sinr*e(i); e(i) = cosr*e(i)-sinr*d(i); g = sinr*d(i+1); d(i+1) = cosr*d(i+1); generaterotation(f, g, cosl, sinl, r); d(i) = r; f = cosl*e(i)+sinl*d(i+1); d(i+1) = cosl*d(i+1)-sinl*e(i); if( i<m-1 ) { g = sinl*e(i+1); e(i+1) = cosl*e(i+1); } work0(i-ll+1) = cosr; work1(i-ll+1) = sinr; work2(i-ll+1) = cosl; work3(i-ll+1) = sinl; } e(m-1) = f; // // Update singular vectors // if( ncvt>0 ) { applyrotationsfromtheleft(fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work0, work1, vt, vttemp); } if( nru>0 ) { applyrotationsfromtheright(fwddir, ustart, uend, ll+ustart-1, m+ustart-1, work2, work3, u, utemp); } if( ncc>0 ) { applyrotationsfromtheleft(fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work2, work3, c, ctemp); } // // Test convergence // if( fabs(e(m-1))<=thresh ) { e(m-1) = 0; } } else { // // Chase bulge from bottom to top // Save cosines and sines for later singular vector updates // f = (fabs(d(m))-shift)*(extsignbdsqr(double(1), d(m))+shift/d(m)); g = e(m-1); for(i = m; i >= ll+1; i--) { generaterotation(f, g, cosr, sinr, r); if( i<m ) { e(i) = r; } f = cosr*d(i)+sinr*e(i-1); e(i-1) = cosr*e(i-1)-sinr*d(i); g = sinr*d(i-1); d(i-1) = cosr*d(i-1); generaterotation(f, g, cosl, sinl, r); d(i) = r; f = cosl*e(i-1)+sinl*d(i-1); d(i-1) = cosl*d(i-1)-sinl*e(i-1); if( i>ll+1 ) { g = sinl*e(i-2); e(i-2) = cosl*e(i-2); } work0(i-ll) = cosr; work1(i-ll) = -sinr; work2(i-ll) = cosl; work3(i-ll) = -sinl; } e(ll) = f; // // Test convergence // if( fabs(e(ll))<=thresh ) { e(ll) = 0; } // // Update singular vectors if desired // if( ncvt>0 ) { applyrotationsfromtheleft(!fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work2, work3, vt, vttemp); } if( nru>0 ) { applyrotationsfromtheright(!fwddir, ustart, uend, ll+ustart-1, m+ustart-1, work0, work1, u, utemp); } if( ncc>0 ) { applyrotationsfromtheleft(!fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work0, work1, c, ctemp); } } } // // QR iteration finished, go back and check convergence // continue; } // // All singular values converged, so make them positive // for(i = 1; i <= n; i++) { if( d(i)<0 ) { d(i) = -d(i); // // Change sign of singular vectors, if desired // if( ncvt>0 ) { ap::vmul(&vt(i+vstart-1, vstart), ap::vlen(vstart,vend), -1); } } } // // Sort the singular values into decreasing order (insertion sort on // singular values, but only one transposition per singular vector) // for(i = 1; i <= n-1; i++) { // // Scan for smallest D(I) // isub = 1; smin = d(1); for(j = 2; j <= n+1-i; j++) { if( d(j)<=smin ) { isub = j; smin = d(j); } } if( isub!=n+1-i ) { // // Swap singular values and vectors // d(isub) = d(n+1-i); d(n+1-i) = smin; if( ncvt>0 ) { j = n+1-i; ap::vmove(&vttemp(vstart), &vt(isub+vstart-1, vstart), ap::vlen(vstart,vend)); ap::vmove(&vt(isub+vstart-1, vstart), &vt(j+vstart-1, vstart), ap::vlen(vstart,vend)); ap::vmove(&vt(j+vstart-1, vstart), &vttemp(vstart), ap::vlen(vstart,vend)); } if( nru>0 ) { j = n+1-i; ap::vmove(utemp.getvector(ustart, uend), u.getcolumn(isub+ustart-1, ustart, uend)); ap::vmove(u.getcolumn(isub+ustart-1, ustart, uend), u.getcolumn(j+ustart-1, ustart, uend)); ap::vmove(u.getcolumn(j+ustart-1, ustart, uend), utemp.getvector(ustart, uend)); } if( ncc>0 ) { j = n+1-i; ap::vmove(&ctemp(cstart), &c(isub+cstart-1, cstart), ap::vlen(cstart,cend)); ap::vmove(&c(isub+cstart-1, cstart), &c(j+cstart-1, cstart), ap::vlen(cstart,cend)); ap::vmove(&c(j+cstart-1, cstart), &ctemp(cstart), ap::vlen(cstart,cend)); } } } return result; }