hsVector3 hsMatrix44::RemoveScale() { hsVector3 xCol(fMap[0][0], fMap[0][1], fMap[0][2]); float xLen = xCol.Magnitude(); hsVector3 yCol(fMap[1][0], fMap[1][1], fMap[1][2]); float yLen = yCol.Magnitude(); hsVector3 zCol(fMap[2][0], fMap[2][1], fMap[2][2]); float zLen = zCol.Magnitude(); fMap[0][0] /= xLen; fMap[0][1] /= xLen; fMap[0][2] /= xLen; fMap[1][0] /= yLen; fMap[1][1] /= yLen; fMap[1][2] /= yLen; fMap[2][0] /= zLen; fMap[2][1] /= zLen; fMap[2][2] /= zLen; hsVector3 oldScale(xLen, yLen, zLen); return oldScale; }
bool DMCcuda::run() { bool NLmove = NonLocalMove == "yes"; bool scaleweight = ScaleWeight == "yes"; if (NLmove) app_log() << " Using Casula nonlocal moves in DMCcuda.\n"; if (scaleweight) app_log() << " Scaling weight per Umrigar/Nightengale.\n"; resetRun(); Mover->MaxAge = 1; IndexType block = 0; IndexType nAcceptTot = 0; IndexType nRejectTot = 0; int nat = W.getTotalNum(); int nw = W.getActiveWalkers(); vector<RealType> LocalEnergy(nw), LocalEnergyOld(nw), oldScale(nw), newScale(nw); vector<PosType> delpos(nw); vector<PosType> dr(nw); vector<PosType> newpos(nw); vector<ValueType> ratios(nw), rplus(nw), rminus(nw), R2prop(nw), R2acc(nw); vector<PosType> oldG(nw), newG(nw); vector<ValueType> oldL(nw), newL(nw); vector<Walker_t*> accepted(nw); Matrix<ValueType> lapl(nw, nat); Matrix<GradType> grad(nw, nat); vector<ValueType> V2(nw), V2bar(nw); vector<vector<NonLocalData> > Txy(nw); for (int iw=0; iw<nw; iw++) W[iw]->Weight = 1.0; do { IndexType step = 0; nAccept = nReject = 0; Estimators->startBlock(nSteps); do { step++; CurrentStep++; nw = W.getActiveWalkers(); ResizeTimer.start(); LocalEnergy.resize(nw); oldScale.resize(nw); newScale.resize(nw); delpos.resize(nw); dr.resize(nw); newpos.resize(nw); ratios.resize(nw); rplus.resize(nw); rminus.resize(nw); oldG.resize(nw); newG.resize(nw); oldL.resize(nw); newL.resize(nw); accepted.resize(nw); lapl.resize(nw, nat); grad.resize(nw, nat); R2prop.resize(nw,0.0); R2acc.resize(nw,0.0); V2.resize(nw,0.0); V2bar.resize(nw,0.0); W.updateLists_GPU(); ResizeTimer.stop(); if (NLmove) { Txy.resize(nw); for (int iw=0; iw<nw; iw++) { Txy[iw].clear(); Txy[iw].push_back(NonLocalData(-1, 1.0, PosType())); } } for (int iw=0; iw<nw; iw++) W[iw]->Age++; DriftDiffuseTimer.start(); for(int iat=0; iat<nat; iat++) { Psi.getGradient (W, iat, oldG); //create a 3N-Dimensional Gaussian with variance=1 makeGaussRandomWithEngine(delpos,Random); for(int iw=0; iw<nw; iw++) { delpos[iw] *= m_sqrttau; oldScale[iw] = getDriftScale(m_tauovermass,oldG[iw]); dr[iw] = delpos[iw] + (oldScale[iw]*oldG[iw]); newpos[iw]=W[iw]->R[iat] + dr[iw]; ratios[iw] = 1.0; R2prop[iw] += dot(delpos[iw], delpos[iw]); } W.proposeMove_GPU(newpos, iat); Psi.ratio(W,iat,ratios,newG, newL); accepted.clear(); vector<bool> acc(nw, false); for(int iw=0; iw<nw; ++iw) { PosType drOld = newpos[iw] - (W[iw]->R[iat] + oldScale[iw]*oldG[iw]); RealType logGf = -m_oneover2tau * dot(drOld, drOld); newScale[iw] = getDriftScale(m_tauovermass,newG[iw]); PosType drNew = (newpos[iw] + newScale[iw]*newG[iw]) - W[iw]->R[iat]; RealType logGb = -m_oneover2tau * dot(drNew, drNew); RealType x = logGb - logGf; RealType prob = ratios[iw]*ratios[iw]*std::exp(x); if(Random() < prob && ratios[iw] > 0.0) { accepted.push_back(W[iw]); nAccept++; W[iw]->R[iat] = newpos[iw]; W[iw]->Age = 0; acc[iw] = true; R2acc[iw] += dot(delpos[iw], delpos[iw]); V2[iw] += m_tauovermass * m_tauovermass * dot(newG[iw],newG[iw]); V2bar[iw] += newScale[iw] * newScale[iw] * dot(newG[iw],newG[iw]); } else { nReject++; V2[iw] += m_tauovermass * m_tauovermass * dot(oldG[iw],oldG[iw]); V2bar[iw] += oldScale[iw] * oldScale[iw] * dot(oldG[iw],oldG[iw]); } } W.acceptMove_GPU(acc); if (accepted.size()) Psi.update(accepted,iat); } DriftDiffuseTimer.stop(); // Psi.recompute(W, false); Psi.gradLapl(W, grad, lapl); HTimer.start(); if (NLmove) H.evaluate (W, LocalEnergy, Txy); else H.evaluate (W, LocalEnergy); HTimer.stop(); // for (int iw=0; iw<nw; iw++) { // branchEngine->clampEnergy(LocalEnergy[iw]); // W[iw]->getPropertyBase()[LOCALENERGY] = LocalEnergy[iw]; // } if (CurrentStep == 1) LocalEnergyOld = LocalEnergy; if (NLmove) { // Now, attempt nonlocal move accepted.clear(); vector<int> iatList; vector<PosType> accPos; for (int iw=0; iw<nw; iw++) { /// HACK HACK HACK // if (LocalEnergy[iw] < -2300.0) { // cerr << "Walker " << iw << " has energy " // << LocalEnergy[iw] << endl;; // double maxWeight = 0.0; // int elMax = -1; // PosType posMax; // for (int j=1; j<Txy[iw].size(); j++) // if (std::fabs(Txy[iw][j].Weight) > std::fabs(maxWeight)) { // maxWeight = Txy[iw][j].Weight; // elMax = Txy[iw][j].PID; // posMax = W[iw]->R[elMax] + Txy[iw][j].Delta; // } // cerr << "Maximum weight is " << maxWeight << " for electron " // << elMax << " at position " << posMax << endl; // PosType unit = W.Lattice.toUnit(posMax); // unit[0] -= round(unit[0]); // unit[1] -= round(unit[1]); // unit[2] -= round(unit[2]); // cerr << "Reduced position = " << unit << endl; // } int ibar = NLop.selectMove(Random(), Txy[iw]); if (ibar) { accepted.push_back(W[iw]); int iat = Txy[iw][ibar].PID; iatList.push_back(iat); accPos.push_back(W[iw]->R[iat] + Txy[iw][ibar].Delta); } } if (accepted.size()) { Psi.ratio(accepted,iatList, accPos, ratios, newG, newL); Psi.update(accepted,iatList); for (int i=0; i<accepted.size(); i++) accepted[i]->R[iatList[i]] = accPos[i]; W.NLMove_GPU (accepted, accPos, iatList); // HACK HACK HACK // Recompute the kinetic energy // Psi.gradLapl(W, grad, lapl); // H.evaluate (W, LocalEnergy); //W.copyWalkersToGPU(); } } // Now branch BranchTimer.start(); for (int iw=0; iw<nw; iw++) { RealType v2=0.0, v2bar=0.0; for(int iat=0; iat<nat; iat++) { v2 += dot(W.G[iat],W.G[iat]); RealType newscale = getDriftScale(m_tauovermass,newG[iw]); v2 += m_tauovermass * m_tauovermass * dot(newG[iw],newG[iw]); v2bar += newscale * newscale * dot(newG[iw],newG[iw]); } //RealType scNew = std::sqrt(V2bar[iw] / V2[iw]); RealType scNew = std::sqrt(v2bar/v2); RealType scOld = (CurrentStep == 1) ? scNew : W[iw]->getPropertyBase()[DRIFTSCALE]; W[iw]->getPropertyBase()[DRIFTSCALE] = scNew; // fprintf (stderr, "iw = %d scNew = %1.8f scOld = %1.8f\n", iw, scNew, scOld); RealType tauRatio = R2acc[iw] / R2prop[iw]; if (tauRatio < 0.5) cerr << " tauRatio = " << tauRatio << endl; RealType taueff = m_tauovermass * tauRatio; if (scaleweight) W[iw]->Weight *= branchEngine->branchWeightTau (LocalEnergy[iw], LocalEnergyOld[iw], scNew, scOld, taueff); else W[iw]->Weight *= branchEngine->branchWeight (LocalEnergy[iw], LocalEnergyOld[iw]); W[iw]->getPropertyBase()[R2ACCEPTED] = R2acc[iw]; W[iw]->getPropertyBase()[R2PROPOSED] = R2prop[iw]; } Mover->setMultiplicity(W.begin(), W.end()); branchEngine->branch(CurrentStep,W); nw = W.getActiveWalkers(); LocalEnergyOld.resize(nw); for (int iw=0; iw<nw; iw++) LocalEnergyOld[iw] = W[iw]->getPropertyBase()[LOCALENERGY]; BranchTimer.stop(); } while(step<nSteps); Psi.recompute(W, true); double accept_ratio = (double)nAccept/(double)(nAccept+nReject); Estimators->stopBlock(accept_ratio); nAcceptTot += nAccept; nRejectTot += nReject; ++block; recordBlock(block); } while(block<nBlocks); //finalize a qmc section return finalize(block); }
bool VMCcuda::runWithDrift() { resetRun(); IndexType block = 0; IndexType nAcceptTot = 0; IndexType nRejectTot = 0; int nat = W.getTotalNum(); int nw = W.getActiveWalkers(); vector<RealType> LocalEnergy(nw), oldScale(nw), newScale(nw); vector<PosType> delpos(nw); vector<PosType> dr(nw); vector<PosType> newpos(nw); vector<ValueType> ratios(nw), rplus(nw), rminus(nw); vector<PosType> oldG(nw), newG(nw); vector<ValueType> oldL(nw), newL(nw); vector<Walker_t*> accepted(nw); Matrix<ValueType> lapl(nw, nat); Matrix<GradType> grad(nw, nat); // First, do warmup steps for (int step=0; step<myWarmupSteps; step++) { for(int iat=0; iat<nat; iat++) { Psi.getGradient (W, iat, oldG); //create a 3N-Dimensional Gaussian with variance=1 makeGaussRandomWithEngine(delpos,Random); for(int iw=0; iw<nw; iw++) { oldScale[iw] = getDriftScale(m_tauovermass,oldG[iw]); dr[iw] = (m_sqrttau*delpos[iw]) + (oldScale[iw]*oldG[iw]); newpos[iw]=W[iw]->R[iat] + dr[iw]; ratios[iw] = 1.0; } W.proposeMove_GPU(newpos, iat); Psi.ratio(W,iat,ratios,newG, newL); accepted.clear(); vector<bool> acc(nw, false); for(int iw=0; iw<nw; ++iw) { PosType drOld = newpos[iw] - (W[iw]->R[iat] + oldScale[iw]*oldG[iw]); RealType logGf = -m_oneover2tau * dot(drOld, drOld); newScale[iw] = getDriftScale(m_tauovermass,newG[iw]); PosType drNew = (newpos[iw] + newScale[iw]*newG[iw]) - W[iw]->R[iat]; RealType logGb = -m_oneover2tau * dot(drNew, drNew); RealType x = logGb - logGf; RealType prob = ratios[iw]*ratios[iw]*std::exp(x); if(Random() < prob) { accepted.push_back(W[iw]); nAccept++; W[iw]->R[iat] = newpos[iw]; acc[iw] = true; } else nReject++; } W.acceptMove_GPU(acc); if (accepted.size()) Psi.update(accepted,iat); } } // Now do data collection steps do { IndexType step = 0; nAccept = nReject = 0; Estimators->startBlock(nSteps); do { step++; CurrentStep++; for (int isub=0; isub<nSubSteps; isub++) { for(int iat=0; iat<nat; iat++) { Psi.getGradient (W, iat, oldG); //create a 3N-Dimensional Gaussian with variance=1 makeGaussRandomWithEngine(delpos,Random); for(int iw=0; iw<nw; iw++) { oldScale[iw] = getDriftScale(m_tauovermass,oldG[iw]); dr[iw] = (m_sqrttau*delpos[iw]) + (oldScale[iw]*oldG[iw]); newpos[iw]=W[iw]->R[iat] + dr[iw]; ratios[iw] = 1.0; } W.proposeMove_GPU(newpos, iat); Psi.ratio(W,iat,ratios,newG, newL); accepted.clear(); vector<bool> acc(nw, false); for(int iw=0; iw<nw; ++iw) { PosType drOld = newpos[iw] - (W[iw]->R[iat] + oldScale[iw]*oldG[iw]); // if (dot(drOld, drOld) > 25.0) // cerr << "Large drift encountered! Old drift = " << drOld << endl; RealType logGf = -m_oneover2tau * dot(drOld, drOld); newScale[iw] = getDriftScale(m_tauovermass,newG[iw]); PosType drNew = (newpos[iw] + newScale[iw]*newG[iw]) - W[iw]->R[iat]; // if (dot(drNew, drNew) > 25.0) // cerr << "Large drift encountered! Drift = " << drNew << endl; RealType logGb = -m_oneover2tau * dot(drNew, drNew); RealType x = logGb - logGf; RealType prob = ratios[iw]*ratios[iw]*std::exp(x); if(Random() < prob) { accepted.push_back(W[iw]); nAccept++; W[iw]->R[iat] = newpos[iw]; acc[iw] = true; } else nReject++; } W.acceptMove_GPU(acc); if (accepted.size()) Psi.update(accepted,iat); } // cerr << "Rank = " << myComm->rank() << // " CurrentStep = " << CurrentStep << " isub = " << isub << endl; } Psi.gradLapl(W, grad, lapl); H.evaluate (W, LocalEnergy); if (myPeriod4WalkerDump && (CurrentStep % myPeriod4WalkerDump)==0) W.saveEnsemble(); Estimators->accumulate(W); } while(step<nSteps); Psi.recompute(W); double accept_ratio = (double)nAccept/(double)(nAccept+nReject); Estimators->stopBlock(accept_ratio); nAcceptTot += nAccept; nRejectTot += nReject; ++block; recordBlock(block); } while(block<nBlocks); //finalize a qmc section if (!myComm->rank()) gpu::cuda_memory_manager.report(); return finalize(block); }
bool DMCcuda::runWithNonlocal() { resetRun(); Mover->MaxAge = 1; IndexType block = 0; IndexType nAcceptTot = 0; IndexType nRejectTot = 0; int nat = W.getTotalNum(); int nw = W.getActiveWalkers(); vector<RealType> LocalEnergy(nw), LocalEnergyOld(nw), oldScale(nw), newScale(nw); vector<PosType> delpos(nw); vector<PosType> dr(nw); vector<PosType> newpos(nw); vector<ValueType> ratios(nw), rplus(nw), rminus(nw), R2prop(nw), R2acc(nw); vector<PosType> oldG(nw), newG(nw); vector<ValueType> oldL(nw), newL(nw); vector<Walker_t*> accepted(nw); Matrix<ValueType> lapl(nw, nat); Matrix<GradType> grad(nw, nat); vector<vector<NonLocalData> > Txy(nw); for (int iw=0; iw<nw; iw++) W[iw]->Weight = 1.0; do { IndexType step = 0; nAccept = nReject = 0; Estimators->startBlock(nSteps); do { step++; CurrentStep++; nw = W.getActiveWalkers(); LocalEnergy.resize(nw); oldScale.resize(nw); newScale.resize(nw); delpos.resize(nw); dr.resize(nw); newpos.resize(nw); ratios.resize(nw); rplus.resize(nw); rminus.resize(nw); oldG.resize(nw); newG.resize(nw); oldL.resize(nw); newL.resize(nw); accepted.resize(nw); lapl.resize(nw, nat); grad.resize(nw, nat); R2prop.resize(nw,0.0); R2acc.resize(nw,0.0); W.updateLists_GPU(); Txy.resize(nw); for (int iw=0; iw<nw; iw++) { Txy[iw].clear(); Txy[iw].push_back(NonLocalData(-1, 1.0, PosType())); W[iw]->Age++; } for(int iat=0; iat<nat; iat++) { Psi.getGradient (W, iat, oldG); //create a 3N-Dimensional Gaussian with variance=1 makeGaussRandomWithEngine(delpos,Random); for(int iw=0; iw<nw; iw++) { delpos[iw] *= m_sqrttau; oldScale[iw] = getDriftScale(m_tauovermass,oldG[iw]); dr[iw] = delpos[iw] + (oldScale[iw]*oldG[iw]); newpos[iw]=W[iw]->R[iat] + dr[iw]; ratios[iw] = 1.0; R2prop[iw] += dot(delpos[iw], delpos[iw]); } W.proposeMove_GPU(newpos, iat); Psi.ratio(W,iat,ratios,newG, newL); accepted.clear(); vector<bool> acc(nw, false); for(int iw=0; iw<nw; ++iw) { PosType drOld = newpos[iw] - (W[iw]->R[iat] + oldScale[iw]*oldG[iw]); RealType logGf = -m_oneover2tau * dot(drOld, drOld); newScale[iw] = getDriftScale(m_tauovermass,newG[iw]); PosType drNew = (newpos[iw] + newScale[iw]*newG[iw]) - W[iw]->R[iat]; RealType logGb = -m_oneover2tau * dot(drNew, drNew); RealType x = logGb - logGf; RealType prob = ratios[iw]*ratios[iw]*std::exp(x); if(Random() < prob && ratios[iw] > 0.0) { accepted.push_back(W[iw]); nAccept++; W[iw]->R[iat] = newpos[iw]; W[iw]->Age = 0; acc[iw] = true; R2acc[iw] += dot(delpos[iw], delpos[iw]); } else nReject++; } W.acceptMove_GPU(acc); if (accepted.size()) Psi.update(accepted,iat); } for (int iw=0; iw < nw; iw++) if (W[iw]->Age) cerr << "Encountered stuck walker with iw=" << iw << endl; // Psi.recompute(W, false); Psi.gradLapl(W, grad, lapl); H.evaluate (W, LocalEnergy, Txy); if (CurrentStep == 1) LocalEnergyOld = LocalEnergy; // Now, attempt nonlocal move accepted.clear(); vector<int> iatList; vector<PosType> accPos; for (int iw=0; iw<nw; iw++) { int ibar = NLop.selectMove(Random(), Txy[iw]); // cerr << "Txy[iw].size() = " << Txy[iw].size() << endl; if (ibar) { accepted.push_back(W[iw]); int iat = Txy[iw][ibar].PID; iatList.push_back(iat); accPos.push_back(W[iw]->R[iat] + Txy[iw][ibar].Delta); } } if (accepted.size()) { // W.proposeMove_GPU(newpos, iatList); Psi.ratio(accepted,iatList, accPos, ratios, newG, newL); Psi.update(accepted,iatList); for (int i=0; i<accepted.size(); i++) accepted[i]->R[iatList[i]] = accPos[i]; W.copyWalkersToGPU(); } // Now branch for (int iw=0; iw<nw; iw++) { W[iw]->Weight *= branchEngine->branchWeight(LocalEnergy[iw], LocalEnergyOld[iw]); W[iw]->getPropertyBase()[R2ACCEPTED] = R2acc[iw]; W[iw]->getPropertyBase()[R2PROPOSED] = R2prop[iw]; } Mover->setMultiplicity(W.begin(), W.end()); branchEngine->branch(CurrentStep,W); nw = W.getActiveWalkers(); LocalEnergyOld.resize(nw); for (int iw=0; iw<nw; iw++) LocalEnergyOld[iw] = W[iw]->getPropertyBase()[LOCALENERGY]; } while(step<nSteps); Psi.recompute(W, true); double accept_ratio = (double)nAccept/(double)(nAccept+nReject); Estimators->stopBlock(accept_ratio); nAcceptTot += nAccept; nRejectTot += nReject; ++block; recordBlock(block); } while(block<nBlocks); //finalize a qmc section return finalize(block); }