// import: QDP <- CPS // import: QDP <- CPS // export : CPS <- QDP // impexFermion(0,Lat,src,src_cps,0,1); void impexFermion( int if_export, CPS_NAMESPACE::Lattice &Lat, multi1d<LatticeFermion> const &qdp, CPS_NAMESPACE::Float *cps_p, int even, int odd, int Ls=0, double fac_t=1.){ // double M5 = GJP.DwfHeight(); // double fac = (5-M5)*(5-M5); int i, node_latt[5]; char *fname="impexFermion"; unsigned long vol=1; for(i=0;i<5;i++) node_latt[i]=GJP.NodeSites(i); for(i=0;i<4;i++) vol *= node_latt[i]; if(Ls==0){ Ls = node_latt[4]; // if( !if_export) fac=fac_t; } double fac = fac_t; if (qdp.size() != Ls ) { printf("qdp.size()(%d) != node_latt[4](%d)\n",qdp.size(),node_latt[4]); exit(-4); } static int called=0; #if 1 #ifndef USE_OMP int x[5]; for(x[4]=0;x[4]<Ls;x[4]++){ QDPdouble *qdp_p = (QDPdouble *) &(qdp[x[4]].elem(0).elem(0).elem(0).real()); // printf("%d: cps_p=%p qdp_p=%p\n",x[4],cps_p,qdp_p); for ( x[3]=0;x[3]<node_latt[3];x[3]++ ) { for ( x[2]=0;x[2]<node_latt[2];x[2]++ ) { for ( x[1]=0;x[1]<node_latt[1];x[1]++ ) { for ( x[0]=0;x[0]<node_latt[0];x[0]++ ) { #else // omp_set_num_threads(1); QDPdouble *q_p[Ls]; for(i=0;i<Ls;i++) q_p[i] = (QDPdouble *) &(qdp[i].elem(0).elem(0).elem(0).real()); #pragma omp parallel for default(shared) for (i=0;i<vol*Ls;i++){ unsigned long rest=i; int x[5]; for(int j =0; j<4;j++){ x[j]= rest%node_latt[j]; rest = rest/node_latt[j]; } x[4]=rest; // QDPdouble *qdp_p = (QDPdouble *) &(qdp[x[4]].elem(0).elem(0).elem(0).real()); QDPdouble *qdp_p = q_p[x[4]]; const int tnum = omp_get_thread_num(); // if ((called%10000)==0) // Printf("impex %d: %d %d %d %d %d:%d\n",i,x[0],x[1],x[2],x[3],x[4],tnum); #endif for ( int coco=0;coco<12;coco++ ) { for ( int reim=0;reim<2;reim++ ) { int cidx = chroma_idx(x,node_latt,reim,coco,12); int bidx = Lat.FsiteOffsetChkb(x); bidx = reim + 2 *(coco + 12 *bidx); if (0) if( !if_export) {if (bidx==0) Printf("%d %d %d %d %d: cps[0](in)=%g\n", x[0],x[1],x[2],x[3],x[4], *(cps_p+bidx));} else {if (cidx==0) Printf("%d %d %d %d %d: qdp[0](in)=%g\n", x[0],x[1],x[2],x[3],x[4], *(qdp_p+cidx));} if( !if_export){ if(1) if ((odd) && ( (x[0]+x[1]+x[2]+x[3]+x[4])%2==1) ){ *(qdp_p+cidx) = *(cps_p+bidx)*fac; } else if ((even) && ( (x[0]+x[1]+x[2]+x[3]+x[4])%2==0) ){ *(qdp_p+cidx) = *(cps_p+bidx)*fac; } else { // *(qdp_p+cidx) = 0.; } } else { if(1) if ((odd) && ( (x[0]+x[1]+x[2]+x[3]+x[4])%2==1) ){ *(cps_p+bidx) = *(qdp_p+cidx)*fac; } else if ((even) && ( (x[0]+x[1]+x[2]+x[3]+x[4])%2==0) ){ *(cps_p+bidx) = *(qdp_p+cidx)*fac; } else { // *(cps_p+bidx) = 0.; } } if (0) if( if_export) {if (bidx==0) Printf("%d %d %d %d %d: cps[0]=%g\n", x[0],x[1],x[2],x[3],x[4], *(cps_p+bidx));} else {if (cidx==0) Printf("%d %d %d %d %d: qdp[0]=%g\n", x[0],x[1],x[2],x[3],x[4], *(qdp_p+cidx));} }} // reim,coco #ifndef USE_OMP }}}} // x #endif } // x[4] #endif called++; }
int BfmWrapper::bfmMultiInvert5d(multi1d< multi1d<T4> >& psi, const multi1d<Real>& shifts, const multi1d<Real>& Residuals, const multi1d<T4>& chi) { int nshift = shifts.size(); Fermion_t sol_t[nshift]; Fermion_t src_t; double dshifts[nshift]; double alpha[nshift]; double mresidual[nshift]; int dontsum=0; int Ls = invParam.BAP.Ls; if ( chi.size() != Ls ) { QDP_error_exit("Ls mismatch in bfmMultiInvert5d"); } psi.resize(nshift); for(int shift=0;shift<nshift;shift++){ dshifts[shift] =toDouble(shifts[shift]); alpha[shift] = 1.0; mresidual[shift]= toDouble(Residuals[shift]); psi[shift].resize(Ls); for(int s=0;s<Ls;s++){ psi[shift][s] = zero; } } int res; multi1d<T4> src = chi; // Set up BAGEL object int lx = QDP::Layout::subgridLattSize()[0]; int ly = QDP::Layout::subgridLattSize()[1]; int lz = QDP::Layout::subgridLattSize()[2]; int lt = QDP::Layout::subgridLattSize()[3]; bfmarg bfma; #if defined(QDP_USE_OMP_THREADS) bfma.Threads(omp_get_max_threads()); #else bfma.Threads(1); #endif // bfma.Verbose(0); //Physics parameters bfmActionParams *bfmap = (bfmActionParams *) &bfma; *bfmap = invParam.BAP; // Algorithm & code control bfma.time_report_iter=-100; bfma.max_iter = invParam.MaxIter; //Geometry bfma.node_latt[0] = lx; bfma.node_latt[1] = ly; bfma.node_latt[2] = lz; bfma.node_latt[3] = lt; multi1d<int> procs = QDP::Layout::logicalSize(); for(int mu=0;mu<4;mu++){ if (procs[mu]>1) bfma.local_comm[mu] = 0; else bfma.local_comm[mu] = 1; } QDPIO::cout << "Initialising BAGEL-2 solver "<<endl; // Bfm object bfm_qdp<Float> bfm; bfm.init(bfma); //Gauge field import bfm.importGauge(links); //Fermion import int cb = 1; for(int shift=0;shift<nshift;shift++){ sol_t[shift] = bfm.allocFermion(); if (sol_t[shift] == NULL ) { QDP_error_exit("Allocate failed\n"); } bfm.importFermion(psi[shift],sol_t[shift],cb); } src_t = bfm.allocFermion(); if (src_t == NULL ) { QDP_error_exit("Allocate failed\n"); } bfm.importFermion(src,src_t,cb); // Run the inverter int iter; #pragma omp parallel for for(int i=0;i<bfm.nthread;i++) { int iter_thr = bfm.CGNE_prec_MdagM_multi_shift(sol_t,src_t, dshifts, alpha, nshift, mresidual, dontsum); if ( i==0 ) iter = iter_thr; } res = iter; for(int shift=0;shift<nshift;shift++) { bfm.exportFermion(psi[shift],sol_t[shift],cb); bfm.freeFermion(sol_t[shift]); } bfm.freeFermion(src_t); res=iter; bfm.end(); return res; }
template<class Float> int BfmWrapper::bfmMultiInvert4d(multi1d<T4> & psi, const multi1d<Real>& shifts, const multi1d<Real>& Residuals, const T4& chi) { int nshift = shifts.size(); Fermion_t sol_t[nshift]; Fermion_t src_t; double dshifts[nshift]; double alpha[nshift]; double mresidual[nshift]; int dontsum=0; for(int shift=0;shift<nshift;shift++){ dshifts[shift] =toDouble(shifts[shift]); alpha[shift] = 1.0; mresidual[shift]= toDouble(Residuals[shift]); psi[shift] = zero; } int res; T4 src = chi; // Set up BAGEL object int lx = QDP::Layout::subgridLattSize()[0]; int ly = QDP::Layout::subgridLattSize()[1]; int lz = QDP::Layout::subgridLattSize()[2]; int lt = QDP::Layout::subgridLattSize()[3]; bfmarg bfma; #if defined(QDP_USE_OMP_THREADS) bfma.Threads(omp_get_max_threads()); #else bfma.Threads(1); #endif // bfma.Verbose(0); //Physics parameters bfmActionParams *bfmap = (bfmActionParams *) &bfma; *bfmap = invParam.BAP; // Algorithm & code control bfma.time_report_iter=-100; bfma.max_iter = invParam.MaxIter; //Geometry bfma.node_latt[0] = lx; bfma.node_latt[1] = ly; bfma.node_latt[2] = lz; bfma.node_latt[3] = lt; multi1d<int> procs = QDP::Layout::logicalSize(); for(int mu=0;mu<4;mu++){ if (procs[mu]>1) bfma.local_comm[mu] = 0; else bfma.local_comm[mu] = 1; } QDPIO::cout << "Initialising BAGEL-2 solver "<<endl; // Bfm object bfm_qdp<Float> bfm; bfm.init(bfma); //Gauge field import bfm.importGauge(links); //begin karthee clover //Missing clover term if ( invParam.BAP.solver == CloverFermion) { // Import the ClovDiag, ClovOffDiag // Import the ClovInvDiag, ClovInvOffDiag bfm.importClover(CloverDiag,CloverOffDiag, bfm.A); bfm.importClover(CloverInvDiag,CloverInvOffDiag, bfm.Ainv); } //end karthee clover int cb = 1; for(int shift=0;shift<nshift;shift++){ sol_t[shift] = bfm.allocFermion(); bfm.importFermion(psi[shift],sol_t[shift],cb); } src_t = bfm.allocFermion(); bfm.importFermion(src,src_t,cb); // Run the inverter int iter; QDPIO::cout << "Calling BAGEL multishift inverter"<<endl; #pragma omp parallel for for(int i=0;i<bfm.nthread;i++) { int iter_thr = bfm.CGNE_prec_MdagM_multi_shift(sol_t,src_t, dshifts, alpha, nshift, mresidual, dontsum); if ( i==0 ) iter = iter_thr; } res = iter; for(int shift=0;shift<nshift;shift++) { bfm.exportFermion(psi[shift],sol_t[shift],cb); bfm.freeFermion(sol_t[shift]); } bfm.freeFermion(src_t); bfm.end(); return res; }
void BfmWrapper::bfmOneFlavorRatioRationalForce(multi1d<T4> &phi, multi1d<LatticeColorMatrix> &force, Real M_pv, Real M_f, Real nrm_pv,multi1d<Real> &shifts_pv, multi1d<Real> &residue_pv, Real nrm_f,multi1d<Real> &shifts_f, multi1d<Real> &residue_f ) { double m_pv = toDouble(M_pv); double m_f = toDouble(M_f ); int n_pv = shifts_pv.size(); int n_f = shifts_f.size(); double ak_f[n_f]; double bk_f[n_f]; double ak_pv[n_pv]; double bk_pv[n_pv]; double rk_pv[n_pv]; double rk_f[n_f]; double a0_f = toDouble(nrm_f); double a0_pv = toDouble(nrm_pv); double residual = toDouble(invParam.RsdTarget[0]); for(int pole=0;pole<n_f;pole++){ ak_f[pole]=toDouble(residue_f[pole]); bk_f[pole]=toDouble(shifts_f[pole]); rk_f[pole] = residual; } for(int pole=0;pole<n_pv;pole++){ ak_pv[pole]=toDouble(residue_pv[pole]); bk_pv[pole]=toDouble(shifts_pv[pole]); rk_pv[pole] = residual; } for(int k=0;k<4;k++){ rk_f[k] *= 10; rk_pv[k] *= 10; } rk_f[0] *= 2; RationalSanityCheck(-0.5,n_f,a0_f,ak_f,bk_f); RationalSanityCheck(0.25,n_pv,a0_pv,ak_pv,bk_pv); QDPIO::cout << "BfmWrapper: Checked the rational functions look rational" <<endl; // Set up BAGEL object int lx = QDP::Layout::subgridLattSize()[0]; int ly = QDP::Layout::subgridLattSize()[1]; int lz = QDP::Layout::subgridLattSize()[2]; int lt = QDP::Layout::subgridLattSize()[3]; bfmarg bfma; #if defined(QDP_USE_OMP_THREADS) bfma.Threads(omp_get_max_threads()); #else bfma.Threads(1); #endif // bfma.Verbose(0); //Physics parameters bfmActionParams *bfmap = (bfmActionParams *) &bfma; *bfmap = invParam.BAP; // Algorithm & code control bfma.time_report_iter=-100; bfma.max_iter = invParam.MaxIter; bfma.residual = residual; QDPIO::cout << "BfmWrapper: residual "<< bfma.residual<<endl; //Geometry bfma.node_latt[0] = lx; bfma.node_latt[1] = ly; bfma.node_latt[2] = lz; bfma.node_latt[3] = lt; multi1d<int> procs = QDP::Layout::logicalSize(); for(int mu=0;mu<4;mu++){ if (procs[mu]>1) bfma.local_comm[mu] = 0; else bfma.local_comm[mu] = 1; } // Bfm object bfm_qdp<Float> M; bfm_qdp<Float> PV; bfma.mass = m_f; M.init(bfma); bfma.mass = m_pv; PV.init(bfma); //Gauge field import M.importGauge(links); PV.importGauge(links); Matrix_t force_t[2]; force_t[0] = M.allocMatrix(); force_t[1] = M.allocMatrix(); Fermion_t phi_t; phi_t = M.allocFermion(); static int range_check; if ( range_check == 0 ) { QDPIO::cout << "BfmWrapper: checking spectral range "<<endl; this->SpectralRange<Float>(M); QDPIO::cout << "BfmWrapper: checking spectral range "<<endl; this->SpectralRange<Float>(PV); range_check=1; } #pragma omp parallel for for(int i=0;i<M.nthread;i++) { int cb=1; M.importFermion(phi,phi_t,cb); // Odd odd force term M.zeroMatrix(force_t[0]); M.zeroMatrix(force_t[1]); this->bfmOneFlavorRatioRationalForce<Float>(M, PV, phi_t,force_t, n_pv,a0_pv,ak_pv,bk_pv,rk_pv, n_f ,a0_f ,ak_f ,bk_f,rk_f); M.exportForce(force_t[0],force,0); M.exportForce(force_t[1],force,1); } M.freeFermion(phi_t); M.freeMatrix(force_t[0]); M.freeMatrix(force_t[1]); M.end(); PV.end(); }