/**
 * Apply MdagM to a full device spinor field, one parity half at a time.
 *
 * For each half, Dslash writes into the even half of the scratch field tmp1,
 * and DslashXpay then fills the matching half of `out` from that scratch
 * field, the original source half and the scalar 4*mass*mass.
 *
 * @param out  destination field; its Even() and Odd() halves are written
 * @param in   source field; its Even() and Odd() halves are read
 */
void DiracStaggered::MdagM(cudaColorSpinorField &out, const cudaColorSpinorField &in) const
{
  // Lazy one-time setup of the dslash / staggered constants.
  if (!initDslash) {
    initDslashConstants(*fatGauge, in.Stride());
    initStaggeredConstants(*fatGauge, *longGauge);
  }

  const bool ownsTmp = newTmp(&tmp1, in);

  // NOTE(review): none of these casts is checked for a null result before
  // being dereferenced below.
  cudaColorSpinorField *scratch = dynamic_cast<cudaColorSpinorField*>(&(tmp1->Even()));
  cudaColorSpinorField *srcEven = dynamic_cast<cudaColorSpinorField*>(&(in.Even()));
  cudaColorSpinorField *srcOdd  = dynamic_cast<cudaColorSpinorField*>(&(in.Odd()));
  cudaColorSpinorField *dstEven = dynamic_cast<cudaColorSpinorField*>(&(out.Even()));
  cudaColorSpinorField *dstOdd  = dynamic_cast<cudaColorSpinorField*>(&(out.Odd()));

  const double fourMassSq = 4*mass*mass;

  // Even half of the result.
  Dslash(*scratch, *srcEven, QUDA_ODD_PARITY);
  DslashXpay(*dstEven, *scratch, QUDA_EVEN_PARITY, *srcEven, fourMassSq);

  // Odd half of the result; the same scratch half is reused.
  Dslash(*scratch, *srcOdd, QUDA_EVEN_PARITY);
  DslashXpay(*dstOdd, *scratch, QUDA_ODD_PARITY, *srcOdd, fourMassSq);

  deleteTmp(&tmp1, ownsTmp);
}
/**
 * Apply MdagM to a full spinor field through the generic ColorSpinorField
 * interface, handling the even and odd halves in turn.
 *
 * Only the even half of the scratch field tmp1 is used; it is overwritten
 * between the two passes.
 *
 * @param out  destination field; its Even() and Odd() halves are written
 * @param in   source field; its Even() and Odd() halves are read
 */
void DiracStaggered::MdagM(ColorSpinorField &out, const ColorSpinorField &in) const
{
  const bool ownsTmp = newTmp(&tmp1, in);
  const double fourMassSq = 4*mass*mass;

  // Even half of the result.
  Dslash(tmp1->Even(), in.Even(), QUDA_ODD_PARITY);
  DslashXpay(out.Even(), tmp1->Even(), QUDA_EVEN_PARITY, in.Even(), fourMassSq);

  // Odd half of the result.
  Dslash(tmp1->Even(), in.Odd(), QUDA_EVEN_PARITY);
  DslashXpay(out.Odd(), tmp1->Even(), QUDA_ODD_PARITY, in.Odd(), fourMassSq);

  deleteTmp(&tmp1, ownsTmp);
}
/**
 * Apply the single-parity MdagM operator to a device field.
 *
 * matpcType selects which parity the operator acts on. Dslash is applied
 * with the opposite parity into tmp1, then DslashXpay with the selected
 * parity combines tmp1, `in` and the scalar 4*mass*mass into `out`.
 *
 * @param out  destination field
 * @param in   source field
 */
void DiracStaggeredPC::MdagM(cudaColorSpinorField &out, const cudaColorSpinorField &in) const
{
  // Lazy one-time setup of the dslash / staggered constants.
  if (!initDslash) {
    initDslashConstants(*fatGauge, in.Stride());
    initStaggeredConstants(*fatGauge, *longGauge);
  }

  const bool ownsTmp = newTmp(&tmp1, in);

  QudaParity target   = QUDA_INVALID_PARITY;
  QudaParity opposite = QUDA_INVALID_PARITY;

  switch (matpcType) {
  case QUDA_MATPC_EVEN_EVEN:
    target   = QUDA_EVEN_PARITY;
    opposite = QUDA_ODD_PARITY;
    break;
  case QUDA_MATPC_ODD_ODD:
    target   = QUDA_ODD_PARITY;
    opposite = QUDA_EVEN_PARITY;
    break;
  default:
    errorQuda("Invalid matpcType(%d) in function\n", matpcType);
    break;
  }

  Dslash(*tmp1, in, opposite);
  DslashXpay(out, *tmp1, target, in, 4*mass*mass);

  deleteTmp(&tmp1, ownsTmp);
}
/**
 * Apply the even-odd preconditioned Wilson operator.
 *
 * Dslash is applied with the parity opposite to the one selected by
 * matpcType, writing into tmp1. DslashXpay is then applied with the selected
 * parity, combining tmp1, `in` and the scalar -kappa*kappa into `out`.
 * Any matpcType other than EVEN_EVEN / ODD_ODD is reported via errorQuda.
 *
 * @param out  destination field
 * @param in   source field
 */
void DiracWilsonPC::M(cudaColorSpinorField &out, const cudaColorSpinorField &in) const
{
  double kappa2 = -kappa*kappa;

  const bool ownsTmp = newTmp(&tmp1, in);

  const bool evenEven = (matpcType == QUDA_MATPC_EVEN_EVEN);
  if (evenEven || matpcType == QUDA_MATPC_ODD_ODD) {
    const QudaParity target   = evenEven ? QUDA_EVEN_PARITY : QUDA_ODD_PARITY;
    const QudaParity opposite = evenEven ? QUDA_ODD_PARITY : QUDA_EVEN_PARITY;

    Dslash(*tmp1, in, opposite);
    DslashXpay(out, *tmp1, target, in, kappa2);
  } else {
    errorQuda("MatPCType %d not valid for DiracWilsonPC", matpcType);
  }

  deleteTmp(&tmp1, ownsTmp);
}
// Apply the even-odd preconditioned clover-improved Dirac operator void DiracDomainWallPC::M(ColorSpinorField &out, const ColorSpinorField &in) const { if ( in.Ndim() != 5 || out.Ndim() != 5) errorQuda("Wrong number of dimensions\n"); double kappa2 = -kappa5*kappa5; bool reset = newTmp(&tmp1, in); if (matpcType == QUDA_MATPC_EVEN_EVEN) { Dslash(*tmp1, in, QUDA_ODD_PARITY); DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, in, kappa2); } else if (matpcType == QUDA_MATPC_ODD_ODD) { Dslash(*tmp1, in, QUDA_EVEN_PARITY); DslashXpay(out, *tmp1, QUDA_ODD_PARITY, in, kappa2); } else { errorQuda("MatPCType %d not valid for DiracDomainWallPC", matpcType); } deleteTmp(&tmp1, reset); }
//------------------------------------------------------------------
// MatDag(Vector *out, Vector *in)
// Applies the daggered full (both checkerboards) operator to in and
// stores the result in out.
// Each vector is treated as two consecutive halves of
//   GJP.VolNodeSites() * lat.FsiteSize() / 2
// IFloats; the second half is the "even" part.
// For each half: half_out = half_in - kappa * Dslash^dag(other half_in).
// NOTE(review): out is written while in is still being read, so the
// two vectors are presumably required not to overlap.
//------------------------------------------------------------------
void DiracOpWilson::MatDag(Vector *out, Vector *in)
{
  char *fname = "MatDag(V*,V*)";
  VRB.Func(cname,fname);

  int half_len = GJP.VolNodeSites() * lat.FsiteSize() / 2;

  IFloat *src_first  = (IFloat *) in;
  IFloat *dst_first  = (IFloat *) out;
  // second ("even") halves of the source and of the solution
  IFloat *src_second = src_first + half_len;
  IFloat *dst_second = dst_first + half_len;

  // first half of the result
  Dslash(out, (Vector *) src_second, CHKB_EVEN, DAG_YES);
  fTimesV1PlusV2(dst_first, -(IFloat) kappa, dst_first, src_first, half_len);

  // second half of the result
  Dslash((Vector *) dst_second, in, CHKB_ODD, DAG_YES);
  fTimesV1PlusV2(dst_second, -(IFloat) kappa, dst_second, src_second, half_len);
}
//------------------------------------------------------------------
// CalcHmdForceVecs(Vector *chi)
// Fills the two work vectors f_out and f_in, each viewed as two
// consecutive halves of 12 * GJP.VolNodeSites() Floats:
//   f_out = (chi, rho)    rho   = Dslash(chi)      [CHKB_ODD, DAG_NO]
//   f_in  = (psi, sigma)  psi   = -kappa^2 * MatPc(chi)
//                         sigma = Dslash^dag(psi)  [CHKB_ODD, DAG_YES]
// Both f_out and f_in must be non-null.
//------------------------------------------------------------------
void DiracOpWilson::CalcHmdForceVecs(Vector *chi)
{
  char *fname = "CalcHmdForceVecs(V*)" ;
  VRB.Func(cname,fname) ;

  if (f_out == 0)
    ERR.Pointer(cname, fname, "f_out") ;

  if (f_in == 0)
    ERR.Pointer(cname, fname, "f_in") ;

  int half_len = 12 * GJP.VolNodeSites() ;

  // first half of f_out: a copy of chi
  Vector *chi_copy = f_out ;
  chi_copy->CopyVec(chi, half_len) ;

  // first half of f_in: psi
  Vector *psi = f_in ;
  MatPc(psi,chi) ;
  psi->VecTimesEquFloat(-kappa*kappa,half_len) ;

  // second half of f_out: rho
  Vector *rho = (Vector *)((Float *)f_out + half_len) ;
  Dslash(rho, chi, CHKB_ODD, DAG_NO) ;

  // second half of f_in: sigma
  Vector *sigma = (Vector *)((Float *)f_in + half_len) ;
  Dslash(sigma, psi, CHKB_ODD, DAG_YES) ;
}
// Apply the even-odd preconditioned clover-improved Dirac operator void DiracCloverPC::M(cudaColorSpinorField &out, const cudaColorSpinorField &in) const { double kappa2 = -kappa*kappa; // FIXME: For asymmetric, a "DslashCxpay" kernel would improve performance. bool reset = newTmp(&tmp1, in); if (matpcType == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC) { bool reset = newTmp(&tmp2, in); // DiracCloverPC::Dslash applies A^{-1}Dslash Dslash(*tmp1, in, QUDA_ODD_PARITY); Clover(*tmp2, in, QUDA_EVEN_PARITY); // DiracWilson::Dslash applies only Dslash DiracWilson::DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, *tmp2, kappa2); } else if (matpcType == QUDA_MATPC_ODD_ODD_ASYMMETRIC) { // FIXME: It would be nice if I could do something like: cudaColorSpinorField tmp3( in.param() ); // to save copying the data from 'in' bool reset = newTmp(&tmp2, in); // DiracCloverPC::Dslash applies A^{-1}Dslash Dslash(*tmp1, in, QUDA_EVEN_PARITY); Clover(*tmp2, in, QUDA_ODD_PARITY); // DiracWilson::Dslash applies only Dslash DiracWilson::DslashXpay(out, *tmp1, QUDA_ODD_PARITY, *tmp2, kappa2); } else if (!dagger) { // symmetric preconditioning if (matpcType == QUDA_MATPC_EVEN_EVEN) { Dslash(*tmp1, in, QUDA_ODD_PARITY); DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, in, kappa2); } else if (matpcType == QUDA_MATPC_ODD_ODD) { Dslash(*tmp1, in, QUDA_EVEN_PARITY); DslashXpay(out, *tmp1, QUDA_ODD_PARITY, in, kappa2); } else { errorQuda("Invalid matpcType"); } } else { // symmetric preconditioning, dagger if (matpcType == QUDA_MATPC_EVEN_EVEN) { CloverInv(out, in, QUDA_EVEN_PARITY); Dslash(*tmp1, out, QUDA_ODD_PARITY); DiracWilson::DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, in, kappa2); } else if (matpcType == QUDA_MATPC_ODD_ODD) { CloverInv(out, in, QUDA_ODD_PARITY); Dslash(*tmp1, out, QUDA_EVEN_PARITY); DiracWilson::DslashXpay(out, *tmp1, QUDA_ODD_PARITY, in, kappa2); } else { errorQuda("MatPCType %d not valid for DiracCloverPC", matpcType); } } deleteTmp(&tmp1, reset); }
// Apply the even-odd preconditioned clover-improved Dirac operator void DiracCloverPC::M(cudaColorSpinorField &out, const cudaColorSpinorField &in) const { double kappa2 = -kappa*kappa; // FIXME: For asymmetric, a "DslashCxpay" kernel would improve performance. bool reset = newTmp(&tmp1, in); if (matpcType == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC) { Dslash(*tmp1, in, QUDA_ODD_PARITY); Clover(out, in, QUDA_EVEN_PARITY); #ifdef MULTI_GPU // not safe to alias because of partial updates cudaColorSpinorField tmp3(in); #else // safe since out is not read after writing cudaColorSpinorField &tmp3 = out; #endif DiracWilson::DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, tmp3, kappa2); } else if (matpcType == QUDA_MATPC_ODD_ODD_ASYMMETRIC) { Dslash(*tmp1, in, QUDA_EVEN_PARITY); Clover(out, in, QUDA_ODD_PARITY); #ifdef MULTI_GPU // not safe to alias because of partial updates cudaColorSpinorField tmp3(in); #else // safe since out is not read after writing cudaColorSpinorField &tmp3 = out; #endif DiracWilson::DslashXpay(out, *tmp1, QUDA_ODD_PARITY, tmp3, kappa2); } else if (!dagger) { // symmetric preconditioning if (matpcType == QUDA_MATPC_EVEN_EVEN) { Dslash(*tmp1, in, QUDA_ODD_PARITY); DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, in, kappa2); } else if (matpcType == QUDA_MATPC_ODD_ODD) { Dslash(*tmp1, in, QUDA_EVEN_PARITY); DslashXpay(out, *tmp1, QUDA_ODD_PARITY, in, kappa2); } else { errorQuda("Invalid matpcType"); } } else { // symmetric preconditioning, dagger if (matpcType == QUDA_MATPC_EVEN_EVEN) { CloverInv(out, in, QUDA_EVEN_PARITY); Dslash(*tmp1, out, QUDA_ODD_PARITY); DiracWilson::DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, in, kappa2); } else if (matpcType == QUDA_MATPC_ODD_ODD) { CloverInv(out, in, QUDA_ODD_PARITY); Dslash(*tmp1, out, QUDA_EVEN_PARITY); DiracWilson::DslashXpay(out, *tmp1, QUDA_ODD_PARITY, in, kappa2); } else { errorQuda("MatPCType %d not valid for DiracCloverPC", matpcType); } } deleteTmp(&tmp1, reset); }
/**
 * Apply the single-parity MdagM operator through the generic
 * ColorSpinorField interface.
 *
 * matpcType selects the parity the operator acts on. Dslash is applied
 * with the opposite parity into tmp1, then DslashXpay with the selected
 * parity combines tmp1, `in` and the scalar 4*mass*mass into `out`.
 *
 * @param out  destination field
 * @param in   source field
 */
void DiracStaggeredPC::MdagM(ColorSpinorField &out, const ColorSpinorField &in) const
{
  const bool ownsTmp = newTmp(&tmp1, in);

  const bool evenEven = (matpcType == QUDA_MATPC_EVEN_EVEN);
  const bool oddOdd   = (matpcType == QUDA_MATPC_ODD_ODD);

  if (!evenEven && !oddOdd) {
    errorQuda("Invalid matpcType(%d) in function\n", matpcType);
  }

  // Both parities stay QUDA_INVALID_PARITY when matpcType is unsupported.
  const QudaParity target =
      evenEven ? QUDA_EVEN_PARITY : (oddOdd ? QUDA_ODD_PARITY : QUDA_INVALID_PARITY);
  const QudaParity opposite =
      evenEven ? QUDA_ODD_PARITY : (oddOdd ? QUDA_EVEN_PARITY : QUDA_INVALID_PARITY);

  Dslash(*tmp1, in, opposite);
  DslashXpay(out, *tmp1, target, in, 4*mass*mass);

  deleteTmp(&tmp1, ownsTmp);
}
// Apply the even-odd preconditioned clover-improved Dirac operator void DiracCloverPC::M(cudaColorSpinorField &out, const cudaColorSpinorField &in) const { double kappa2 = -kappa*kappa; bool reset1 = newTmp(&tmp1, in); if (matpcType == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC) { // DiracCloverPC::Dslash applies A^{-1}Dslash Dslash(*tmp1, in, QUDA_ODD_PARITY); // DiracClover::DslashXpay applies (A - kappa^2 D) DiracClover::DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, in, kappa2); } else if (matpcType == QUDA_MATPC_ODD_ODD_ASYMMETRIC) { // DiracCloverPC::Dslash applies A^{-1}Dslash Dslash(*tmp1, in, QUDA_EVEN_PARITY); // DiracClover::DslashXpay applies (A - kappa^2 D) DiracClover::DslashXpay(out, *tmp1, QUDA_ODD_PARITY, in, kappa2); } else if (!dagger) { // symmetric preconditioning if (matpcType == QUDA_MATPC_EVEN_EVEN) { Dslash(*tmp1, in, QUDA_ODD_PARITY); DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, in, kappa2); } else if (matpcType == QUDA_MATPC_ODD_ODD) { Dslash(*tmp1, in, QUDA_EVEN_PARITY); DslashXpay(out, *tmp1, QUDA_ODD_PARITY, in, kappa2); } else { errorQuda("Invalid matpcType"); } } else { // symmetric preconditioning, dagger if (matpcType == QUDA_MATPC_EVEN_EVEN) { CloverInv(out, in, QUDA_EVEN_PARITY); Dslash(*tmp1, out, QUDA_ODD_PARITY); DiracWilson::DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, in, kappa2); } else if (matpcType == QUDA_MATPC_ODD_ODD) { CloverInv(out, in, QUDA_ODD_PARITY); Dslash(*tmp1, out, QUDA_EVEN_PARITY); DiracWilson::DslashXpay(out, *tmp1, QUDA_ODD_PARITY, in, kappa2); } else { errorQuda("MatPCType %d not valid for DiracCloverPC", matpcType); } } deleteTmp(&tmp1, reset1); }
//------------------------------------------------------------------ // int MatInv(Vector *out, Vector *in, // Float *true_res, PreserveType prs_in); // The inverse of the unconditioned Dirac Operator // using Conjugate gradient. // If true_res !=0 the value of the true residual is returned // in true_res. // *true_res = |src - MatPcDagMatPc * sol| / |src| // prs_in is used to specify if the source // in should be preserved or not. If not the memory usage // is less by half the size of a fermion vector. // The function returns the total number of CG iterations. //------------------------------------------------------------------ int DiracOpWilson::MatInv(Vector *out, Vector *in, Float *true_res, PreserveType prs_in) { char *fname = "MatInv(V*,V*,F*)"; VRB.Func(cname,fname); Vector *temp2; int temp_size = GJP.VolNodeSites() * lat.FsiteSize() / 2; // check out if converted //for (int ii = 0; ii < 2 * temp_size; ii++) { // VRB.Result(cname, fname, "in[%d] = %e\n", ii, // *((Float *)in + ii)); // VRB.Result(cname, fname, "out[%d] = %e\n", ii, // *((Float *)out + ii)); //} Vector *temp = (Vector *) smalloc(temp_size * sizeof(Float)); if (temp == 0) ERR.Pointer(cname, fname, "temp"); VRB.Smalloc(cname,fname, "temp", temp, temp_size * sizeof(Float)); if(prs_in == PRESERVE_YES){ temp2 = (Vector *) smalloc(2*temp_size * sizeof(Float)); if (temp2 == 0) ERR.Pointer(cname, fname, "temp2"); VRB.Smalloc(cname,fname, "temp2", temp2, temp_size * sizeof(Float)); } // save source if(prs_in == PRESERVE_YES){ moveMem((Float *)temp2, (Float *)in, 2*temp_size*sizeof(Float)); } #if 0 { printf("in(before)=\n"); IFloat *temp_p = (IFloat *)in; for(int ii = 0; ii< GJP.VolNodeSites();ii++){ for(int jj = 0; jj< lat.FsiteSize();jj++){ if (fabs(*temp_p)>1e-7){ printf("i=%d j=%d\n",ii,jj); printf("%e\n",*(temp_p)); } temp_p++; } } } #endif // points to the even part of fermion source Vector *even_in = (Vector *) ( (Float *) in + temp_size ); // points to the even part of fermion solution Vector 
*even_out = (Vector *) ( (Float *) out + temp_size ); Dslash(temp, even_in, CHKB_EVEN, DAG_NO); fTimesV1PlusV2((Float *)temp, (Float) kappa, (Float *)temp, (Float *)in, temp_size); #if 0 { printf("temp(before)=\n"); IFloat *temp_p = (IFloat *)temp; for(int ii = 0; ii< GJP.VolNodeSites();ii++){ for(int jj = 0; jj< lat.FsiteSize();jj++){ if (fabs(*temp_p)>1e-7){ printf("i=%d j=%d\n",ii,jj); printf("%e\n",*(temp_p)); } temp_p++; } } } #endif int iter; switch (dirac_arg->Inverter) { case CG: MatPcDag(in, temp); iter = InvCg(out,in,true_res); break; case BICGSTAB: iter = BiCGstab(out,temp,0.0,dirac_arg->bicgstab_n,true_res); break; default: ERR.General(cname,fname,"InverterType %d not implemented\n", dirac_arg->Inverter); } Dslash(temp, out, CHKB_ODD, DAG_NO); fTimesV1PlusV2((Float *)even_out, (Float) kappa, (Float *)temp, (Float *) even_in, temp_size); VRB.Sfree(cname, fname, "temp", temp); sfree(temp); // restore source if(prs_in == PRESERVE_YES){ moveMem((Float *)in, (Float *)temp2, 2*temp_size*sizeof(Float)); } #if 0 { printf("in(after)=\n"); IFloat *temp_p = (IFloat *)in; for(int ii = 0; ii< GJP.VolNodeSites();ii++){ for(int jj = 0; jj< lat.FsiteSize();jj++){ if (fabs(*temp_p)>1e-7){ printf("i=%d j=%d\n",ii,jj); printf("%e\n",*(temp_p)); } temp_p++; } } } #endif #if 0 { printf("temp2(after)=\n"); IFloat *temp_p = (IFloat *)temp2; for(int ii = 0; ii< GJP.VolNodeSites();ii++){ for(int jj = 0; jj< lat.FsiteSize();jj++){ if (fabs(*temp_p)>1e-7){ printf("i=%d j=%d\n",ii,jj); printf("%e\n",*(temp_p)); } temp_p++; } } } #endif if(prs_in == PRESERVE_YES){ VRB.Sfree(cname, fname, "temp2", temp2); sfree(temp2); } return iter; }
//------------------------------------------------------------------ // int MatInv(Vector *out, Vector *in, // Float *true_res, PreserveType prs_in); // The inverse of the unconditioned Dirac Operator // using Conjugate gradient. // If true_res !=0 the value of the true residual is returned // in true_res. // *true_res = |src - MatPcDagMatPc * sol| / |src| // prs_in is used to specify if the source // in should be preserved or not. If not the memory usage // is less by half the size of a fermion vector. // The function returns the total number of CG iterations. //------------------------------------------------------------------ int DiracOpWilson::MatInv(Vector *out, Vector *in, Float *true_res, PreserveType prs_in) { char *fname = "MatInv(V*,V*,F*)"; VRB.Func(cname,fname); Vector *temp2; int temp_size = GJP.VolNodeSites() * lat.FsiteSize() / 2; // check out if converted //for (int ii = 0; ii < 2 * temp_size; ii++) { // VRB.Result(cname, fname, "in[%d] = %e\n", ii, // *((IFloat *)in + ii)); // VRB.Result(cname, fname, "out[%d] = %e\n", ii, // *((IFloat *)out + ii)); //} Vector *temp = (Vector *) smalloc(temp_size * sizeof(Float)); if (temp == 0) ERR.Pointer(cname, fname, "temp"); VRB.Smalloc(cname,fname, "temp", temp, temp_size * sizeof(Float)); if(prs_in == PRESERVE_YES){ temp2 = (Vector *) smalloc(temp_size * sizeof(Float)); if (temp2 == 0) ERR.Pointer(cname, fname, "temp2"); VRB.Smalloc(cname,fname, "temp2", temp2, temp_size * sizeof(Float)); } // points to the even part of fermion source Vector *even_in = (Vector *) ( (IFloat *) in + temp_size ); // points to the even part of fermion solution Vector *even_out = (Vector *) ( (IFloat *) out + temp_size ); Dslash(temp, even_in, CHKB_EVEN, DAG_NO); fTimesV1PlusV2((IFloat *)temp, (IFloat) kappa, (IFloat *)temp, (IFloat *)in, temp_size); // save source if(prs_in == PRESERVE_YES){ moveMem((IFloat *)temp2, (IFloat *)in, temp_size * sizeof(IFloat) / sizeof(char)); } MatPcDag(in, temp); int iter = InvCg(out,in,true_res); 
// restore source if(prs_in == PRESERVE_YES){ moveMem((IFloat *)in, (IFloat *)temp2, temp_size * sizeof(IFloat) / sizeof(char)); } Dslash(temp, out, CHKB_ODD, DAG_NO); fTimesV1PlusV2((IFloat *)even_out, (IFloat) kappa, (IFloat *)temp, (IFloat *) even_in, temp_size); VRB.Sfree(cname, fname, "temp", temp); sfree(temp); if(prs_in == PRESERVE_YES){ VRB.Sfree(cname, fname, "temp2", temp2); sfree(temp2); } return iter; }