// Rebuild the parity of x that the preconditioned solve did not produce.
//
//   x        full-lattice field; one parity is read as the preconditioned
//            solution and the opposite parity is overwritten here
//   b        full-lattice source field (read only)
//   solType  requested solution type; for the two MATPC solution types the
//            function returns immediately without touching x
//
// The member temporary tmp1 is obtained through newTmp() and handed back to
// deleteTmp() together with the flag newTmp() returned.
void DiracCloverPC::reconstruct(cudaColorSpinorField &x, const cudaColorSpinorField &b,
                                const QudaSolutionType solType) const
{
  // The caller asked for the preconditioned solution itself: nothing to do.
  const bool wantsPreconditioned =
    (solType == QUDA_MATPC_SOLUTION) || (solType == QUDA_MATPCDAG_MATPC_SOLUTION);
  if (wantsPreconditioned) return;

  checkFullSpinor(x, b);

  bool tmp1Flag = newTmp(&tmp1, b.Even());

  // create full solution
  switch (matpcType) {
  case QUDA_MATPC_EVEN_EVEN:
  case QUDA_MATPC_EVEN_EVEN_ASYMMETRIC:
    // x_o = A_oo^-1 (b_o + k D_oe x_e)
    DiracWilson::DslashXpay(*tmp1, x.Even(), QUDA_ODD_PARITY, b.Odd(), kappa);
    CloverInv(x.Odd(), *tmp1, QUDA_ODD_PARITY);
    break;

  case QUDA_MATPC_ODD_ODD:
  case QUDA_MATPC_ODD_ODD_ASYMMETRIC:
    // x_e = A_ee^-1 (b_e + k D_eo x_o)
    DiracWilson::DslashXpay(*tmp1, x.Odd(), QUDA_EVEN_PARITY, b.Even(), kappa);
    CloverInv(x.Even(), *tmp1, QUDA_EVEN_PARITY);
    break;

  default:
    errorQuda("MatPCType %d not valid for DiracCloverPC", matpcType);
    break;
  }

  deleteTmp(&tmp1, tmp1Flag);
}
// Apply the even-odd preconditioned clover-improved Dirac operator void DiracCloverPC::M(cudaColorSpinorField &out, const cudaColorSpinorField &in) const { double kappa2 = -kappa*kappa; // FIXME: For asymmetric, a "DslashCxpay" kernel would improve performance. bool reset = newTmp(&tmp1, in); if (matpcType == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC) { bool reset = newTmp(&tmp2, in); // DiracCloverPC::Dslash applies A^{-1}Dslash Dslash(*tmp1, in, QUDA_ODD_PARITY); Clover(*tmp2, in, QUDA_EVEN_PARITY); // DiracWilson::Dslash applies only Dslash DiracWilson::DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, *tmp2, kappa2); } else if (matpcType == QUDA_MATPC_ODD_ODD_ASYMMETRIC) { // FIXME: It would be nice if I could do something like: cudaColorSpinorField tmp3( in.param() ); // to save copying the data from 'in' bool reset = newTmp(&tmp2, in); // DiracCloverPC::Dslash applies A^{-1}Dslash Dslash(*tmp1, in, QUDA_EVEN_PARITY); Clover(*tmp2, in, QUDA_ODD_PARITY); // DiracWilson::Dslash applies only Dslash DiracWilson::DslashXpay(out, *tmp1, QUDA_ODD_PARITY, *tmp2, kappa2); } else if (!dagger) { // symmetric preconditioning if (matpcType == QUDA_MATPC_EVEN_EVEN) { Dslash(*tmp1, in, QUDA_ODD_PARITY); DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, in, kappa2); } else if (matpcType == QUDA_MATPC_ODD_ODD) { Dslash(*tmp1, in, QUDA_EVEN_PARITY); DslashXpay(out, *tmp1, QUDA_ODD_PARITY, in, kappa2); } else { errorQuda("Invalid matpcType"); } } else { // symmetric preconditioning, dagger if (matpcType == QUDA_MATPC_EVEN_EVEN) { CloverInv(out, in, QUDA_EVEN_PARITY); Dslash(*tmp1, out, QUDA_ODD_PARITY); DiracWilson::DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, in, kappa2); } else if (matpcType == QUDA_MATPC_ODD_ODD) { CloverInv(out, in, QUDA_ODD_PARITY); Dslash(*tmp1, out, QUDA_EVEN_PARITY); DiracWilson::DslashXpay(out, *tmp1, QUDA_ODD_PARITY, in, kappa2); } else { errorQuda("MatPCType %d not valid for DiracCloverPC", matpcType); } } deleteTmp(&tmp1, reset); }
// Apply the even-odd preconditioned clover-improved Dirac operator void DiracCloverPC::M(cudaColorSpinorField &out, const cudaColorSpinorField &in) const { double kappa2 = -kappa*kappa; // FIXME: For asymmetric, a "DslashCxpay" kernel would improve performance. bool reset = newTmp(&tmp1, in); if (matpcType == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC) { Dslash(*tmp1, in, QUDA_ODD_PARITY); Clover(out, in, QUDA_EVEN_PARITY); #ifdef MULTI_GPU // not safe to alias because of partial updates cudaColorSpinorField tmp3(in); #else // safe since out is not read after writing cudaColorSpinorField &tmp3 = out; #endif DiracWilson::DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, tmp3, kappa2); } else if (matpcType == QUDA_MATPC_ODD_ODD_ASYMMETRIC) { Dslash(*tmp1, in, QUDA_EVEN_PARITY); Clover(out, in, QUDA_ODD_PARITY); #ifdef MULTI_GPU // not safe to alias because of partial updates cudaColorSpinorField tmp3(in); #else // safe since out is not read after writing cudaColorSpinorField &tmp3 = out; #endif DiracWilson::DslashXpay(out, *tmp1, QUDA_ODD_PARITY, tmp3, kappa2); } else if (!dagger) { // symmetric preconditioning if (matpcType == QUDA_MATPC_EVEN_EVEN) { Dslash(*tmp1, in, QUDA_ODD_PARITY); DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, in, kappa2); } else if (matpcType == QUDA_MATPC_ODD_ODD) { Dslash(*tmp1, in, QUDA_EVEN_PARITY); DslashXpay(out, *tmp1, QUDA_ODD_PARITY, in, kappa2); } else { errorQuda("Invalid matpcType"); } } else { // symmetric preconditioning, dagger if (matpcType == QUDA_MATPC_EVEN_EVEN) { CloverInv(out, in, QUDA_EVEN_PARITY); Dslash(*tmp1, out, QUDA_ODD_PARITY); DiracWilson::DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, in, kappa2); } else if (matpcType == QUDA_MATPC_ODD_ODD) { CloverInv(out, in, QUDA_ODD_PARITY); Dslash(*tmp1, out, QUDA_EVEN_PARITY); DiracWilson::DslashXpay(out, *tmp1, QUDA_ODD_PARITY, in, kappa2); } else { errorQuda("MatPCType %d not valid for DiracCloverPC", matpcType); } } deleteTmp(&tmp1, reset); }
// Apply the even-odd preconditioned clover-improved Dirac operator void DiracCloverPC::M(cudaColorSpinorField &out, const cudaColorSpinorField &in) const { double kappa2 = -kappa*kappa; bool reset1 = newTmp(&tmp1, in); if (matpcType == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC) { // DiracCloverPC::Dslash applies A^{-1}Dslash Dslash(*tmp1, in, QUDA_ODD_PARITY); // DiracClover::DslashXpay applies (A - kappa^2 D) DiracClover::DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, in, kappa2); } else if (matpcType == QUDA_MATPC_ODD_ODD_ASYMMETRIC) { // DiracCloverPC::Dslash applies A^{-1}Dslash Dslash(*tmp1, in, QUDA_EVEN_PARITY); // DiracClover::DslashXpay applies (A - kappa^2 D) DiracClover::DslashXpay(out, *tmp1, QUDA_ODD_PARITY, in, kappa2); } else if (!dagger) { // symmetric preconditioning if (matpcType == QUDA_MATPC_EVEN_EVEN) { Dslash(*tmp1, in, QUDA_ODD_PARITY); DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, in, kappa2); } else if (matpcType == QUDA_MATPC_ODD_ODD) { Dslash(*tmp1, in, QUDA_EVEN_PARITY); DslashXpay(out, *tmp1, QUDA_ODD_PARITY, in, kappa2); } else { errorQuda("Invalid matpcType"); } } else { // symmetric preconditioning, dagger if (matpcType == QUDA_MATPC_EVEN_EVEN) { CloverInv(out, in, QUDA_EVEN_PARITY); Dslash(*tmp1, out, QUDA_ODD_PARITY); DiracWilson::DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, in, kappa2); } else if (matpcType == QUDA_MATPC_ODD_ODD) { CloverInv(out, in, QUDA_ODD_PARITY); Dslash(*tmp1, out, QUDA_EVEN_PARITY); DiracWilson::DslashXpay(out, *tmp1, QUDA_ODD_PARITY, in, kappa2); } else { errorQuda("MatPCType %d not valid for DiracCloverPC", matpcType); } } deleteTmp(&tmp1, reset1); }
void DiracCloverPC::prepare(cudaColorSpinorField* &src, cudaColorSpinorField* &sol, cudaColorSpinorField &x, cudaColorSpinorField &b, const QudaSolutionType solType) const { // we desire solution to preconditioned system if (solType == QUDA_MATPC_SOLUTION || solType == QUDA_MATPCDAG_MATPC_SOLUTION) { src = &b; sol = &x; return; } bool reset = newTmp(&tmp1, b.Even()); // we desire solution to full system if (matpcType == QUDA_MATPC_EVEN_EVEN) { // src = A_ee^-1 (b_e + k D_eo A_oo^-1 b_o) src = &(x.Odd()); CloverInv(*src, b.Odd(), QUDA_ODD_PARITY); DiracWilson::DslashXpay(*tmp1, *src, QUDA_EVEN_PARITY, b.Even(), kappa); CloverInv(*src, *tmp1, QUDA_EVEN_PARITY); sol = &(x.Even()); } else if (matpcType == QUDA_MATPC_ODD_ODD) { // src = A_oo^-1 (b_o + k D_oe A_ee^-1 b_e) src = &(x.Even()); CloverInv(*src, b.Even(), QUDA_EVEN_PARITY); DiracWilson::DslashXpay(*tmp1, *src, QUDA_ODD_PARITY, b.Odd(), kappa); CloverInv(*src, *tmp1, QUDA_ODD_PARITY); sol = &(x.Odd()); } else if (matpcType == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC) { // src = b_e + k D_eo A_oo^-1 b_o src = &(x.Odd()); CloverInv(*tmp1, b.Odd(), QUDA_ODD_PARITY); // safe even when *tmp1 = b.odd DiracWilson::DslashXpay(*src, *tmp1, QUDA_EVEN_PARITY, b.Even(), kappa); sol = &(x.Even()); } else if (matpcType == QUDA_MATPC_ODD_ODD_ASYMMETRIC) { // src = b_o + k D_oe A_ee^-1 b_e src = &(x.Even()); CloverInv(*tmp1, b.Even(), QUDA_EVEN_PARITY); // safe even when *tmp1 = b.even DiracWilson::DslashXpay(*src, *tmp1, QUDA_ODD_PARITY, b.Odd(), kappa); sol = &(x.Odd()); } else { errorQuda("MatPCType %d not valid for DiracCloverPC", matpcType); } // here we use final solution to store parity solution and parity source // b is now up for grabs if we want deleteTmp(&tmp1, reset); }