// Find the best block size parameters for the Dslash and DslashXpay kernels void DiracDomainWall::Tune(cudaColorSpinorField &out, const cudaColorSpinorField &in, const cudaColorSpinorField &x) { setDslashTuning(QUDA_TUNE_YES); { // Tune Dslash TuneDiracDomainWallDslash dslashTune(*this, out, in); dslashTune.Benchmark(tuneDslash[0]); for (int i=0; i<4; i++) if (commDimPartitioned(i)) dslashTune.Benchmark(tuneDslash[i+1]); } { // Tune DslashXpay TuneDiracDomainWallDslashXpay dslashXpayTune(*this, out, in, x); dslashXpayTune.Benchmark(tuneDslashXpay[0]); for (int i=0; i<4; i++) if (commDimPartitioned(i)) dslashXpayTune.Benchmark(tuneDslashXpay[i+1]); } setDslashTuning(QUDA_TUNE_NO); }
// Find the best block size parameters for the Dslash and DslashXpay kernels void DiracCloverPC::Tune(cudaColorSpinorField &out, const cudaColorSpinorField &in, const cudaColorSpinorField &x) { DiracClover::Tune(out, in, x); setDslashTuning(QUDA_TUNE_YES); { // Tune Dslash TuneDiracCloverDslash dslashTune(*this, out, in); dslashTune.Benchmark(blockDslash); #ifdef OVERLAP_COMMS dslashTune.Benchmark(blockDslashFace); #endif } { // Tune DslashXpay TuneDiracCloverDslashXpay dslashXpayTune(*this, out, in, x); dslashXpayTune.Benchmark(blockDslashXpay); #ifdef OVERLAP_COMMS dslashXpayTune.Benchmark(blockDslashXpayFace); #endif } setDslashTuning(QUDA_TUNE_NO); }