Пример #1
0
// Benchmark the domain-wall Dslash and DslashXpay kernels and record the
// results in tuneDslash[] / tuneDslashXpay[].
//
//   out, in : fields handed to both tuner objects
//   x       : extra field handed only to the DslashXpay tuner
//
// Dslash tuning is switched on (QUDA_TUNE_YES) for the duration of the call
// and switched back off (QUDA_TUNE_NO) before returning.
void DiracDomainWall::Tune(cudaColorSpinorField &out, const cudaColorSpinorField &in,
                           const cudaColorSpinorField &x)
{
  setDslashTuning(QUDA_TUNE_YES);

  // --- Dslash ---
  {
    TuneDiracDomainWallDslash tuner(*this, out, in);

    // Slot 0 is always benchmarked.
    tuner.Benchmark(tuneDslash[0]);

    // Slot dim+1 is benchmarked only when commDimPartitioned(dim) holds.
    // NOTE(review): presumably slot 0 is the interior kernel and the others
    // are per-dimension boundary kernels -- confirm against the tuner class.
    for (int dim = 0; dim < 4; ++dim) {
      if (!commDimPartitioned(dim)) continue;
      tuner.Benchmark(tuneDslash[dim + 1]);
    }
  }

  // --- DslashXpay ---
  {
    TuneDiracDomainWallDslashXpay xpayTuner(*this, out, in, x);

    xpayTuner.Benchmark(tuneDslashXpay[0]);

    // Same slot layout as above: index dim+1 for each partitioned dimension.
    for (int dim = 0; dim < 4; ++dim) {
      if (!commDimPartitioned(dim)) continue;
      xpayTuner.Benchmark(tuneDslashXpay[dim + 1]);
    }
  }

  setDslashTuning(QUDA_TUNE_NO);
}
Пример #2
0
// Benchmark the clover Dslash and DslashXpay kernels and record the results
// in blockDslash / blockDslashXpay (plus the *Face variants when
// OVERLAP_COMMS is defined).
//
//   out, in : fields handed to both tuner objects
//   x       : extra field handed only to the DslashXpay tuner
//
// DiracClover::Tune() is run first with the same arguments. Dslash tuning is
// then switched on (QUDA_TUNE_YES) while the benchmarks below run and
// switched back off (QUDA_TUNE_NO) before returning.
void DiracCloverPC::Tune(cudaColorSpinorField &out, const cudaColorSpinorField &in,
                         const cudaColorSpinorField &x)
{
  // Let the base class do its own tuning first.
  DiracClover::Tune(out, in, x);

  setDslashTuning(QUDA_TUNE_YES);

  // --- Dslash ---
  {
    TuneDiracCloverDslash tuner(*this, out, in);
    tuner.Benchmark(blockDslash);
#ifdef OVERLAP_COMMS
    // Second benchmark is only compiled in for OVERLAP_COMMS builds.
    tuner.Benchmark(blockDslashFace);
#endif
  }

  // --- DslashXpay ---
  {
    TuneDiracCloverDslashXpay xpayTuner(*this, out, in, x);
    xpayTuner.Benchmark(blockDslashXpay);
#ifdef OVERLAP_COMMS
    // Second benchmark is only compiled in for OVERLAP_COMMS builds.
    xpayTuner.Benchmark(blockDslashXpayFace);
#endif
  }

  setDslashTuning(QUDA_TUNE_NO);
}