Esempio n. 1
0
void DiracStaggered::MdagM(cudaColorSpinorField &out, const cudaColorSpinorField &in) const
{

  if (!initDslash){
    initDslashConstants(*fatGauge, in.Stride());
    initStaggeredConstants(*fatGauge, *longGauge);
  }
  
  bool reset = newTmp(&tmp1, in);
  
  cudaColorSpinorField* mytmp = dynamic_cast<cudaColorSpinorField*>(&(tmp1->Even()));
  cudaColorSpinorField* ineven = dynamic_cast<cudaColorSpinorField*>(&(in.Even()));
  cudaColorSpinorField* inodd = dynamic_cast<cudaColorSpinorField*>(&(in.Odd()));
  cudaColorSpinorField* outeven = dynamic_cast<cudaColorSpinorField*>(&(out.Even()));
  cudaColorSpinorField* outodd = dynamic_cast<cudaColorSpinorField*>(&(out.Odd()));
  
  //even
  Dslash(*mytmp, *ineven, QUDA_ODD_PARITY);  
  DslashXpay(*outeven, *mytmp, QUDA_EVEN_PARITY, *ineven, 4*mass*mass);
  
  //odd
  Dslash(*mytmp, *inodd, QUDA_EVEN_PARITY);  
  DslashXpay(*outodd, *mytmp, QUDA_ODD_PARITY, *inodd, 4*mass*mass);    

  deleteTmp(&tmp1, reset);
}
Esempio n. 2
0
void FaceBuffer::exchangeFacesStart(cudaColorSpinorField &in, int parity,
				    int dagger, int dir, cudaStream_t *stream_p)
{
  if(!commDimPartitioned(dir)){
    return ;
  }

  in.allocateGhostBuffer();   // allocate the ghost buffer if not yet allocated
  
  stream = stream_p;
  
  int back_nbr[4] = {X_BACK_NBR, Y_BACK_NBR, Z_BACK_NBR,T_BACK_NBR};
  int fwd_nbr[4] = {X_FWD_NBR, Y_FWD_NBR, Z_FWD_NBR,T_FWD_NBR};
  int uptags[4] = {XUP, YUP, ZUP, TUP};
  int downtags[4] = {XDOWN, YDOWN, ZDOWN, TDOWN};
  
  // Prepost all receives
  recv_request1[dir] = comm_recv_with_tag(pageable_back_nbr_spinor[dir], nbytes[dir], back_nbr[dir], uptags[dir]);
  recv_request2[dir] = comm_recv_with_tag(pageable_fwd_nbr_spinor[dir], nbytes[dir], fwd_nbr[dir], downtags[dir]);
  
  // gather for backwards send
  in.packGhost(back_nbr_spinor_sendbuf[dir], dir, QUDA_BACKWARDS, 
	       (QudaParity)parity, dagger, &stream[2*dir + sendBackStrmIdx]); CUERR;  
  
  // gather for forwards send
  in.packGhost(fwd_nbr_spinor_sendbuf[dir], dir, QUDA_FORWARDS, 
	       (QudaParity)parity, dagger, &stream[2*dir + sendFwdStrmIdx]); CUERR;
}
Esempio n. 3
0
void Dirac::checkFullSpinor(const cudaColorSpinorField &out, const cudaColorSpinorField &in) const
{
   if (in.SiteSubset() != QUDA_FULL_SITE_SUBSET || out.SiteSubset() != QUDA_FULL_SITE_SUBSET) {
    errorQuda("ColorSpinorFields are not full fields: in = %d, out = %d", 
	      in.SiteSubset(), out.SiteSubset());
  } 
}
Esempio n. 4
0
void DiracWilsonPC::prepare(cudaColorSpinorField* &src, cudaColorSpinorField* &sol,
			    cudaColorSpinorField &x, cudaColorSpinorField &b, 
			    const QudaSolutionType solType) const
{
  // we desire solution to preconditioned system
  if (solType == QUDA_MATPC_SOLUTION || solType == QUDA_MATPCDAG_MATPC_SOLUTION) {
    src = &b;
    sol = &x;
  } else {
    // we desire solution to full system
    if (matpcType == QUDA_MATPC_EVEN_EVEN) {
      // src = b_e + k D_eo b_o
      DslashXpay(x.Odd(), b.Odd(), QUDA_EVEN_PARITY, b.Even(), kappa);
      src = &(x.Odd());
      sol = &(x.Even());
    } else if (matpcType == QUDA_MATPC_ODD_ODD) {
      // src = b_o + k D_oe b_e
      DslashXpay(x.Even(), b.Even(), QUDA_ODD_PARITY, b.Odd(), kappa);
      src = &(x.Even());
      sol = &(x.Odd());
    } else {
      errorQuda("MatPCType %d not valid for DiracWilsonPC", matpcType);
    }
    // here we use final solution to store parity solution and parity source
    // b is now up for grabs if we want
  }

}
Esempio n. 5
0
void FaceBuffer::exchangeFacesPack(cudaColorSpinorField &in, int parity,
				   int dagger, int dir, cudaStream_t *stream_p)
{
  int dim = dir/2;
  if(!commDimPartitioned(dim)) return;

  in.allocateGhostBuffer();   // allocate the ghost buffer if not yet allocated
  stream = stream_p;

  if (dir%2==0) { // sending backwards
#ifdef QMP_COMMS
    // Prepost receive
    QMP_start(mh_from_fwd[dim]);
#endif
    // gather for backwards send
    in.packGhost(dim, QUDA_BACKWARDS, (QudaParity)parity, dagger, &stream[2*dim+sendBackStrmIdx]);  
  } else { // sending forwards

#ifdef QMP_COMMS
    // Prepost receive
    QMP_start(mh_from_back[dim]);
#endif
    // gather for forwards send
    in.packGhost(dim, QUDA_FORWARDS, (QudaParity)parity, dagger, &stream[2*dim+sendFwdStrmIdx]);
  }
}
Esempio n. 6
0
void FaceBuffer::pack(cudaColorSpinorField &in, int parity, int dagger, int dim, cudaStream_t *stream_p)
{
  if(!commDimPartitioned(dim)) return;

  in.allocateGhostBuffer();   // allocate the ghost buffer if not yet allocated
  stream = stream_p;

  in.packGhost(dim, (QudaParity)parity, dagger, &stream[Nstream-1]);  
}
Esempio n. 7
0
  // Full staggered operator
  void DiracStaggered::M(cudaColorSpinorField &out, const cudaColorSpinorField &in) const
  {
    bool reset = newTmp(&tmp1, in.Even());

    DslashXpay(out.Even(), in.Odd(), QUDA_EVEN_PARITY, *tmp1, 2*mass);  
    DslashXpay(out.Odd(), in.Even(), QUDA_ODD_PARITY, *tmp1, 2*mass);
  
    deleteTmp(&tmp1, reset);
  }
Esempio n. 8
0
  void DiracClover::checkParitySpinor(const cudaColorSpinorField &out, const cudaColorSpinorField &in) const
  {
    Dirac::checkParitySpinor(out, in);

    if (out.Volume() != clover.VolumeCB()) {
      errorQuda("Parity spinor volume %d doesn't match clover checkboard volume %d",
		out.Volume(), clover.VolumeCB());
    }
  }
Esempio n. 9
0
void FaceBuffer::scatter(cudaColorSpinorField &out, int dagger, int dir) {
  int dim = dir / 2;
  if(!commDimPartitioned(dim)) return;

  if (dir%2 == 0) {
    out.unpackGhost(fwd_nbr_spinor[dim], dim, QUDA_FORWARDS,  dagger, &stream[2*dim + recFwdStrmIdx]); CUERR;
  } else {
    out.unpackGhost(back_nbr_spinor[dim], dim, QUDA_BACKWARDS,  dagger, &stream[2*dim + recBackStrmIdx]); CUERR;
  }
}
Esempio n. 10
0
void FaceBuffer::gather(cudaColorSpinorField &in, int dagger, int dir)
{
  int dim = dir/2;
  if(!commDimPartitioned(dim)) return;

  if (dir%2==0){ // backwards send
    in.sendGhost(back_nbr_spinor_sendbuf[dim], dim, QUDA_BACKWARDS, dagger, &stream[2*dim + sendBackStrmIdx]);
  } else { // forwards send
    in.sendGhost(fwd_nbr_spinor_sendbuf[dim], dim, QUDA_FORWARDS, dagger, &stream[2*dim + sendFwdStrmIdx]); 
  }
}
Esempio n. 11
0
void FaceBuffer::scatter(cudaColorSpinorField &out, int dagger, int dir)
{
  int dim = dir/2;
  if(!commDimPartitioned(dim)) return;

  if (dir%2==0) {// receive from forwards
    out.unpackGhost(from_fwd_face[dim], dim, QUDA_FORWARDS, dagger, &stream[2*dim+recFwdStrmIdx]); // 0, 2, 4, 6
  } else { // receive from backwards
    out.unpackGhost(from_back_face[dim], dim, QUDA_BACKWARDS, dagger, &stream[2*dim+recBackStrmIdx]); // 1, 3, 5, 7
  }
}
Esempio n. 12
0
void FaceBuffer::exchangeFacesStart(cudaColorSpinorField &in, int dagger, int dir)
{
  int dim = dir/2;
  if(!commDimPartitioned(dim)) return;

  if (dir%2==0) {
    // backwards copy to host
    in.sendGhost(my_back_face[dim], dim, QUDA_BACKWARDS, dagger, &stream[2*dim+sendBackStrmIdx]);  
  } else {
    // forwards copy to host
    in.sendGhost(my_fwd_face[dim], dim, QUDA_FORWARDS, dagger, &stream[2*dim+sendFwdStrmIdx]);
  }
}
Esempio n. 13
0
void DiracStaggered::Dslash(cudaColorSpinorField &out, const cudaColorSpinorField &in, 
			 const QudaParity parity) const
{
  if (!initDslash) {
    initDslashConstants(*fatGauge, in.Stride());
    initStaggeredConstants(*fatGauge, *longGauge);
  }
  checkParitySpinor(in, out);

  setFace(face); // FIXME: temporary hack maintain C linkage for dslashCuda
  staggeredDslashCuda(&out, *fatGauge, *longGauge, &in, parity, dagger, 0, 0, commDim);
  
  flops += 1146*in.Volume();
}
Esempio n. 14
0
// Public method to apply the clover term only
void DiracClover::Clover(cudaColorSpinorField &out, const cudaColorSpinorField &in, const QudaParity parity) const
{
  if (!initDslash) initDslashConstants(gauge, in.Stride());
  if (!initClover) initCloverConstants(clover.Stride());
  checkParitySpinor(in, out, clover);

  // regular clover term
  FullClover cs;
  cs.even = clover.even; cs.odd = clover.odd; cs.evenNorm = clover.evenNorm; cs.oddNorm = clover.oddNorm;
  cs.precision = clover.precision; cs.bytes = clover.bytes, cs.norm_bytes = clover.norm_bytes;
  cloverCuda(&out, gauge, cs, &in, parity);

  flops += 504*in.Volume();
}
Esempio n. 15
0
void FaceBuffer::pack(cudaColorSpinorField &in, int parity,
		      int dagger, int dir, cudaStream_t *stream_p)
{
  int dim = dir/2;
  if(!commDimPartitioned(dim)) return;

  in.allocateGhostBuffer();   // allocate the ghost buffer if not yet allocated  
  stream = stream_p;

  if (dir%2==0){ // backwards send
    in.packGhost(dim, QUDA_BACKWARDS, (QudaParity)parity, dagger, &stream[2*dim + sendBackStrmIdx]);
  } else { // forwards send
    in.packGhost(dim, QUDA_FORWARDS, (QudaParity)parity, dagger, &stream[2*dim + sendFwdStrmIdx]);
  }
}
Esempio n. 16
0
// Public method
void DiracCloverPC::CloverInv(cudaColorSpinorField &out, const cudaColorSpinorField &in, 
			      const QudaParity parity) const
{
  if (!initDslash) initDslashConstants(gauge, in.Stride());
  if (!initClover) initCloverConstants(clover.Stride());
  checkParitySpinor(in, out, clover);

  // needs to be cloverinv
  FullClover cs;
  cs.even = clover.evenInv; cs.odd = clover.oddInv; cs.evenNorm = clover.evenInvNorm; cs.oddNorm = clover.oddInvNorm;
  cs.precision = clover.precision; cs.bytes = clover.bytes, cs.norm_bytes = clover.norm_bytes;
  cloverCuda(&out, gauge, cs, &in, parity, tuneClover);

  flops += 504*in.Volume();
}
Esempio n. 17
0
void DiracClover::checkParitySpinor(const cudaColorSpinorField &out, const cudaColorSpinorField &in,
				    const FullClover &clover) const
{
  Dirac::checkParitySpinor(out, in);

  if (out.Volume() != clover.even.volume) {
    errorQuda("Spinor volume %d doesn't match even clover volume %d",
	      out.Volume(), clover.even.volume);
  }
  if (out.Volume() != clover.odd.volume) {
    errorQuda("Spinor volume %d doesn't match odd clover volume %d",
	      out.Volume(), clover.odd.volume);
  }

}
Esempio n. 18
0
void DiracStaggeredPC::MdagM(cudaColorSpinorField &out, const cudaColorSpinorField &in) const
{
  if (!initDslash){
    initDslashConstants(*fatGauge, in.Stride());
    initStaggeredConstants(*fatGauge, *longGauge);
  }
  
  bool reset = newTmp(&tmp1, in);
  
  QudaParity parity = QUDA_INVALID_PARITY;
  QudaParity other_parity = QUDA_INVALID_PARITY;
  if (matpcType == QUDA_MATPC_EVEN_EVEN) {
    parity = QUDA_EVEN_PARITY;
    other_parity = QUDA_ODD_PARITY;
  } else if (matpcType == QUDA_MATPC_ODD_ODD) {
    parity = QUDA_ODD_PARITY;
    other_parity = QUDA_EVEN_PARITY;
  } else {
    errorQuda("Invalid matpcType(%d) in function\n", matpcType);    
  }
  Dslash(*tmp1, in, other_parity);  
  DslashXpay(out, *tmp1, parity, in, 4*mass*mass);

  deleteTmp(&tmp1, reset);
}
Esempio n. 19
0
// apply hopping term, then clover: (A_ee^-1 D_eo) or (A_oo^-1 D_oe),
// and likewise for dagger: (A_ee^-1 D^dagger_eo) or (A_oo^-1 D^dagger_oe)
// NOTE - this isn't Dslash dagger since order should be reversed!
void DiracCloverPC::Dslash(cudaColorSpinorField &out, const cudaColorSpinorField &in, 
			   const QudaParity parity) const
{
  if (!initDslash) initDslashConstants(gauge, in.Stride());
  if (!initClover) initCloverConstants(clover.Stride());
  checkParitySpinor(in, out, clover);
  checkSpinorAlias(in, out);

  setFace(face); // FIXME: temporary hack maintain C linkage for dslashCuda

  FullClover cs;
  cs.even = clover.evenInv; cs.odd = clover.oddInv; cs.evenNorm = clover.evenInvNorm; cs.oddNorm = clover.oddInvNorm;
  cs.precision = clover.precision; cs.bytes = clover.bytes, cs.norm_bytes = clover.norm_bytes;
  cloverDslashCuda(&out, gauge, cs, &in, parity, dagger, 0, 0.0, commDim);

  flops += (1320+504)*in.Volume();
}
Esempio n. 20
0
void DiracCloverPC::reconstruct(cudaColorSpinorField &x, const cudaColorSpinorField &b,
				const QudaSolutionType solType) const
{
  if (solType == QUDA_MATPC_SOLUTION || solType == QUDA_MATPCDAG_MATPC_SOLUTION) {
    return;
  }

  checkFullSpinor(x, b);

  bool reset = newTmp(&tmp1, b.Even());

  // create full solution

  if (matpcType == QUDA_MATPC_EVEN_EVEN ||
      matpcType == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC) {
    // x_o = A_oo^-1 (b_o + k D_oe x_e)
    DiracWilson::DslashXpay(*tmp1, x.Even(), QUDA_ODD_PARITY, b.Odd(), kappa);
    CloverInv(x.Odd(), *tmp1, QUDA_ODD_PARITY);
  } else if (matpcType == QUDA_MATPC_ODD_ODD ||
      matpcType == QUDA_MATPC_ODD_ODD_ASYMMETRIC) {
    // x_e = A_ee^-1 (b_e + k D_eo x_o)
    DiracWilson::DslashXpay(*tmp1, x.Odd(), QUDA_EVEN_PARITY, b.Even(), kappa);
    CloverInv(x.Even(), *tmp1, QUDA_EVEN_PARITY);
  } else {
    errorQuda("MatPCType %d not valid for DiracCloverPC", matpcType);
  }

  deleteTmp(&tmp1, reset);

}
Esempio n. 21
0
  void DiracStaggered::checkParitySpinor(const cudaColorSpinorField &in, const cudaColorSpinorField &out) const
  {
    if (in.Precision() != out.Precision()) {
      errorQuda("Input and output spinor precisions don't match in dslash_quda");
    }

    if (in.Stride() != out.Stride()) {
      errorQuda("Input %d and output %d spinor strides don't match in dslash_quda", in.Stride(), out.Stride());
    }

    if (in.SiteSubset() != QUDA_PARITY_SITE_SUBSET || out.SiteSubset() != QUDA_PARITY_SITE_SUBSET) {
      errorQuda("ColorSpinorFields are not single parity, in = %d, out = %d", 
		in.SiteSubset(), out.SiteSubset());
    }
  }
Esempio n. 22
0
  void DiracStaggered::Dslash(cudaColorSpinorField &out, const cudaColorSpinorField &in, 
			      const QudaParity parity) const
  {
    checkParitySpinor(in, out);

    staggered::setFace(face1, face2); // FIXME: temporary hack maintain C linkage for dslashCuda
    staggeredDslashCuda(&out, gauge, &in, parity, dagger, 0, 0, commDim, profile);
  
    flops += 570ll*in.Volume();
  }
Esempio n. 23
0
// Apply the even-odd preconditioned clover-improved Dirac operator
void DiracDomainWallPC::M(cudaColorSpinorField &out, const cudaColorSpinorField &in) const
{
  if ( in.Ndim() != 5 || out.Ndim() != 5) errorQuda("Wrong number of dimensions\n");
  double kappa2 = -kappa5*kappa5;

  bool reset = newTmp(&tmp1, in);

  if (matpcType == QUDA_MATPC_EVEN_EVEN) {
    Dslash(*tmp1, in, QUDA_ODD_PARITY);
    DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, in, kappa2); 
  } else if (matpcType == QUDA_MATPC_ODD_ODD) {
    Dslash(*tmp1, in, QUDA_EVEN_PARITY);
    DslashXpay(out, *tmp1, QUDA_ODD_PARITY, in, kappa2); 
  } else {
    errorQuda("MatPCType %d not valid for DiracDomainWallPC", matpcType);
  }

  deleteTmp(&tmp1, reset);
}
Esempio n. 24
0
  void DiracStaggered::DslashXpay(cudaColorSpinorField &out, const cudaColorSpinorField &in, 
				  const QudaParity parity, const cudaColorSpinorField &x,
				  const double &k) const
  {    
    checkParitySpinor(in, out);

    initSpinorConstants(in, profile);
    setFace(face); // FIXME: temporary hack maintain C linkage for dslashCuda
    staggeredDslashCuda(&out, fatGauge, longGauge, &in, parity, dagger, &x, k, commDim, profile);
  
    flops += 1158ll*in.Volume();
  }
Esempio n. 25
0
void FaceBuffer::exchangeFacesWait(cudaColorSpinorField &out, int dagger, int dir)
{
  if(!commDimPartitioned(dir)){
    return;
  }
  
  comm_wait(recv_request2[dir]);  
  comm_wait(send_request2[dir]);
#ifndef GPU_DIRECT
  memcpy(fwd_nbr_spinor[dir], pageable_fwd_nbr_spinor[dir], nbytes[dir]);
#endif
  out.unpackGhost(fwd_nbr_spinor[dir], dir, QUDA_FORWARDS,  dagger, &stream[2*dir + recFwdStrmIdx]); CUERR;

  comm_wait(recv_request1[dir]);
  comm_wait(send_request1[dir]);

#ifndef GPU_DIRECT
  memcpy(back_nbr_spinor[dir], pageable_back_nbr_spinor[dir], nbytes[dir]);  
#endif
  out.unpackGhost(back_nbr_spinor[dir], dir, QUDA_BACKWARDS,  dagger, &stream[2*dir + recBackStrmIdx]); CUERR;
}
Esempio n. 26
0
void DiracWilson::Dslash(cudaColorSpinorField &out, const cudaColorSpinorField &in, 
			 const QudaParity parity) const
{
  initSpinorConstants(in);
  checkParitySpinor(in, out);
  checkSpinorAlias(in, out);

  setFace(face); // FIXME: temporary hack maintain C linkage for dslashCuda

  wilsonDslashCuda(&out, gauge, &in, parity, dagger, 0, 0.0, commDim);

  flops += 1320ll*in.Volume();
}
Esempio n. 27
0
  void DiracWilson::DslashXpay(cudaColorSpinorField &out, const cudaColorSpinorField &in, 
			       const QudaParity parity, const cudaColorSpinorField &x,
			       const double &k) const
  {
    initSpinorConstants(in, profile);
    checkParitySpinor(in, out);
    checkSpinorAlias(in, out);

    setFace(face1,face2); // FIXME: temporary hack maintain C linkage for dslashCuda

    wilsonDslashCuda(&out, gauge, &in, parity, dagger, &x, k, commDim, profile);

    flops += 1368ll*in.Volume();
  }
Esempio n. 28
0
void DiracDomainWall::Dslash(cudaColorSpinorField &out, const cudaColorSpinorField &in, 
			     const QudaParity parity) const
{
  if ( in.Ndim() != 5 || out.Ndim() != 5) errorQuda("Wrong number of dimensions\n");
  if (!initDslash) initDslashConstants(gauge, in.Stride());
  if (!initDomainWall) initDomainWallConstants(in.X(4));
  checkParitySpinor(in, out);
  checkSpinorAlias(in, out);
  
  domainWallDslashCuda(&out, gauge, &in, parity, dagger, 0, mass, 0, tuneDslash);

  long long Ls = in.X(4);
  long long bulk = (Ls-2)*(in.Volume()/Ls);
  long long wall = 2*in.Volume()/Ls;
  flops += 1320LL*(long long)in.Volume() + 96LL*bulk + 120LL*wall;
}
Esempio n. 29
0
void FaceBuffer::exchangeFacesWait(cudaColorSpinorField &out, int dagger, int dir, 
				   cudaEvent_t &scatterStart, struct timeval &commsEnd)
{
  int dim = dir/2;
  if(!commDimPartitioned(dim)) return;

  if (dir%2==0) {// receive from forwards
    // Scatter faces.
    QMP_finish_from_fwd(dim);

    gettimeofday(&commsEnd, NULL);

    // Record the start of the scattering
    CUDA_EVENT_RECORD(scatterStart, stream[2*dim+recFwdStrmIdx]);
    out.unpackGhost(from_fwd_face[dim], dim, QUDA_FORWARDS, dagger, &stream[2*dim+recFwdStrmIdx]); // 0, 2, 4, 6
  } else { // receive from backwards
    QMP_finish_from_back(dim);
    gettimeofday(&commsEnd, NULL);

    // Record the start of the scattering
    CUDA_EVENT_RECORD(scatterStart, stream[2*dim+recBackStrmIdx]);
    out.unpackGhost(from_back_face[dim], dim, QUDA_BACKWARDS, dagger, &stream[2*dim+recBackStrmIdx]); // 1, 3, 5, 7
  }
}
Esempio n. 30
0
  /** Applies the operator (A + k D) */
  void DiracClover::DslashXpay(cudaColorSpinorField &out, const cudaColorSpinorField &in, 
			       const QudaParity parity, const cudaColorSpinorField &x,
			       const double &k) const
  {
    initSpinorConstants(in);
    checkParitySpinor(in, out);
    checkSpinorAlias(in, out);

    setFace(face); // FIXME: temporary hack maintain C linkage for dslashCuda

    FullClover cs;
    cs.even = clover.even; cs.odd = clover.odd; cs.evenNorm = clover.evenNorm; cs.oddNorm = clover.oddNorm;
    cs.precision = clover.precision; cs.bytes = clover.bytes, cs.norm_bytes = clover.norm_bytes;
    asymCloverDslashCuda(&out, gauge, cs, &in, parity, dagger, &x, k, commDim);

    flops += (1320+504+48)*in.Volume();
  }