예제 #1
// Apply the staggered MdagM operator to a full-parity field, one parity at a time.
//
//   out : destination field; its Even() and Odd() sub-fields are written.
//   in  : source field; its Even() and Odd() sub-fields are read.
//
// For each parity p, Dslash is applied to in_p on the opposite parity into the
// even half of the scratch field tmp1, and DslashXpay then writes out_p from
// that scratch field, in_p and the coefficient 4*mass*mass.
void DiracStaggered::MdagM(cudaColorSpinorField &out, const cudaColorSpinorField &in) const
{
  // Constant setup is guarded by the initDslash flag.
  if (!initDslash) {
    initDslashConstants(*fatGauge, in.Stride());
    initStaggeredConstants(*fatGauge, *longGauge);
  }

  // 'reset' is handed back to deleteTmp() at the end of the function.
  bool reset = newTmp(&tmp1, in);

  // Single-parity views of the scratch, source and destination fields.
  // NOTE(review): the dynamic_cast results are dereferenced below without a
  // null check -- confirm Even()/Odd() always yield cudaColorSpinorField here.
  cudaColorSpinorField* mytmp = dynamic_cast<cudaColorSpinorField*>(&(tmp1->Even()));
  cudaColorSpinorField* ineven = dynamic_cast<cudaColorSpinorField*>(&(in.Even()));
  cudaColorSpinorField* inodd = dynamic_cast<cudaColorSpinorField*>(&(in.Odd()));
  cudaColorSpinorField* outeven = dynamic_cast<cudaColorSpinorField*>(&(out.Even()));
  cudaColorSpinorField* outodd = dynamic_cast<cudaColorSpinorField*>(&(out.Odd()));

  // Even half of the result.
  Dslash(*mytmp, *ineven, QUDA_ODD_PARITY);
  DslashXpay(*outeven, *mytmp, QUDA_EVEN_PARITY, *ineven, 4*mass*mass);

  // Odd half of the result; the same scratch half is reused.
  Dslash(*mytmp, *inodd, QUDA_EVEN_PARITY);
  DslashXpay(*outodd, *mytmp, QUDA_ODD_PARITY, *inodd, 4*mass*mass);

  deleteTmp(&tmp1, reset);
}
예제 #2
  // Apply the staggered MdagM operator to a full-parity field, one parity at a time.
  //
  //   out : destination field; its Even() and Odd() sub-fields are written.
  //   in  : source field; its Even() and Odd() sub-fields are read.
  //
  // Only the even half of the scratch field tmp1 is used; it is overwritten
  // between the two parity passes.
  void DiracStaggered::MdagM(ColorSpinorField &out, const ColorSpinorField &in) const
  {
    // 'reset' is handed back to deleteTmp() at the end of the function.
    bool reset = newTmp(&tmp1, in);

    // Even half of the result.
    Dslash(tmp1->Even(), in.Even(), QUDA_ODD_PARITY);
    DslashXpay(out.Even(), tmp1->Even(), QUDA_EVEN_PARITY, in.Even(), 4*mass*mass);

    // Odd half of the result.
    Dslash(tmp1->Even(), in.Odd(), QUDA_EVEN_PARITY);
    DslashXpay(out.Odd(), tmp1->Even(), QUDA_ODD_PARITY, in.Odd(), 4*mass*mass);

    deleteTmp(&tmp1, reset);
  }
예제 #3
// Apply the even-odd preconditioned staggered MdagM operator.
//
//   out : destination (single-parity) field.
//   in  : source (single-parity) field.
//
// matpcType selects which parity the operator acts on; any value other than
// QUDA_MATPC_EVEN_EVEN or QUDA_MATPC_ODD_ODD is reported through errorQuda.
void DiracStaggeredPC::MdagM(cudaColorSpinorField &out, const cudaColorSpinorField &in) const
{
  // Constant setup is guarded by the initDslash flag.
  if (!initDslash) {
    initDslashConstants(*fatGauge, in.Stride());
    initStaggeredConstants(*fatGauge, *longGauge);
  }

  // 'reset' is handed back to deleteTmp() at the end of the function.
  bool reset = newTmp(&tmp1, in);

  // 'parity' is the parity of the result, 'other_parity' the intermediate one.
  QudaParity parity = QUDA_INVALID_PARITY;
  QudaParity other_parity = QUDA_INVALID_PARITY;
  if (matpcType == QUDA_MATPC_EVEN_EVEN) {
    parity = QUDA_EVEN_PARITY;
    other_parity = QUDA_ODD_PARITY;
  } else if (matpcType == QUDA_MATPC_ODD_ODD) {
    parity = QUDA_ODD_PARITY;
    other_parity = QUDA_EVEN_PARITY;
  } else {
    // NOTE(review): if errorQuda returns, the calls below run with
    // QUDA_INVALID_PARITY -- confirm errorQuda does not return.
    errorQuda("Invalid matpcType(%d) in function\n", matpcType);
  }

  Dslash(*tmp1, in, other_parity);
  DslashXpay(out, *tmp1, parity, in, 4*mass*mass);

  deleteTmp(&tmp1, reset);
}
예제 #4
// Apply the even-odd preconditioned Wilson operator.
//
//   out : destination (single-parity) field.
//   in  : source (single-parity) field.
//
// Dslash is applied on the opposite parity into the scratch field tmp1, then
// DslashXpay combines it with 'in' using the coefficient -kappa^2.
// An unsupported matpcType is reported through errorQuda.
void DiracWilsonPC::M(cudaColorSpinorField &out, const cudaColorSpinorField &in) const
{
  double kappa2 = -kappa*kappa;

  // 'reset' is handed back to deleteTmp() at the end of the function.
  bool reset = newTmp(&tmp1, in);

  if (matpcType == QUDA_MATPC_EVEN_EVEN) {
    Dslash(*tmp1, in, QUDA_ODD_PARITY);
    DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, in, kappa2);
  } else if (matpcType == QUDA_MATPC_ODD_ODD) {
    Dslash(*tmp1, in, QUDA_EVEN_PARITY);
    DslashXpay(out, *tmp1, QUDA_ODD_PARITY, in, kappa2);
  } else {
    errorQuda("MatPCType %d not valid for DiracWilsonPC", matpcType);
  }

  deleteTmp(&tmp1, reset);
}
예제 #5
  // Apply the even-odd preconditioned domain-wall Dirac operator.
  //
  //   out : destination (single-parity) field; must report Ndim() == 5.
  //   in  : source (single-parity) field; must report Ndim() == 5.
  //
  // Dslash is applied on the opposite parity into the scratch field tmp1, then
  // DslashXpay combines it with 'in' using the coefficient -kappa5^2.
  // A wrong dimensionality or an unsupported matpcType is reported through
  // errorQuda.
  void DiracDomainWallPC::M(ColorSpinorField &out, const ColorSpinorField &in) const
  {
    if ( in.Ndim() != 5 || out.Ndim() != 5) errorQuda("Wrong number of dimensions\n");
    double kappa2 = -kappa5*kappa5;

    // 'reset' is handed back to deleteTmp() at the end of the function.
    bool reset = newTmp(&tmp1, in);

    if (matpcType == QUDA_MATPC_EVEN_EVEN) {
      Dslash(*tmp1, in, QUDA_ODD_PARITY);
      DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, in, kappa2);
    } else if (matpcType == QUDA_MATPC_ODD_ODD) {
      Dslash(*tmp1, in, QUDA_EVEN_PARITY);
      DslashXpay(out, *tmp1, QUDA_ODD_PARITY, in, kappa2);
    } else {
      errorQuda("MatPCType %d not valid for DiracDomainWallPC", matpcType);
    }

    deleteTmp(&tmp1, reset);
  }
예제 #6
// Apply the daggered Wilson operator to a full (two-checkerboard) vector.
//
//   out : destination; both halves are written.
//   in  : source; both halves are read.
//
// Each vector is treated as two consecutive halves of temp_size IFloats. The
// first half starts at the pointer itself and the second ("even") half starts
// temp_size IFloats later.
void DiracOpWilson::MatDag(Vector *out, Vector *in) {
  char *fname = "MatDag(V*,V*)";

  // Number of IFloats in one checkerboard half of a fermion vector.
  int temp_size = GJP.VolNodeSites() * lat.FsiteSize() / 2;

  // points to the even part of fermion source
  Vector *even_in = (Vector *) ( (IFloat *) in + temp_size );
  // points to the even part of fermion solution
  Vector *even_out = (Vector *) ( (IFloat *) out + temp_size );

  // First half of out: daggered Dslash of the even source half, then combined
  // in place with the first half of 'in' and the factor -kappa.
  // (presumably out = -kappa*out + in -- confirm against fTimesV1PlusV2)
  Dslash(out, even_in, CHKB_EVEN, DAG_YES);

  fTimesV1PlusV2((IFloat *)out, -(IFloat) kappa, (IFloat *)out,
    (IFloat *)in, temp_size);

  // Second (even) half of out: the same two steps with the halves swapped.
  Dslash(even_out, in, CHKB_ODD, DAG_YES);
  fTimesV1PlusV2((IFloat *)even_out, -(IFloat) kappa, (IFloat *)even_out,
    (IFloat *)even_in, temp_size);
}
예제 #7
// Fill the member buffers f_out and f_in with the four vectors used by the
// HMD force term.
//
//   chi : input vector; f_size_cb Floats are copied from it.
//
// Layout produced (each sub-vector is f_size_cb Floats long):
//   f_out = (chi, rho)   with rho   = Dslash(chi, CHKB_ODD, DAG_NO)
//   f_in  = (psi, sigma) with psi   = -kappa^2 * MatPc(chi)
//                             sigma = Dslash(psi, CHKB_ODD, DAG_YES)
// A null f_out or f_in is reported through ERR.Pointer.
void DiracOpWilson::CalcHmdForceVecs(Vector *chi)
{
  char *fname = "CalcHmdForceVecs(V*)";
  VRB.Func(cname, fname);

  if (f_out == 0)
    ERR.Pointer(cname, fname, "f_out");

  if (f_in == 0)
    ERR.Pointer(cname, fname, "f_in");

  // f_out stores (chi,rho), f_in stores (psi,sigma)

  Vector *chi_new, *rho, *psi, *sigma;

  // Length in Floats of one sub-vector.
  // NOTE(review): the factor 12 is presumably Floats per site per
  // checkerboard -- confirm.
  int f_size_cb = 12 * GJP.VolNodeSites();

  // First half of f_out: a copy of chi.
  chi_new = f_out;

  chi_new->CopyVec(chi, f_size_cb);

  // First half of f_in: MatPc applied to chi, scaled by -kappa^2.
  psi = f_in;

  MatPc(psi, chi);

  psi->VecTimesEquFloat(-kappa*kappa, f_size_cb);

  // Second half of f_out: Dslash of chi.
  rho = (Vector *)((Float *)f_out + f_size_cb);

  Dslash(rho, chi, CHKB_ODD, DAG_NO);

  // Second half of f_in: daggered Dslash of psi.
  sigma = (Vector *)((Float *)f_in + f_size_cb);

  Dslash(sigma, psi, CHKB_ODD, DAG_YES);

  return;
}
예제 #8
// Apply the even-odd preconditioned clover-improved Dirac operator
//
//   out : destination field.
//   in  : source field.
//
// Four cases are handled: the two asymmetric preconditioning types, and the
// symmetric types with and without 'dagger'. All use the scratch field tmp1;
// the asymmetric cases additionally use tmp2 to hold Clover(in).
//
// NOTE(review): this excerpt is structurally incomplete. The function's
// braces, the condition that opens the first branch, and the closers of the
// inner else-blocks are not visible. No deleteTmp for tmp2 is visible either,
// and 'reset' is declared again inside the asymmetric branches. Confirm
// against the full source before editing.
void DiracCloverPC::M(cudaColorSpinorField &out, const cudaColorSpinorField &in) const
  // Coefficient passed to every DslashXpay call below.
  double kappa2 = -kappa*kappa;

  // FIXME: For asymmetric, a "DslashCxpay" kernel would improve performance.
  bool reset = newTmp(&tmp1, in);

    // NOTE(review): the condition opening this branch is missing from the
    // excerpt; the "} else if" further down implies an enclosing "if".
    bool reset = newTmp(&tmp2, in);
    // DiracCloverPC::Dslash applies A^{-1}Dslash
    Dslash(*tmp1, in, QUDA_ODD_PARITY);
    Clover(*tmp2, in, QUDA_EVEN_PARITY);

    // DiracWilson::Dslash applies only Dslash
    DiracWilson::DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, *tmp2, kappa2); 

  } else if (matpcType == QUDA_MATPC_ODD_ODD_ASYMMETRIC) {

    // FIXME: It would be nice if I could do something like: cudaColorSpinorField tmp3( in.param() );
    // to save copying the data from 'in'
    bool reset = newTmp(&tmp2, in);

    // DiracCloverPC::Dslash applies A^{-1}Dslash
    Dslash(*tmp1, in, QUDA_EVEN_PARITY);
    Clover(*tmp2, in, QUDA_ODD_PARITY);

    // DiracWilson::Dslash applies only Dslash
    DiracWilson::DslashXpay(out, *tmp1, QUDA_ODD_PARITY, *tmp2, kappa2);

  } else if (!dagger) { // symmetric preconditioning
    // Non-dagger: Dslash on the opposite parity, then this class's DslashXpay.
    if (matpcType == QUDA_MATPC_EVEN_EVEN) {
      Dslash(*tmp1, in, QUDA_ODD_PARITY);
      DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, in, kappa2); 
    } else if (matpcType == QUDA_MATPC_ODD_ODD) {
      Dslash(*tmp1, in, QUDA_EVEN_PARITY);
      DslashXpay(out, *tmp1, QUDA_ODD_PARITY, in, kappa2); 
    } else {
      errorQuda("Invalid matpcType");
  } else { // symmetric preconditioning, dagger
    // Dagger: 'out' first receives CloverInv(in) and is used as the Dslash
    // source before being overwritten by DiracWilson::DslashXpay.
    if (matpcType == QUDA_MATPC_EVEN_EVEN) {
      CloverInv(out, in, QUDA_EVEN_PARITY); 
      Dslash(*tmp1, out, QUDA_ODD_PARITY);
      DiracWilson::DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, in, kappa2); 
    } else if (matpcType == QUDA_MATPC_ODD_ODD) {
      CloverInv(out, in, QUDA_ODD_PARITY); 
      Dslash(*tmp1, out, QUDA_EVEN_PARITY);
      DiracWilson::DslashXpay(out, *tmp1, QUDA_ODD_PARITY, in, kappa2); 
    } else {
      errorQuda("MatPCType %d not valid for DiracCloverPC", matpcType);
  // Hands tmp1 back together with the flag obtained from newTmp.
  deleteTmp(&tmp1, reset);
예제 #9
// Apply the even-odd preconditioned clover-improved Dirac operator
//
//   out : destination field.
//   in  : source field.
//
// Four cases are handled: the two asymmetric preconditioning types, and the
// symmetric types with and without 'dagger'. In the asymmetric cases
// Clover(in) is written to 'out' and then fed to DiracWilson::DslashXpay
// through tmp3, which is a copy-constructed field when MULTI_GPU is defined
// and an alias of 'out' otherwise.
//
// NOTE(review): this excerpt is structurally incomplete. The function's
// braces, the condition that opens the first branch, the #endif lines that
// close the MULTI_GPU conditionals, and the closers of the inner else-blocks
// are not visible. Confirm against the full source before editing.
void DiracCloverPC::M(cudaColorSpinorField &out, const cudaColorSpinorField &in) const
  // Coefficient passed to every DslashXpay call below.
  double kappa2 = -kappa*kappa;

  // FIXME: For asymmetric, a "DslashCxpay" kernel would improve performance.
  bool reset = newTmp(&tmp1, in);

    // NOTE(review): the condition opening this branch is missing from the
    // excerpt; the "} else if" further down implies an enclosing "if".
    Dslash(*tmp1, in, QUDA_ODD_PARITY);
    Clover(out, in, QUDA_EVEN_PARITY);
#ifdef MULTI_GPU // not safe to alias because of partial updates
    // NOTE(review): tmp3 is copy-constructed from 'in', not from 'out', which
    // holds the Clover result at this point -- confirm this is intended.
    cudaColorSpinorField tmp3(in);
#else // safe since out is not read after writing
    cudaColorSpinorField &tmp3 = out; 
    DiracWilson::DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, tmp3, kappa2); 
  } else if (matpcType == QUDA_MATPC_ODD_ODD_ASYMMETRIC) {
    Dslash(*tmp1, in, QUDA_EVEN_PARITY);
    Clover(out, in, QUDA_ODD_PARITY);
#ifdef MULTI_GPU // not safe to alias because of partial updates
    cudaColorSpinorField tmp3(in);
#else // safe since out is not read after writing
    cudaColorSpinorField &tmp3 = out; 
    DiracWilson::DslashXpay(out, *tmp1, QUDA_ODD_PARITY, tmp3, kappa2);
  } else if (!dagger) { // symmetric preconditioning
    // Non-dagger: Dslash on the opposite parity, then this class's DslashXpay.
    if (matpcType == QUDA_MATPC_EVEN_EVEN) {
      Dslash(*tmp1, in, QUDA_ODD_PARITY);
      DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, in, kappa2); 
    } else if (matpcType == QUDA_MATPC_ODD_ODD) {
      Dslash(*tmp1, in, QUDA_EVEN_PARITY);
      DslashXpay(out, *tmp1, QUDA_ODD_PARITY, in, kappa2); 
    } else {
      errorQuda("Invalid matpcType");
  } else { // symmetric preconditioning, dagger
    // Dagger: 'out' first receives CloverInv(in) and is used as the Dslash
    // source before being overwritten by DiracWilson::DslashXpay.
    if (matpcType == QUDA_MATPC_EVEN_EVEN) {
      CloverInv(out, in, QUDA_EVEN_PARITY); 
      Dslash(*tmp1, out, QUDA_ODD_PARITY);
      DiracWilson::DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, in, kappa2); 
    } else if (matpcType == QUDA_MATPC_ODD_ODD) {
      CloverInv(out, in, QUDA_ODD_PARITY); 
      Dslash(*tmp1, out, QUDA_EVEN_PARITY);
      DiracWilson::DslashXpay(out, *tmp1, QUDA_ODD_PARITY, in, kappa2); 
    } else {
      errorQuda("MatPCType %d not valid for DiracCloverPC", matpcType);
  // Hands tmp1 back together with the flag obtained from newTmp.
  deleteTmp(&tmp1, reset);
예제 #10
  // Apply the even-odd preconditioned staggered MdagM operator.
  //
  //   out : destination (single-parity) field.
  //   in  : source (single-parity) field.
  //
  // matpcType selects which parity the operator acts on; any value other than
  // QUDA_MATPC_EVEN_EVEN or QUDA_MATPC_ODD_ODD is reported through errorQuda.
  void DiracStaggeredPC::MdagM(ColorSpinorField &out, const ColorSpinorField &in) const
  {
    // 'reset' is handed back to deleteTmp() at the end of the function.
    bool reset = newTmp(&tmp1, in);

    // 'parity' is the parity of the result, 'other_parity' the intermediate one.
    QudaParity parity = QUDA_INVALID_PARITY;
    QudaParity other_parity = QUDA_INVALID_PARITY;
    if (matpcType == QUDA_MATPC_EVEN_EVEN) {
      parity = QUDA_EVEN_PARITY;
      other_parity = QUDA_ODD_PARITY;
    } else if (matpcType == QUDA_MATPC_ODD_ODD) {
      parity = QUDA_ODD_PARITY;
      other_parity = QUDA_EVEN_PARITY;
    } else {
      // NOTE(review): if errorQuda returns, the calls below run with
      // QUDA_INVALID_PARITY -- confirm errorQuda does not return.
      errorQuda("Invalid matpcType(%d) in function\n", matpcType);
    }

    Dslash(*tmp1, in, other_parity);
    DslashXpay(out, *tmp1, parity, in, 4*mass*mass);

    deleteTmp(&tmp1, reset);
  }
예제 #11
  // Apply the even-odd preconditioned clover-improved Dirac operator
  //
  //   out : destination field.
  //   in  : source field.
  //
  // Four cases are handled: the two asymmetric preconditioning types (which go
  // through DiracClover::DslashXpay), and the symmetric types with and without
  // 'dagger'. All use the scratch field tmp1.
  //
  // NOTE(review): this excerpt is structurally incomplete. The function's
  // braces, the condition that opens the first branch, and the closers of the
  // inner else-blocks are not visible. Confirm against the full source before
  // editing.
  void DiracCloverPC::M(cudaColorSpinorField &out, const cudaColorSpinorField &in) const
    // Coefficient passed to every DslashXpay call below.
    double kappa2 = -kappa*kappa;
    // reset1 is handed back to deleteTmp() at the end of the function.
    bool reset1 = newTmp(&tmp1, in);

      // NOTE(review): the condition opening this branch is missing from the
      // excerpt; the "} else if" further down implies an enclosing "if".
      // DiracCloverPC::Dslash applies A^{-1}Dslash
      Dslash(*tmp1, in, QUDA_ODD_PARITY);
      // DiracClover::DslashXpay applies (A - kappa^2 D)
      DiracClover::DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, in, kappa2);
    } else if (matpcType == QUDA_MATPC_ODD_ODD_ASYMMETRIC) {
      // DiracCloverPC::Dslash applies A^{-1}Dslash
      Dslash(*tmp1, in, QUDA_EVEN_PARITY);
      // DiracClover::DslashXpay applies (A - kappa^2 D)
      DiracClover::DslashXpay(out, *tmp1, QUDA_ODD_PARITY, in, kappa2);
    } else if (!dagger) { // symmetric preconditioning
      // Non-dagger: Dslash on the opposite parity, then this class's DslashXpay.
      if (matpcType == QUDA_MATPC_EVEN_EVEN) {
	Dslash(*tmp1, in, QUDA_ODD_PARITY);
	DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, in, kappa2); 
      } else if (matpcType == QUDA_MATPC_ODD_ODD) {
	Dslash(*tmp1, in, QUDA_EVEN_PARITY);
	DslashXpay(out, *tmp1, QUDA_ODD_PARITY, in, kappa2); 
      } else {
	errorQuda("Invalid matpcType");
    } else { // symmetric preconditioning, dagger
      // Dagger: 'out' first receives CloverInv(in) and is used as the Dslash
      // source before being overwritten by DiracWilson::DslashXpay.
      if (matpcType == QUDA_MATPC_EVEN_EVEN) {
	CloverInv(out, in, QUDA_EVEN_PARITY); 
	Dslash(*tmp1, out, QUDA_ODD_PARITY);
	DiracWilson::DslashXpay(out, *tmp1, QUDA_EVEN_PARITY, in, kappa2); 
      } else if (matpcType == QUDA_MATPC_ODD_ODD) {
	CloverInv(out, in, QUDA_ODD_PARITY); 
	Dslash(*tmp1, out, QUDA_EVEN_PARITY);
	DiracWilson::DslashXpay(out, *tmp1, QUDA_ODD_PARITY, in, kappa2); 
      } else {
	errorQuda("MatPCType %d not valid for DiracCloverPC", matpcType);
    deleteTmp(&tmp1, reset1);
예제 #12
// int MatInv(Vector *out, Vector *in, 
//            Float *true_res, PreserveType prs_in);
// The inverse of the unconditioned Dirac Operator 
// using Conjugate gradient.
// If true_res !=0 the value of the true residual is returned
// in true_res.
// *true_res = |src - MatPcDagMatPc * sol| / |src|
// prs_in is used to specify if the source
// in should be preserved or not. If not the memory usage
// is less by half the size of a fermion vector.
// The function returns the total number of CG iterations.
//
// Outline of the visible code:
//   1. temp = Dslash(even half of in), combined with the first half of in
//      and the factor kappa via fTimesV1PlusV2.
//   2. The solver selected by dirac_arg->Inverter runs: InvCg on
//      MatPcDag(temp), or BiCGstab directly on temp.
//   3. The even half of out is built from Dslash(out), the even half of in,
//      and kappa.
//   4. If prs_in == PRESERVE_YES the whole of 'in' (2*temp_size Floats) is
//      saved to temp2 before step 1 and copied back afterwards.
//
// NOTE(review): this excerpt is structurally incomplete. Closing braces,
// the #endif lines for the "#if 0" debug regions, the break/default labels of
// the switch, and the tail of the ERR.General call are not visible. No
// deallocation call for temp/temp2 is visible either; only the VRB.Sfree
// calls appear. Confirm against the full source before editing.
int DiracOpWilson::MatInv(Vector *out, 
			  Vector *in, 
			  Float *true_res,
			  PreserveType prs_in) {
  char *fname = "MatInv(V*,V*,F*)";
  // Backup buffer for 'in'; only allocated and used when prs_in == PRESERVE_YES.
  Vector *temp2;

  // Number of Floats in one checkerboard half of a fermion vector.
  int temp_size = GJP.VolNodeSites() * lat.FsiteSize() / 2;

  // check out if converted
  //for (int ii = 0; ii < 2 * temp_size; ii++) {
  //  VRB.Result(cname, fname, "in[%d] = %e\n", ii, 
  //  *((Float *)in + ii));
  //  VRB.Result(cname, fname, "out[%d] = %e\n", ii, 
  //  *((Float *)out + ii));

  // Half-vector scratch buffer.
  Vector *temp = (Vector *) smalloc(temp_size * sizeof(Float));
  if (temp == 0) ERR.Pointer(cname, fname, "temp");
  VRB.Smalloc(cname,fname, "temp", temp, temp_size * sizeof(Float));

  if(prs_in == PRESERVE_YES){
    // Full-vector backup buffer (2*temp_size Floats).
    temp2 = (Vector *) smalloc(2*temp_size * sizeof(Float));
    if (temp2 == 0) ERR.Pointer(cname, fname, "temp2");
    // NOTE(review): the size passed here (temp_size) is half of what was
    // allocated above (2*temp_size).
    VRB.Smalloc(cname,fname, "temp2", temp2, temp_size * sizeof(Float));
  // save source
  if(prs_in == PRESERVE_YES){
    moveMem((Float *)temp2, (Float *)in, 2*temp_size*sizeof(Float));

  // Disabled debug dump of the non-negligible entries of 'in'.
#if 0
  IFloat *temp_p = (IFloat *)in;
  for(int ii = 0; ii< GJP.VolNodeSites();ii++){
    for(int jj = 0; jj< lat.FsiteSize();jj++){
      if (fabs(*temp_p)>1e-7){
        printf("i=%d j=%d\n",ii,jj);

  // points to the even part of fermion source 
  Vector *even_in = (Vector *) ( (Float *) in + temp_size );

  // points to the even part of fermion solution
  Vector *even_out = (Vector *) ( (Float *) out + temp_size );

  // Step 1: build the source for the preconditioned solve in temp.
  Dslash(temp, even_in, CHKB_EVEN, DAG_NO);

  // (presumably temp = kappa*temp + in -- confirm against fTimesV1PlusV2)
  fTimesV1PlusV2((Float *)temp, (Float) kappa, (Float *)temp,
    (Float *)in, temp_size);

  // Disabled debug dump of the non-negligible entries of 'temp'.
#if 0
  IFloat *temp_p = (IFloat *)temp;
  for(int ii = 0; ii< GJP.VolNodeSites();ii++){
    for(int jj = 0; jj< lat.FsiteSize();jj++){
      if (fabs(*temp_p)>1e-7){
        printf("i=%d j=%d\n",ii,jj);

  // Step 2: run the selected solver; iter receives its return value.
  // NOTE(review): no break statements or default label are visible between
  // the cases in this excerpt.
  int iter;
  switch (dirac_arg->Inverter) {
  case CG:
    // MatPcDag writes into 'in', which is why the source is backed up above.
    MatPcDag(in, temp);
    iter = InvCg(out,in,true_res);
  case BICGSTAB:
    iter = BiCGstab(out,temp,0.0,dirac_arg->bicgstab_n,true_res);
    // NOTE(review): the argument list of this call is cut off in the excerpt.
    ERR.General(cname,fname,"InverterType %d not implemented\n",

  // Step 3: reconstruct the even half of the solution.
  Dslash(temp, out, CHKB_ODD, DAG_NO);

  fTimesV1PlusV2((Float *)even_out, (Float) kappa, (Float *)temp,
    (Float *) even_in, temp_size);

  VRB.Sfree(cname, fname, "temp", temp);

  // restore source
  if(prs_in == PRESERVE_YES){
    moveMem((Float *)in, (Float *)temp2, 2*temp_size*sizeof(Float));

  // Disabled debug dump of the non-negligible entries of 'in'.
#if 0
  IFloat *temp_p = (IFloat *)in;
  for(int ii = 0; ii< GJP.VolNodeSites();ii++){
    for(int jj = 0; jj< lat.FsiteSize();jj++){
      if (fabs(*temp_p)>1e-7){
        printf("i=%d j=%d\n",ii,jj);

  // Disabled debug dump of the non-negligible entries of 'temp2'.
#if 0
  IFloat *temp_p = (IFloat *)temp2;
  for(int ii = 0; ii< GJP.VolNodeSites();ii++){
    for(int jj = 0; jj< lat.FsiteSize();jj++){
      if (fabs(*temp_p)>1e-7){
        printf("i=%d j=%d\n",ii,jj);

  if(prs_in == PRESERVE_YES){
    VRB.Sfree(cname, fname, "temp2", temp2);

  return iter;
예제 #13
// int MatInv(Vector *out, Vector *in, 
//            Float *true_res, PreserveType prs_in);
// The inverse of the unconditioned Dirac Operator 
// using Conjugate gradient.
// If true_res !=0 the value of the true residual is returned
// in true_res.
// *true_res = |src - MatPcDagMatPc * sol| / |src|
// prs_in is used to specify if the source
// in should be preserved or not. If not the memory usage
// is less by half the size of a fermion vector.
// The function returns the total number of CG iterations.
//
// Outline of the visible code:
//   1. temp = Dslash(even half of in), combined with the first half of in
//      and the factor kappa via fTimesV1PlusV2.
//   2. MatPcDag(in, temp) followed by InvCg(out, in, true_res).
//   3. The even half of out is built from Dslash(out), the even half of in,
//      and kappa.
//   4. If prs_in == PRESERVE_YES the first half of 'in' (temp_size elements)
//      is saved to temp2 before step 2 and copied back right after the solve.
//
// NOTE(review): this excerpt is structurally incomplete. The closing braces
// of the if-blocks and of the function are not visible, and no deallocation
// call for temp/temp2 is visible; only the VRB.Sfree calls appear. Confirm
// against the full source before editing.
int DiracOpWilson::MatInv(Vector *out, 
			  Vector *in, 
			  Float *true_res,
			  PreserveType prs_in) {
  char *fname = "MatInv(V*,V*,F*)";
  // Backup buffer for the first half of 'in'; only allocated and used when
  // prs_in == PRESERVE_YES.
  Vector *temp2;

  // Number of elements in one checkerboard half of a fermion vector.
  int temp_size = GJP.VolNodeSites() * lat.FsiteSize() / 2;

  // check out if converted
  //for (int ii = 0; ii < 2 * temp_size; ii++) {
  //  VRB.Result(cname, fname, "in[%d] = %e\n", ii, 
  //  *((IFloat *)in + ii));
  //  VRB.Result(cname, fname, "out[%d] = %e\n", ii, 
  //  *((IFloat *)out + ii));

  // Half-vector scratch buffer.
  // NOTE(review): the buffers are sized with sizeof(Float) but accessed and
  // copied as IFloat below -- confirm the two types have the same size.
  Vector *temp = (Vector *) smalloc(temp_size * sizeof(Float));
  if (temp == 0) ERR.Pointer(cname, fname, "temp");
  VRB.Smalloc(cname,fname, "temp", temp, temp_size * sizeof(Float));

  if(prs_in == PRESERVE_YES){
    temp2 = (Vector *) smalloc(temp_size * sizeof(Float));
    if (temp2 == 0) ERR.Pointer(cname, fname, "temp2");
    VRB.Smalloc(cname,fname, "temp2", temp2, temp_size * sizeof(Float));

  // points to the even part of fermion source 
  Vector *even_in = (Vector *) ( (IFloat *) in + temp_size );

  // points to the even part of fermion solution
  Vector *even_out = (Vector *) ( (IFloat *) out + temp_size );

  // Step 1: build the source for the preconditioned solve in temp.
  Dslash(temp, even_in, CHKB_EVEN, DAG_NO);

  // (presumably temp = kappa*temp + in -- confirm against fTimesV1PlusV2)
  fTimesV1PlusV2((IFloat *)temp, (IFloat) kappa, (IFloat *)temp,
    (IFloat *)in, temp_size);

  // save source
  if(prs_in == PRESERVE_YES){
    moveMem((IFloat *)temp2, (IFloat *)in, 
		temp_size * sizeof(IFloat) / sizeof(char));

  // Step 2: MatPcDag writes into 'in' (hence the backup above), then CG runs
  // with 'in' as its source.
  MatPcDag(in, temp);

  int iter = InvCg(out,in,true_res);

  // restore source
  if(prs_in == PRESERVE_YES){
    moveMem((IFloat *)in, (IFloat *)temp2, 
		temp_size * sizeof(IFloat) / sizeof(char));

  // Step 3: reconstruct the even half of the solution.
  Dslash(temp, out, CHKB_ODD, DAG_NO);

  fTimesV1PlusV2((IFloat *)even_out, (IFloat) kappa, (IFloat *)temp,
    (IFloat *) even_in, temp_size);

  VRB.Sfree(cname, fname, "temp", temp);

  if(prs_in == PRESERVE_YES){
    VRB.Sfree(cname, fname, "temp2", temp2);

  return iter;