Example #1
0
/* Returns the projected matrix A and the error of each eigenvector */
void
constructArray_qdp(QDP_ColorVector **eigVec, Matrix *A, QLA_Real *err,
		   QDP_Subset subset)
{
  QLA_Complex cc;
  QLA_Real rr;
  QDP_ColorVector *res, *grad;
  int i, j, Nvecs;

  Nvecs = A->N;
  res = QDP_create_V();
  grad = QDP_create_V();

  for(i=0; i<Nvecs; i++) {
    Matrix_Vec_mult_qdp(eigVec[i], res, subset);
    QDP_r_eq_re_V_dot_V(&rr, res, eigVec[i], subset);
    A->M[i][i].real = rr;
    A->M[i][i].imag = 0.0;
    QDP_V_eq_V(grad, res, subset);
    QDP_V_meq_r_times_V(grad, &rr, eigVec[i], subset);
    QDP_r_eq_norm2_V(&rr, grad, subset);
    err[i] = sqrt(rr);
    for(j=i+1; j<Nvecs; j++) {
      QDP_c_eq_V_dot_V(&cc, res, eigVec[j], subset);
      A->M[i][j].real = QLA_real(cc);
      A->M[i][j].imag = QLA_imag(cc);
      A->M[j][i].real = QLA_real(cc);
      A->M[j][i].imag = -QLA_imag(cc);
    }
  }

  QDP_destroy_V(grad);
  QDP_destroy_V(res);
}
Example #2
0
/* deallocates the tags and the temporaries the Matrix_Vec_mult needs */
void
cleanup_Matrix()
{
  int i;

  QDP_destroy_V(temp0);
  for(i=0; i<16; i++) {
    QDP_destroy_V(temp1[i]);
    QDP_destroy_V(temp2[i]);
  }
}
Example #3
0
void
RotateBasis_qdp(QDP_ColorVector **eigVec, Matrix *V, QDP_Subset subset)
{
  QLA_Complex z;
  QDP_ColorVector **Tmp;
  int i, j, N;

  N = V->N;
  /* Allocate the temporary vectors needed */
  Tmp = malloc(N*sizeof(QDP_ColorVector *));
  for(i=0; i<N; i++) Tmp[i] = QDP_create_V();

  for(i=0; i<N; i++) {
    QDP_V_eq_zero(Tmp[i], subset);
    for(j=0; j<N; j++) {
      QLA_real(z) = V->M[j][i].real;
      QLA_imag(z) = V->M[j][i].imag;
      QDP_V_peq_c_times_V(Tmp[i], &z, eigVec[j], subset);
    }
  }

  /* Copy rotated basis to the eigVec and free temporaries */
  for(i=0; i<N; i++) {
    QDP_V_eq_V(eigVec[i], Tmp[i], subset);
    normalize_qdp(eigVec[i], subset);
    QDP_destroy_V(Tmp[i]);
  }
  free(Tmp);
}
Example #4
0
int
Kalkreuter(su3_vector **eigVec, double *eigVal, Real Tolerance,
	   Real RelTol, int Nvecs, int MaxIter,
	   int Restart, int Kiters, int parity,
	   ferm_links_t *fn)
{
  //QLA_Real *ev;
  QDP_ColorVector **vec;
  QDP_Subset subset;
  int i, its;
  su3_matrix *t_fatlink;
  su3_matrix *t_longlink;

#ifdef DEBUG
  if(QDP_this_node==0) printf("begin Kalkreuter\n");
#endif

  if(parity==EVEN) subset = QDP_even;
  else if(parity==ODD) subset = QDP_odd;
  else subset = QDP_all;

  t_longlink = fn->lng;
  t_fatlink = fn->fat;

  set4_M_from_field(FATLINKS, t_fatlink, EVENANDODD);
  set4_M_from_field(LONGLINKS, t_longlink, EVENANDODD);

  //ev = malloc(Nvecs*sizeof(QLA_Real));
  vec = malloc(Nvecs*sizeof(QDP_ColorVector *));
  for(i=0; i<Nvecs; i++) {
    vec[i] = QDP_create_V();
    //ev[i] = eigVal[i];
    set_V_from_field(vec[i], eigVec[i],EVENANDODD);
  }

  its = Kalkreuter_qdp(vec, eigVal, Tolerance, RelTol, Nvecs, MaxIter, Restart,
		       Kiters, subset);

  for(i=0; i<Nvecs; i++) {
    //eigVal[i] = ev[i];
    set_field_from_V(eigVec[i], vec[i],EVENANDODD);
    QDP_destroy_V(vec[i]);
  }
  free(vec);
  //free(ev);
#ifdef DEBUG
  if(QDP_this_node==0) printf("end Kalkreuter\n");
#endif
  return its;
}
void
QOP_asqtad_force_multi_asvec_qdp(QOP_info_t *info, QDP_ColorMatrix *links[],
				 QDP_ColorMatrix *force[], QOP_asqtad_coeffs_t *coef,
				 REAL eps[], QDP_ColorVector *xin[], int nsrc)
{
#define NC QDP_get_nc(xin[0])
  REAL coeff[nsrc];
  REAL OneLink[nsrc], Lepage[nsrc], Naik[nsrc], FiveSt[nsrc], ThreeSt[nsrc], SevenSt[nsrc];
  REAL mNaik[nsrc], mLepage[nsrc], mFiveSt[nsrc], mThreeSt[nsrc], mSevenSt[nsrc];

  QDP_ColorVector *P3[8][nsrc];

  QDP_ColorVector *P5[8][nsrc];
  QDP_ColorVector *P5tmp[8][8][nsrc];
  QDP_ColorVector *P5s[4][nsrc];
  QDP_ColorVector *P5tmps[4][8][nsrc];

  //QDP_ColorVector *xin[nsrc];
  QDP_ColorVector *xintmp[8][nsrc];
  QDP_ColorVector *Pmu[nsrc];
  QDP_ColorVector *Pmutmp[8][nsrc];
  QDP_ColorVector *Pnumu[nsrc];
  QDP_ColorVector *Pnumutmp[8][nsrc];
  QDP_ColorVector *Prhonumu[nsrc];
  QDP_ColorVector *Prhonumutmp[8][nsrc];
  QDP_ColorVector *P7[nsrc];
  QDP_ColorVector *P7tmp[8][nsrc];
  QDP_ColorVector *P7rho[nsrc];
  QDP_ColorVector *ttv[nsrc];

  int i, dir;
  int mu, nu, rho, sig;

  double nflop1 = 253935;
  double nflop2 = 433968;
  double nflop = nflop1 + (nflop2-nflop1)*(nsrc-1);
  double dtime;
  dtime = -QOP_time();

  ASQTAD_FORCE_BEGIN;

  QOP_trace("test 1\n");
  /* setup parallel transport */
  QDP_ColorMatrix *tmpmat = QDP_create_M();
  for(i=0; i<QOP_common.ndim; i++) {
    fbshift[i] = QDP_neighbor[i];
    fbshiftdir[i] = QDP_forward;
    fblink[i] = links[i];
    fbshift[OPP_DIR(i)] = QDP_neighbor[i];
    fbshiftdir[OPP_DIR(i)] = QDP_backward;
    fblink[OPP_DIR(i)] = QDP_create_M();
    QDP_M_eq_sM(tmpmat, fblink[i], QDP_neighbor[i], QDP_backward, QDP_all);
    QDP_M_eq_Ma(fblink[OPP_DIR(i)], tmpmat, QDP_all);
  }

  tv = ttv;
  for(i=0; i<nsrc; i++) {
    tv[i] = QDP_create_V();
  }

  QOP_trace("test 2\n");
  /* Allocate temporary vectors */
  for(i=0; i<nsrc; i++) {
    Pmu[i] = QDP_create_V();
    Pnumu[i] = QDP_create_V();
    Prhonumu[i] = QDP_create_V();
    P7[i] = QDP_create_V();
    P7rho[i] = QDP_create_V();
    for(dir=0; dir<8; dir++) {
      xintmp[dir][i] = QDP_create_V();
      Pmutmp[dir][i] = QDP_create_V();
      Pnumutmp[dir][i] = QDP_create_V();
      Prhonumutmp[dir][i] = QDP_create_V();
      P7tmp[dir][i] = QDP_create_V();
    }
#if 1
    for(mu=0; mu<4; mu++) {
      P5s[mu][i] = QDP_create_V();
      for(dir=0; dir<8; dir++) {
	P5tmps[mu][dir][i] = QDP_create_V();
      }
    }
#else
    for(mu=0; mu<8; mu++) {
      P5[mu][i] = QDP_create_V();
      for(dir=0; dir<8; dir++) {
	P5tmp[mu][dir][i] = QDP_create_V();
	//printf("%p %p\n", P5tmp[mu][dir][i], &(P5tmp[mu][dir][i])); fflush(stdout);
	if(P5tmp[mu][dir][i]==NULL) {
	  fprintf(stderr, "error: can't create V\n");
	  QDP_abort();
	}
      }
    }
#endif
  }
  //printf("%p\n", P5tmp[0][4][0]); fflush(stdout);

  for(mu=0; mu<8; mu++) {
    for(i=0; i<nsrc; i++) {
      P3[mu][i] = QDP_create_V();
      //P5[mu][i] = QDP_create_V();
    }
  }

  for(mu=0; mu<4; mu++) {
    tempmom_qdp[mu] = force[mu];
    QDP_M_eqm_M(tempmom_qdp[mu], tempmom_qdp[mu], QDP_odd);
  }

  /* Path coefficients times fermion epsilon */
  /* Load path coefficients from table */
  for(i=0; i<nsrc; i++) {
    OneLink[i] = coef->one_link     * eps[i];
    Naik[i]    = coef->naik         * eps[i]; mNaik[i]    = -Naik[i];
    ThreeSt[i] = coef->three_staple * eps[i]; mThreeSt[i] = -ThreeSt[i];
    FiveSt[i]  = coef->five_staple  * eps[i]; mFiveSt[i]  = -FiveSt[i];
    SevenSt[i] = coef->seven_staple * eps[i]; mSevenSt[i] = -SevenSt[i];
    Lepage[i]  = coef->lepage       * eps[i]; mLepage[i]  = -Lepage[i];
  }

#if 0
  printf("nsrc = %i\n", nsrc);
  printf("coeffs = %g %g %g %g %g %g\n", OneLink[0], ThreeSt[0], FiveSt[0],
	 SevenSt[0], Lepage[0], Naik[0]);
#endif

  /* *************************************** */

  QOP_trace("start force loop\n");
  for(mu=0; mu<8; mu++) {
    //u_shift_hw_fermion(temp_x_qdp, Pmu, OPP_DIR(mu), temp_hw[OPP_DIR(mu)]);
    u_shift_color_vecs(xin, Pmu, OPP_DIR(mu), nsrc, xintmp[OPP_DIR(mu)]);

    for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) ) {
      //u_shift_hw_fermion(Pmu, P3[sig], sig, temp_hw[sig]);
      u_shift_color_vecs(Pmu, P3[sig], sig, nsrc, Pmutmp[sig]);

      if(GOES_FORWARDS(sig)) {
	/* Add the force F_sig[x+mu]:         x--+             *
	 *                                   |   |             *
	 *                                   o   o             *
	 * the 1 link in the path: - (numbering starts form 0) */
	add_forces_to_mom(P3[sig], Pmu, sig, mThreeSt, nsrc);
      }
    }

    for(nu=0; nu<8; nu++) if( (nu!=mu)&&(nu!=OPP_DIR(mu)) ) {
      int nP5 = 0;
      //Pnumu = hw_qdp[OPP_DIR(nu)];
      //u_shift_hw_fermion(Pmu, Pnumu, OPP_DIR(nu), temp_hw[OPP_DIR(nu)]);
      u_shift_color_vecs(Pmu, Pnumu, OPP_DIR(nu), nsrc, Pmutmp[OPP_DIR(nu)]);
      //QDP_V_veq_V(Pnumu, P3[OPP_DIR(nu)], QDP_all, nsrc);
      for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) &&
				   (sig!=nu)&&(sig!=OPP_DIR(nu)) ) {
#if 1
	for(i=0; i<nsrc; i++) {
	  P5[sig][i] = P5s[nP5][i];
	  for(dir=0; dir<8; dir++) P5tmp[sig][dir][i] = P5tmps[nP5][dir][i];
	}
#endif
	nP5++;
	//u_shift_hw_fermion(Pnumu, P5[sig], sig, temp_hw[sig]);
	u_shift_color_vecs(Pnumu, P5[sig], sig, nsrc, Pnumutmp[sig]);

	if(GOES_FORWARDS(sig)) {
	  /* Add the force F_sig[x+mu+nu]:      x--+             *
	   *                                   |   |             *
	   *                                   o   o             *
	   * the 2 link in the path: + (numbering starts form 0) */
	  add_forces_to_mom(P5[sig], Pnumu, sig, FiveSt, nsrc);
	}
      }
      QOP_trace("test 4\n");
      for(rho=0; rho<8; rho++) if( (rho!=mu)&&(rho!=OPP_DIR(mu)) &&
				   (rho!=nu)&&(rho!=OPP_DIR(nu)) ) {
	//Prhonumu = hw_qdp[OPP_DIR(rho)];
	//u_shift_hw_fermion(Pnumu, Prhonumu, OPP_DIR(rho), 
	//		 temp_hw[OPP_DIR(rho)] );
	  u_shift_color_vecs(Pnumu, Prhonumu, OPP_DIR(rho), nsrc,
			     Pnumutmp[OPP_DIR(rho)]);
	  //QDP_V_veq_V(Prhonumu, P5[OPP_DIR(rho)], QDP_all, nsrc);
	for(sig=0; sig<8; sig++) if( (sig!=mu )&&(sig!=OPP_DIR(mu )) &&
				     (sig!=nu )&&(sig!=OPP_DIR(nu )) &&
				     (sig!=rho)&&(sig!=OPP_DIR(rho)) ) {
	  /* Length 7 paths */
	  //P7 = hw_qdp[sig];
	  //u_shift_hw_fermion(Prhonumu, P7, sig, temp_hw[sig] );
  QOP_trace("test 43\n");
	  u_shift_color_vecs(Prhonumu, P7, sig, nsrc, Prhonumutmp[sig]);
  QOP_trace("test 44\n");
	  //QDP_V_eq_r_times_V(P7[0], &SevenSt[0], P7[0], QDP_all);
	  //QDP_V_eq_r_times_V(P7[1], &SevenSt[1], P7[1], QDP_all);
	  if(GOES_FORWARDS(sig)) {
	    /* Add the force F_sig[x+mu+nu+rho]:  x--+             *
	     *                                   |   |             *
	     *                                   o   o             *
	     * the 3 link in the path: - (numbering starts form 0) */
  QOP_trace("test 45\n");
	    add_forces_to_mom(P7, Prhonumu, sig, mSevenSt, nsrc);
  QOP_trace("test 46\n");
	    //mom_meq_force(P7, Prhonumu, sig);
	  }
	  /* Add the force F_rho the 2(4) link in the path: +     */
	  //P7rho = hw_qdp[rho];
	  //u_shift_hw_fermion(P7, P7rho, rho, temp_hw[rho]);
  QOP_trace("test 47\n");
	  u_shift_color_vecs(P7, P7rho, rho, nsrc, P7tmp[rho]);
  QOP_trace("test 48\n");
	  side_link_forces(rho,sig,SevenSt,Pnumu,P7,Prhonumu,P7rho, nsrc);
  QOP_trace("test 49\n");
	  //side_link_3f_force2(rho,sig,Pnumu,P7,Prhonumu,P7rho);
	  /* Add the P7rho vector to P5 */
	  for(i=0; i<nsrc; i++) {
	    if(FiveSt[i]!=0) coeff[i] = SevenSt[i]/FiveSt[i];
	    else coeff[i] = 0;
  QOP_trace("test 410\n");
	    QDP_V_peq_r_times_V(P5[sig][i], &coeff[i], P7rho[i], QDP_all);
  QOP_trace("test 411\n");
	  }
	} /* sig */
      } /* rho */
  QOP_trace("test 5\n");
#define P5nu P7
      for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) &&
				   (sig!=nu)&&(sig!=OPP_DIR(nu)) ) {
	/* Length 5 paths */
	/* Add the force F_nu the 1(3) link in the path: -     */
	//P5nu = hw_qdp[nu];
	//u_shift_hw_fermion(P5[sig], P5nu, nu, temp_hw[nu]);
	u_shift_color_vecs(P5[sig], P5nu, nu, nsrc, P5tmp[sig][nu]);
	side_link_forces(nu, sig, mFiveSt, Pmu, P5[sig], Pnumu, P5nu, nsrc);
	/* Add the P5nu vector to P3 */
	for(i=0; i<nsrc; i++) {
	  if(ThreeSt[i]!=0) coeff[i] = FiveSt[i]/ThreeSt[i]; 
	  else coeff[i] = 0;
	  QDP_V_peq_r_times_V(P3[sig][i], &coeff[i], P5nu[i], QDP_all);
	}
      } /* sig */
    } /* nu */

#define Pmumu Pnumu
#define Pmumutmp Pnumutmp
#define P5sig Prhonumu
#define P5sigtmp Prhonumutmp
#define P3mu P7
#define Popmu P7
#define Pmumumu P7
    /* Now the Lepage term... It is the same as 5-link paths with
       nu=mu and FiveSt=Lepage. */
    //u_shift_hw_fermion(Pmu, Pmumu, OPP_DIR(mu), temp_hw[OPP_DIR(mu)] );
    u_shift_color_vecs(Pmu, Pmumu, OPP_DIR(mu), nsrc, Pmutmp[OPP_DIR(mu)]);

    for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) ) {
      //P5sig = hw_qdp[sig];
      //u_shift_hw_fermion(Pmumu, P5sig, sig, temp_hw[sig]);
      u_shift_color_vecs(Pmumu, P5sig, sig, nsrc, Pmumutmp[sig]);
      if(GOES_FORWARDS(sig)) {
	/* Add the force F_sig[x+mu+nu]:      x--+             *
	 *                                   |   |             *
	 *                                   o   o             *
	 * the 2 link in the path: + (numbering starts form 0) */
	add_forces_to_mom(P5sig, Pmumu, sig, Lepage, nsrc);
      }
      /* Add the force F_nu the 1(3) link in the path: -     */
      //P5nu = hw_qdp[mu];
      //u_shift_hw_fermion(P5sig, P5nu, mu, temp_hw[mu]);
      u_shift_color_vecs(P5sig, P5nu, mu, nsrc, P5sigtmp[mu]);
      side_link_forces(mu, sig, mLepage, Pmu, P5sig, Pmumu, P5nu, nsrc);
      /* Add the P5nu vector to P3 */
      for(i=0; i<nsrc; i++) {
	if(ThreeSt[i]!=0) coeff[i] = Lepage[i]/ThreeSt[i];
	else coeff[i] = 0;
	QDP_V_peq_r_times_V(P3[sig][i], &coeff[i], P5nu[i], QDP_all);
      }

      /* Length 3 paths (Not the Naik term) */
      /* Add the force F_mu the 0(2) link in the path: +     */
      if(GOES_FORWARDS(mu)) {
	//P3mu = hw_qdp[mu];  /* OK to clobber P5nu */
	//u_shift_hw_fermion(P3[sig], P3mu, mu, temp_hw[mu]);
	//u_shift_color_vecs(P3[sig], P3mu, mu, 2, temp_hw[mu]);
	for(i=0; i<nsrc; i++) {
	  QDP_V_eq_V(P5sig[i], P3[sig][i], QDP_all);
	}
	u_shift_color_vecs(P5sig, P3mu, mu, nsrc, P5sigtmp[mu]);
      }
      /* The above shift is not needed if mu is backwards */
      side_link_forces(mu, sig, ThreeSt, xin, P3[sig], Pmu, P3mu, nsrc);
    }

    /* Finally the OneLink and the Naik term */
    if(GOES_BACKWARDS(mu)) {
      /* Do only the forward terms in the Dslash */
      /* Because I have shifted with OPP_DIR(mu) Pmu is a forward *
       * shift.                                                   */
      /* The one link */
      add_forces_to_mom(Pmu, xin, OPP_DIR(mu), OneLink, nsrc);
      /* For the same reason Pmumu is the forward double link */

      /* Popmu is a backward shift */
      //Popmu = hw_qdp[mu]; /* OK to clobber P3mu */
      //u_shift_hw_fermion(xin, Popmu, mu, temp_hw[mu]);
      u_shift_color_vecs(xin, Popmu, mu, nsrc, xintmp[mu]);
      /* The Naik */
      /* link no 1: - */
      add_forces_to_mom(Pmumu, Popmu, OPP_DIR(mu), mNaik, nsrc);
      /* Pmumumu can overwrite Popmu which is no longer needed */
      //Pmumumu = hw_qdp[OPP_DIR(mu)];
      //u_shift_hw_fermion(Pmumu, Pmumumu, OPP_DIR(mu), temp_hw[OPP_DIR(mu)]);
      u_shift_color_vecs(Pmumu, Pmumumu, OPP_DIR(mu), nsrc, Pmumutmp[OPP_DIR(mu)]);
      /* link no 0: + */
      add_forces_to_mom(Pmumumu, xin, OPP_DIR(mu), Naik, nsrc);
    } else {
      /* The rest of the Naik terms */
      //Popmu = hw_qdp[mu]; /* OK to clobber P3mu */
      //u_shift_hw_fermion(xin, Popmu, mu, temp_hw[mu]);
      u_shift_color_vecs(xin, Popmu, mu, nsrc, xintmp[mu]);
      /* link no 2: + */
      /* Pmumu is double backward shift */
      add_forces_to_mom(Popmu, Pmumu, mu, Naik, nsrc);
    }
    /* Here we have to do together the Naik term and the one link term */

  }/* mu */
  QOP_trace("test 6\n");
  QOP_trace("test 7\n");

  for(mu=0; mu<4; mu++) {
    QDP_M_eq_M(tmpmat, tempmom_qdp[mu], QDP_even);
    QDP_M_eqm_M(tmpmat, tempmom_qdp[mu], QDP_odd);
    QDP_M_eq_antiherm_M(tempmom_qdp[mu], tmpmat, QDP_all);
  }
  QDP_destroy_M(tmpmat);

  //printf("%p\n", P5tmp[0][4][0]); fflush(stdout);
  //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); }
  /* Free temporary vectors */
  for(i=0; i<nsrc; i++) {
    QDP_destroy_V(Pmu[i]);
    QDP_destroy_V(Pnumu[i]);
    QDP_destroy_V(Prhonumu[i]);
    QDP_destroy_V(P7[i]);
    QDP_destroy_V(P7rho[i]);
    //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); }
    for(dir=0; dir<8; dir++) {
      QDP_destroy_V(xintmp[dir][i]);
      QDP_destroy_V(Pmutmp[dir][i]);
      QDP_destroy_V(Pnumutmp[dir][i]);
      QDP_destroy_V(Prhonumutmp[dir][i]);
      QDP_destroy_V(P7tmp[dir][i]);
    }
    //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); }
    for(mu=0; mu<4; mu++) {
      //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); }
      QDP_destroy_V(P5s[mu][i]);
      //QDP_destroy_V(P5[mu][i]);
      //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); }
      for(dir=0; dir<8; dir++) {
	//if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); }
	QDP_destroy_V(P5tmps[mu][dir][i]);
	//printf("%p\n", P5tmp[mu][dir][i]); fflush(stdout);
	//QDP_destroy_V(P5tmp[mu][dir][i]);
	//if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); }
      }
      //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); }
    }
    //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); }
  }

  //if(QDP_this_node==0) { printf("here3\n"); fflush(stdout); }
  for(mu=0; mu<8; mu++) {
    for(i=0; i<nsrc; i++) {
      QDP_destroy_V(P3[mu][i]);
    }
    //QDP_destroy_V(P5[mu][0]);
    //QDP_destroy_V(P5[mu][1]);
  }

  for(i=0; i<nsrc; i++) {
    QDP_destroy_V(tv[i]);
  }

  //if(QDP_this_node==0) { printf("here4\n"); fflush(stdout); }
  for(i=4; i<8; i++) {
    QDP_destroy_M(fblink[i]);
  }

  dtime += QOP_time();
  info->final_sec = dtime;
  info->final_flop = nflop*QDP_sites_on_node;
  info->status = QOP_SUCCESS;

  ASQTAD_FORCE_END;
#undef NC
}
Example #6
0
void
asqtadInvert(QOP_info_t *info, QOP_FermionLinksAsqtad *fla,
	     QOP_invert_arg_t *invarg, QOP_resid_arg_t *residarg[],
	     QLA_Real *masses, int nm, QDP_ColorVector *prop[],
	     QDP_ColorVector *source)
{
#define NC QDP_get_nc(source)
  QDP_Subset sub=QDP_all;
  if(invarg->evenodd==QOP_EVEN) {
    sub = QDP_even;
  } else if(invarg->evenodd==QOP_ODD) {
    sub = QDP_odd;
  }
  QOP_F_FermionLinksAsqtad *ffla = QOP_FD_asqtad_create_L_from_L(fla);
  QLA_F_Real m[nm];
  QDP_F_ColorVector *x[nm], *b;
  for(int i=0; i<nm; i++) {
    m[i] = masses[i];
    x[i] = QDP_F_create_V();
    QDP_F_V_eq_zero(x[i], sub);
    residarg[i]->final_iter = 0;
    residarg[i]->final_restart = 0;
  }
  b = QDP_F_create_V();
  QDP_FD_V_eq_V(b, source, sub);

  double flops=0, secs=0;
  int its=0;
  double preres = 1e-4;

  if(nm>1) {
    QOP_resid_arg_t **ra = residarg;
    QLA_F_Real *mp = m;
    QDP_F_ColorVector **xp = x;
    switch(QOP_Nc) {
#ifdef HAVE_NC1
    case 1: QOP_F1_asqtad_invert_multi_qdp(info, (QOP_F1_FermionLinksAsqtad*)ffla, invarg, &ra, &mp, &nm, (QDP_F1_ColorVector***)&xp, (QDP_F1_ColorVector**)&b, 1);
      break;
#endif
#ifdef HAVE_NC2
    case 2: QOP_F2_asqtad_invert_multi_qdp(info, (QOP_F2_FermionLinksAsqtad*)ffla, invarg, &ra, &mp, &nm, (QDP_F2_ColorVector***)&xp, (QDP_F2_ColorVector**)&b, 1);
      break;
#endif
#ifdef HAVE_NC3
    case 3: QOP_F3_asqtad_invert_multi_qdp(info, (QOP_F3_FermionLinksAsqtad*)ffla, invarg, &ra, &mp, &nm, (QDP_F3_ColorVector***)&xp, (QDP_F3_ColorVector**)&b, 1);
      break;
#endif
    default: QOP_F_asqtad_invert_multi_qdp(info, ffla, invarg, &ra, &mp, &nm, &xp, &b, 1);
    }
    for(int i=0; i<nm; i++) {
      QDP_DF_V_eq_V(prop[i], x[i], sub);
    }
    its += residarg[0]->final_iter;
    secs += info->final_sec;
    flops += info->final_flop;
  }

  QLA_Real norm2in;
  QDP_r_eq_norm2_V(&norm2in, source, sub);
  QDP_ColorVector *Dprop, *r;
  r = QDP_create_V();
  Dprop = QDP_create_V();

  for(int i=0; i<nm; i++) {
    QLA_Real rsq, rsqmin;
    int iters=0, restarts=0;
    QOP_resid_arg_t res_arg = *residarg[i];
    QLA_Real rsqstop = res_arg.rsqmin*norm2in;

    while(1) {
      get_resid(r, prop[i], source, fla, masses[i], invarg->evenodd, Dprop);
      QDP_r_eq_norm2_V(&rsq, r, sub);
      //printf0("its: %i  resid[%i]/in = %g\n", its, i, sqrt(rsq/norm2in));
      if(rsq<rsqstop) break;
      if(restarts>invarg->max_restarts) break;
      if(iters>0) restarts++;

      rsqmin = preres*preres;
      if(rsqstop/rsq>rsqmin) rsqmin = rsqstop/rsq;
      rsqmin *= 0.99;
      res_arg.rsqmin = rsqmin;

      if(1) {
	QDP_FD_V_eq_V(b, r, sub);
	QDP_F_V_eq_zero(x[i], sub);
	switch(QOP_Nc) {
#ifdef HAVE_NC1
	case 1: QOP_F1_asqtad_invert_qdp(info, (QOP_F1_FermionLinksAsqtad*)ffla, invarg, &res_arg, m[i], (QDP_F1_ColorVector*)x[i], (QDP_F1_ColorVector*)b);
	  break;
#endif
#ifdef HAVE_NC2
	case 2: QOP_F2_asqtad_invert_qdp(info, (QOP_F2_FermionLinksAsqtad*)ffla, invarg, &res_arg, m[i], (QDP_F2_ColorVector*)x[i], (QDP_F2_ColorVector*)b);
	  break;
#endif
#ifdef HAVE_NC3
	case 3: QOP_F3_asqtad_invert_qdp(info, (QOP_F3_FermionLinksAsqtad*)ffla, invarg, &res_arg, m[i], (QDP_F3_ColorVector*)x[i], (QDP_F3_ColorVector*)b);
	  break;
#endif
	default: QOP_F_asqtad_invert_qdp(info, ffla, invarg, &res_arg, m[i], x[i], b);
	}
	QDP_DF_V_eq_V(Dprop, x[i], sub);
      } else {
	QDP_V_eq_zero(Dprop, sub);
	QOP_asqtad_invert_qdp(info, fla, invarg, &res_arg, masses[i], Dprop, r);
      }
      residarg[i]->final_restart++;
      residarg[i]->final_iter += res_arg.final_iter;
      its += res_arg.final_iter;
      iters += res_arg.final_iter;
      secs += info->final_sec;
      flops += info->final_flop;
      QDP_V_peq_V(prop[i], Dprop, sub);
    }
    //printf0("iters = %4i  secs = %8f  mflops = %.0f resid = %g\n", its,
    //secs, 1e-6*flops/secs, sqrt(rsq/norm2in));
  }
  info->final_sec = secs;
  info->final_flop = flops;
  QDP_destroy_V(r);
  QDP_destroy_V(Dprop);
  QDP_F_destroy_V(b);
  for(int i=0; i<nm; i++) {
    QDP_F_destroy_V(x[i]);
  }
  QOP_F_asqtad_destroy_L(ffla);
#undef NC
}
Example #7
0
int
Kalkreuter_qdp(QDP_ColorVector **eigVec, double *eigVal, Real Tolerance, 
	       Real RelTol, int Nvecs, int MaxIter, int Restart, int Kiters,
	       QDP_Subset subset)
{
  QLA_Real max_error = 1.0e+10;
  QLA_Real min_grad;
  QLA_Real *grad, *err;
  Matrix Array, V;
  QDP_ColorVector *vec;
  int total_iters=0;
  int i, j;
  int iter = 0;

#ifdef DEBUG
  if(QDP_this_node==0) printf("begin Kalkreuter_qdp\n");
#endif

  prepare_Matrix();

  Array = AllocateMatrix(Nvecs);  /* Allocate the array */
  V = AllocateMatrix(Nvecs);      /* Allocate the Eigenvector matrix */

  vec = QDP_create_V();
  grad = malloc(Nvecs*sizeof(QLA_Real));
  err = malloc(Nvecs*sizeof(QLA_Real));

  /* Initiallize all the eigenvectors to a random vector */
  for(j=0; j<Nvecs; j++) {
    grad[j] = 1.0e+10;
    QDP_V_eq_gaussian_S(eigVec[j], rand_state, QDP_all);
    eigVal[j] = 1.0e+16;
    //project_out_qdp(eigVec[j], eigVec, j, subset);
    //normalize_qdp(eigVec[j], subset);
  }

#if 0
  constructArray_qdp(eigVec, &Array, grad, subset);
  Jacobi(&Array, &V, JACOBI_TOL);
  sort_eigenvectors(&Array, &V);
  RotateBasis_qdp(eigVec, &V, subset);
#endif

  while( (max_error>Tolerance) && (iter<Kiters) ) {
    iter++;

    min_grad = grad[0]/eigVal[0];
    for(i=1; i<Nvecs; i++) {
      if(grad[i]<min_grad*eigVal[i]) min_grad = grad[i]/eigVal[i];
    }

    RelTol = 0.3;
    for(j=0; j<Nvecs; j++) {
      if(grad[j]>Tolerance*eigVal[j]) {
	QLA_Real rt;
	rt = RelTol*min_grad*eigVal[j]/grad[j];
	//rt = 1e-5/grad[j];
	if(rt>RelTol) rt = RelTol;
	//rt = RelTol;
	QDP_V_eq_V(vec, eigVec[j], QDP_all);
	total_iters += Rayleigh_min_qdp(vec, eigVec, Tolerance, rt,
					j, MaxIter, Restart, subset);
	QDP_V_eq_V(eigVec[j], vec, QDP_all);
      }
    }
    constructArray_qdp(eigVec, &Array, grad, subset);

    for(i=0; i<Nvecs; i++)
      node0_printf("quot(%i) = %g +/- %8e |grad|=%g\n",
		   i, Array.M[i][i].real, err[i], grad[i]);

#ifdef DEBUG
    node0_printf("Eigenvalues before diagonalization\n");
    for(i=0;i<Nvecs;i++)
      node0_printf("quot(%i) = %g |grad|=%g\n",i,Array.M[i][i].real,grad[i]);
#endif

    Jacobi(&Array, &V, JACOBI_TOL);
    sort_eigenvectors(&Array, &V);
    RotateBasis_qdp(eigVec, &V, subset);
    constructArray_qdp(eigVec, &Array, grad, subset);

    /* find the maximum error */
    max_error = 0.0;
    for(i=0; i<Nvecs; i++) {
      err[i] = eigVal[i];
      eigVal[i] = Array.M[i][i].real;
      err[i] = fabs(err[i] - eigVal[i])/(1.0 - RelTol*RelTol);
      if(eigVal[i]>1e-10) {
#ifndef STRICT_CONVERGENCE
	if(err[i]/eigVal[i]>max_error) max_error = err[i]/eigVal[i];
#else
	if(grad[i]/eigVal[i]>max_error) max_error = grad[i]/eigVal[i];
#endif
      }
    }

    node0_printf("\nEigenvalues after diagonalization at iteration %i\n",iter);
    for(i=0; i<Nvecs; i++)
      node0_printf("quot(%i) = %g +/- %8e |grad|=%g\n",
		   i, eigVal[i], err[i], grad[i]);
  }

  node0_printf("BEGIN RESULTS\n");
  for(i=0;i<Nvecs;i++){
    node0_printf("Eigenvalue(%i) = %g +/- %8e\n",
		 i,eigVal[i],err[i]);
  }

#if 0
  node0_printf("BEGIN EIGENVALUES\n");
  for(i=0; i<Nvecs; i++) {
    double ev, er;
    ev = sqrt(eigVal[i]);
    er = err[i]/(2*ev);
    node0_printf("%.8g\t%g\n", ev, er);
  }
  node0_printf("END EIGENVALUES\n");

  {
    QDP_Writer *qw;
    QDP_String *md;
    char evstring[100], *fn="eigenvecs.out";
    md = QDP_string_create();

    sprintf(evstring, "%i", Nvecs);
    QDP_string_set(md, evstring);
    qw = QDP_open_write(md, fn, QDP_SINGLEFILE);

    for(i=0; i<Nvecs; i++) {
      double ev, er;
      ev = sqrt(eigVal[i]);
      er = err[i]/(2*ev);
      sprintf(evstring, "%.8g\t%g", ev, er);
      QDP_string_set(md, evstring);
      QDP_write_V(qw, md, eigVec[i]);
    }
    QDP_close_write(qw);
    QDP_string_destroy(md);
  }

#endif

  /** Deallocate the arrays **/
  deAllocate(&V) ;
  deAllocate(&Array) ;
  free(err);
  free(grad);
  QDP_destroy_V(vec);
  cleanup_Matrix();
#ifdef DEBUG
  if(QDP_this_node==0) printf("end Kalkreuter_qdp\n");
#endif
  return total_iters;
}
Example #8
0
int
Rayleigh_min_qdp(QDP_ColorVector *vec, QDP_ColorVector **eigVec,
		 Real Tolerance,  Real RelTol, int Nvecs, int MaxIter,
		 int Restart, QDP_Subset subset)
{
  QLA_Complex cc;
  QLA_Real beta, cos_theta, sin_theta;
  QLA_Real quot, P_norm, theta, real_vecMp, pMp;
  QLA_Real g_norm, old_g_norm, start_g_norm;
  QDP_ColorVector *Mvec, *grad, *P, *MP;
  int iter;

#ifdef DEBUG
  if(QDP_this_node==0) printf("begin Rayleigh_min_qdp\n");
#endif

  Mvec = QDP_create_V();
  grad = QDP_create_V();
  //oldgrad = QDP_create_V();
  P = QDP_create_V();
  MP = QDP_create_V();

  project_out_qdp(vec, eigVec, Nvecs, subset);
  normalize_qdp(vec, subset);
  Matrix_Vec_mult_qdp(vec, Mvec, subset);
  project_out_qdp(Mvec, eigVec, Nvecs, subset);

  /* Compute the quotient quot=vev*M*vec */
  QDP_r_eq_re_V_dot_V(&quot, vec, Mvec, subset);
  /* quot is real since M is hermitian. quot = vec*M*vec */
#ifdef DEBUG
  if(QDP_this_node==0) printf("Rayleigh_min: Start -- quot=%g\n", quot);
#endif
  /* Compute the grad=M*vec - quot*vec */
  QDP_V_eq_V(grad, Mvec, QDP_all);
  QDP_V_meq_r_times_V(grad, &quot, vec, subset);
  /* set P (the search direction) equal to grad */
  QDP_V_eq_V(P, grad, QDP_all);
  /* compute the norms of P and grad */
  QDP_r_eq_norm2_V(&P_norm, P, subset);
  P_norm = sqrt(P_norm);
  QDP_r_eq_norm2_V(&g_norm, grad, subset);
  g_norm = sqrt(g_norm);
  start_g_norm = g_norm;
  //QDP_V_eq_V(oldgrad, grad, subset);
#ifdef DEBUG
  if(QDP_this_node==0) printf("Rayleigh_min: Start -- g_norm=%g\n", g_norm);
#endif  

  iter = 0;
  while( (g_norm>Tolerance*quot) &&
	 ( ((iter<MaxIter)&&(g_norm/start_g_norm>RelTol)) || (iter<MINITER) )
	 ) {
    iter++;
    Matrix_Vec_mult_qdp(P, MP, subset);
    QDP_r_eq_re_V_dot_V(&real_vecMp, vec, MP, subset);
    QDP_r_eq_re_V_dot_V(&pMp, P, MP, subset);
    theta = 0.5*atan(2.0*real_vecMp/(quot*P_norm - pMp/P_norm));
    sin_theta = sin(theta);
    cos_theta = cos(theta);
    if(sin_theta*cos_theta*real_vecMp>0) {
      theta = theta - 0.5*M_PI;  /* chose the minimum not the maximum */
      sin_theta = sin(theta);  /* the sin,cos calls can be avoided */
      cos_theta = cos(theta);
    }
    sin_theta = sin_theta/P_norm;
    /* vec = cos(theta)*vec +sin(theta)*P/p_norm */
    //dax_p_by_qdp(cos_theta, vec, sin_theta, P, subset);
    QDP_V_eq_r_times_V(vec, &cos_theta, vec, subset);
    QDP_V_peq_r_times_V(vec, &sin_theta, P, subset);
    /* Mvec = cos(theta)*Mvec +sin(theta)*MP/p_norm */
    //dax_p_by_qdp(cos_theta, Mvec, sin_theta, MP, subset);
    QDP_V_eq_r_times_V(Mvec, &cos_theta, Mvec, subset);
    QDP_V_peq_r_times_V(Mvec, &sin_theta, MP, subset);
    /* renormalize vec ... */
    if( iter%Restart == 0 ) {
#ifdef DEBUG
      {
	QLA_Real vec_norm;
	if(QDP_this_node==0) printf("Renormalizing...");
	QDP_r_eq_norm2_V(&vec_norm, vec, subset);
	if(QDP_this_node==0) printf("  norm: %g\n", sqrt(vec_norm));
      }
#endif
      /* Project vec on the orthogonal complement of eigVec */
      project_out_qdp(vec, eigVec, Nvecs, subset);
      normalize_qdp(vec, subset);
      Matrix_Vec_mult_qdp(vec, Mvec, subset);
      /* Recompute the quotient */
      QDP_r_eq_re_V_dot_V(&quot, vec, Mvec, subset);
      /* Recompute the grad */
      QDP_V_eq_V(grad, Mvec, QDP_all);
      QDP_V_meq_r_times_V(grad, &quot, vec, subset);
      //QDP_r_eq_norm2_V(&g_norm, grad, subset);
      //printf("g_norm = %g\n", g_norm);
      /* Project P on the orthogonal complement of eigVec */
      //QDP_r_eq_norm2_V(&P_norm, P, subset);
      //printf("P_norm = %g\n", P_norm);
      project_out_qdp(P, eigVec, Nvecs, subset);
      //QDP_r_eq_norm2_V(&P_norm, P, subset);
      //printf("P_norm = %g\n", P_norm);
      /* make P orthogonal to vec */
      QDP_c_eq_V_dot_V(&cc, vec, P, subset);
      //printf("cc = %g\n", QLA_real(cc));
      QDP_V_meq_c_times_V(P, &cc, vec, subset);
      //QDP_r_eq_norm2_V(&P_norm, P, subset);
      //printf("P_norm = %g\n", P_norm);
      /* make P orthogonal to grad */
      QDP_c_eq_V_dot_V(&cc, grad, P, subset);
      //printf("cc = %g\n", QLA_real(cc));
      QDP_V_meq_c_times_V(P, &cc, grad, subset);
      QDP_r_eq_norm2_V(&P_norm, P, subset);
      P_norm = sqrt(P_norm);
    }
    QDP_r_eq_re_V_dot_V(&quot, vec, Mvec, subset);
#ifdef DEBUG
    node0_printf("Rayleigh_min: %i, quot=%8g g=%8g b=%6g P:%6g\n",
		 iter, quot, g_norm, beta, P_norm);
#endif
    old_g_norm = g_norm;

    QDP_V_eq_V(grad, Mvec, QDP_all);
    QDP_V_meq_r_times_V(grad, &quot, vec, subset);

    //QDP_V_meq_V(oldgrad, grad, subset);
    //QDP_r_eq_re_V_dot_V(&g_norm, oldgrad, grad, subset);
    //QDP_V_eq_V(oldgrad, grad, subset);

    QDP_r_eq_norm2_V(&g_norm, grad, subset);
    g_norm = sqrt(g_norm);

    beta = cos_theta*g_norm*g_norm/(old_g_norm*old_g_norm);
    if( beta>2.0 ) beta = 2.0;  /* Cut off beta */

    QDP_c_eq_V_dot_V(&cc, vec, P, subset);
    QLA_real(cc) *= beta;
    QLA_imag(cc) *= beta;
    QDP_V_eq_r_times_V_plus_V(P, &beta, P, grad, subset);
    QDP_V_meq_c_times_V(P, &cc, vec, subset);
    QDP_r_eq_norm2_V(&P_norm, P, subset);
    P_norm = sqrt(P_norm);
  }
  project_out_qdp(vec, eigVec, Nvecs, subset);
  normalize_qdp(vec, subset);
  QDP_destroy_V(MP);
  QDP_destroy_V(P);
  //QDP_destroy_V(oldgrad);
  QDP_destroy_V(grad);
  QDP_destroy_V(Mvec);

  iter++;
#ifdef DEBUG
  if(QDP_this_node==0) printf("end Rayleigh_min_qdp\n");
#endif
  return iter;
}
/* Smearing level 0 */
static void 
QOP_hisq_force_multi_smearing0_fnmat(QOP_info_t *info,  
				     REAL *residues,
				     QDP_ColorVector *x[], 
				     int nterms, 
				     QDP_ColorMatrix *force_accum[4],
				     QDP_ColorMatrix *force_accum_naik[4])
{
  int term;
  int i,k;
  int dir;
  REAL coeff;

  QDP_ColorMatrix *tmat;
  QDP_ColorMatrix *oprod_along_path[MAX_PATH_LENGTH+1];
  QDP_ColorMatrix *mat_tmp0;
  QDP_ColorVector *tsrc[2], *vec_tmp[2];
  size_t nflops = 0;

  if( nterms==0 )return;

  mat_tmp0   = QDP_create_M();
  tmat       = QDP_create_M();
  tsrc[0] = QDP_create_V();
  tsrc[1] = QDP_create_V();
  vec_tmp[0] = QDP_create_V();
  vec_tmp[1] = QDP_create_V();

  for(i=0;i<=MAX_PATH_LENGTH;i++){
    oprod_along_path[i] = QDP_create_M();
  }

  // clear force accumulators
  
  for(dir=XUP;dir<=TUP;dir++)
    QDP_M_eq_zero(force_accum[dir], QDP_all);

  for(dir=XUP;dir<=TUP;dir++){ //AB loop on directions, path table is not needed
    k=0; // which vec_tmp we are using (0 or 1)
    QDP_V_eq_V(tsrc[k], x[0], QDP_all);
    QDP_V_eq_sV(vec_tmp[k], tsrc[k], 
		fnshift(OPP_DIR(dir)), fndir(OPP_DIR(dir)), QDP_all);
    QDP_M_eq_zero(oprod_along_path[0], QDP_all);

    for(term=0;term<nterms;term++){
      if(term<nterms-1) {
	QDP_V_eq_V(tsrc[1-k], x[term+1], QDP_all);
	QDP_V_eq_sV(vec_tmp[1-k], tsrc[1-k], 
		    fnshift(OPP_DIR(dir)), fndir(OPP_DIR(dir)), QDP_all);
      }
      //QDP_M_eq_V_times_Va(tmat, x[term], vec_tmp[k], QDP_all);
      QDP_M_eq_V_times_Va(tmat, tsrc[k], vec_tmp[k], QDP_all);
      nflops += 54;
      QDP_discard_V(vec_tmp[k]);
      QDP_M_peq_r_times_M(oprod_along_path[0], &residues[term], tmat, 
			  QDP_all);
      nflops += 36;
      
      k=1-k; // swap 0 and 1
    } // end loop over terms in rational function expansion 

    link_gather_connection_qdp(oprod_along_path[1], oprod_along_path[0], tmat,
			       dir );
    coeff = 1.;
    QDP_M_peq_r_times_M(force_accum[dir],&coeff,oprod_along_path[1],QDP_all);
    nflops += 36;

  } // end of loop on directions //


  // *** Naik part *** /
  
  // clear force accumulators
  for(dir=XUP;dir<=TUP;dir++)
    QDP_M_eq_zero(force_accum_naik[dir], QDP_all);


  for(dir=XUP;dir<=TUP;dir++){ //AB loop on directions, path table is not needed
    k=0; // which vec_tmp we are using (0 or 1)
    QDP_V_eq_V(tsrc[k], x[0], QDP_all);
    QDP_V_eq_sV(vec_tmp[k], tsrc[k], fnshift(OPP_3_DIR( DIR3(dir) )), 
		fndir(OPP_3_DIR( DIR3(dir) )), QDP_all);

    QDP_M_eq_zero(oprod_along_path[0], QDP_all);

    for(term=0;term<nterms;term++){
      if(term<nterms-1) {
	QDP_V_eq_V(tsrc[1-k], x[term+1], QDP_all);
	QDP_V_eq_sV(vec_tmp[1-k], tsrc[1-k], fnshift(OPP_3_DIR( DIR3(dir) )), 
		    fndir(OPP_3_DIR( DIR3(dir) )), QDP_all);
      }
      //QDP_M_eq_V_times_Va(tmat, x[term], vec_tmp[k], QDP_all);
      QDP_M_eq_V_times_Va(tmat, tsrc[k], vec_tmp[k], QDP_all);
      nflops += 54;
      QDP_discard_V(vec_tmp[k]);
      QDP_M_peq_r_times_M(oprod_along_path[0], &residues[term], tmat, QDP_all);
      nflops += 36;

      k=1-k; // swap 0 and 1
    } // end loop over terms in rational function expansion 

    link_gather_connection_qdp(oprod_along_path[1], oprod_along_path[0], tmat, 
			       DIR3(dir) );
    coeff = 1; // fermion_eps is outside this routine in "wrapper" routine
    QDP_M_peq_r_times_M(force_accum_naik[dir],&coeff,
			oprod_along_path[1],QDP_all);
    nflops += 36;
  } // end of loop on directions 

  QDP_destroy_V( tsrc[0] );
  QDP_destroy_V( tsrc[1] );
  QDP_destroy_V( vec_tmp[0] );
  QDP_destroy_V( vec_tmp[1] );
  QDP_destroy_M( mat_tmp0 );
  QDP_destroy_M( tmat );
  for(i=0;i<=MAX_PATH_LENGTH;i++){
    QDP_destroy_M( oprod_along_path[i] );
  }

  info->final_flop = ((double)nflops)*QDP_sites_on_node;
  return;
} //hisq_force_multi_smearing0_fnmat
/* Smearing level i*/
static void 
QOP_hisq_force_multi_smearing_fnmat(QOP_info_t *info, 
				    QDP_ColorMatrix * gf[4],
				    REAL *residues,
				    QDP_ColorVector *x[], 
				    int nterms, 
				    QDP_ColorMatrix *force_accum[4],
				    QDP_ColorMatrix *force_accum_old[4],
				    QDP_ColorMatrix *force_accum_naik_old[4],
				    int internal_num_q_paths,
				    Q_path *internal_q_paths_sorted,
				    int *internal_netbackdir_table)
{
  int i,j,k,lastdir=-99,ipath,ilink;
  int length,dir,odir;
  REAL coeff;

  QDP_ColorMatrix *tmat;
  QDP_ColorMatrix *oprod_along_path[MAX_PATH_LENGTH+1];
  QDP_ColorMatrix *mats_along_path[MAX_PATH_LENGTH+1];
  QDP_ColorMatrix *mat_tmp0,*mat_tmp1, *stmp[8];;
  QDP_ColorVector *vec_tmp[2];

  int netbackdir;
  size_t nflops = 0;

// table of net path displacements (backwards from usual convention)

  Q_path *this_path;	// pointer to current path

  /* Allocate fields */
  for(i=0;i<=MAX_PATH_LENGTH;i++){
    oprod_along_path[i] = QDP_create_M();
  }
  for(i=1;i<=MAX_PATH_LENGTH;i++){ 
    // 0 element is never used (it's unit matrix)
    mats_along_path[i] = QDP_create_M();
  }

  mat_tmp0   = QDP_create_M();
  mat_tmp1   = QDP_create_M();
  for(i=0; i<8; i++) stmp[i] = QDP_create_M();
  tmat       = QDP_create_M();
  vec_tmp[0] = QDP_create_V();
  vec_tmp[1] = QDP_create_V();
 
  // clear force accumulators
  for(dir=XUP;dir<=TUP;dir++)
    QDP_M_eq_zero(force_accum[dir], QDP_all);

  // loop over paths, and loop over links in path 
  for( ipath=0; ipath<internal_num_q_paths; ipath++ ){
    this_path = &(internal_q_paths_sorted[ipath]); 
    if(this_path->forwback== -1)continue;	// skip backwards dslash 
    length = this_path->length;
    netbackdir = internal_netbackdir_table[ipath];

    // move f(i-1) force from current site in positive direction,
    //  this corresponds to outer product |X><Y| calculated at the endpoint of the path 
    if( netbackdir<8) { // Not a Naik path
      link_gather_connection_qdp(oprod_along_path[0] , 
				 force_accum_old[OPP_DIR(netbackdir)],
				 tmat, netbackdir );
    }
    else { // Naik path
      if( NULL==force_accum_naik_old ) {
        QOP_printf0( "hisq_force_multi_smearing_fnmat:  mismatch:\n" );
        QOP_printf0( "force_accum_naik_old is NULL, but path table contains Naik paths(!)\n" );
        exit(0);
      }
      // CONVERSION FROM 3-LINK DIRECTION TO 1-LINK DIRECTION
      link_gather_connection_qdp(oprod_along_path[0] , 
				 force_accum_naik_old[OPP_DIR(netbackdir-8)],
				 tmat, netbackdir );
    }

    // figure out how much of the outer products along the path must be
    // recomputed. j is last one needing recomputation. k is first one.
    j=length-1; // default is recompute all
    if( GOES_BACKWARDS(this_path->dir[0]) ) k=1; else k=0;

    for(ilink=j;ilink>=k;ilink--){
      link_transport_connection_qdp( oprod_along_path[length-ilink], 
				     oprod_along_path[length-ilink-1], gf,
				     mat_tmp0, stmp, this_path->dir[ilink]  );
      nflops += 198;
    }

    // maintain an array of transports "to this point" along the path.
    //	Don't recompute beginning parts of path if same as last path 
    ilink=0; // first link where new transport is needed
    // Sometimes we don't need the matrix for the last link
    if( GOES_FORWARDS(this_path->dir[length-1]) ) k=length-1; else k=length;

    for( ; ilink<k; ilink++ ){
      if( ilink==0 ){
        dir = this_path->dir[0];
	if( GOES_FORWARDS(dir) ){
	  QDP_M_eq_sM(tmat, gf[dir], QDP_neighbor[dir],
		      QDP_backward, QDP_all);
	  QDP_M_eq_Ma(mats_along_path[1], tmat, QDP_all);
	  QDP_discard_M(tmat);
	}
	else{
	  QDP_M_eq_M(mats_along_path[1], gf[OPP_DIR(dir)], QDP_all);
	}
      }
      else { // ilink != 0
        dir = OPP_DIR(this_path->dir[ilink]);

	link_transport_connection_qdp( mats_along_path[ilink+1], 
				       mats_along_path[ilink], gf,
				       mat_tmp0, stmp, dir );
	nflops += 198;
      }
    } // end loop over links

    // A path has (length+1) points, counting the ends.  At first
    //	 point, no "down" direction links have their momenta "at this
    //	 point". At last, no "up" ... 
    if( GOES_FORWARDS(this_path->dir[length-1]) ) k=length-1; else k=length;
    for( ilink=0; ilink<=k; ilink++ ){
      if(ilink<length)dir = this_path->dir[ilink];
      else dir=NODIR;
      coeff = this_path->coeff;
      if( (ilink%2)==1 )coeff = -coeff;
      // add in contribution to the force 
      if( ilink<length && GOES_FORWARDS(dir) ){
	link_gather_connection_qdp(mat_tmp1, 
		       oprod_along_path[length-ilink-1], tmat, dir );
        if(ilink==0) 
	  {
	    QDP_M_eq_M(mat_tmp0,mat_tmp1,QDP_all);
	  }
        else
	  {
	    QDP_M_eq_M_times_Ma(mat_tmp0, mats_along_path[ilink], 
				mat_tmp1, QDP_all);
	    nflops += 198;
	    QDP_M_eq_Ma(mat_tmp1,mat_tmp0,QDP_all);
	  }
	QDP_M_peq_r_times_M(force_accum[dir],&coeff,mat_tmp1,QDP_all);
	nflops += 36;
      }
      if( ilink>0 && GOES_BACKWARDS(lastdir) ){
	odir = OPP_DIR(lastdir);
        if( ilink==1 ){
	  QDP_M_eq_M(mat_tmp0,oprod_along_path[length-ilink],QDP_all);
	  QDP_M_eq_Ma(mat_tmp1,mat_tmp0,QDP_all);
	}
        else{
	  link_gather_connection_qdp(mat_tmp1, mats_along_path[ilink-1], 
				     tmat, odir );
	  QDP_M_eq_M_times_Ma(mat_tmp0, oprod_along_path[length-ilink], 
			      mat_tmp1, QDP_all);
	  nflops += 198;
	  QDP_M_eq_Ma(mat_tmp1, mat_tmp0, QDP_all);
        }
	QDP_M_peq_r_times_M(force_accum[odir],&coeff,mat_tmp1,QDP_all);
	nflops += 36;
      }
      lastdir = dir;
    } // end loop over links in path //
  } // end loop over paths //

  QDP_destroy_V( vec_tmp[0] );
  QDP_destroy_V( vec_tmp[1] );
  QDP_destroy_M( mat_tmp0 );
  QDP_destroy_M( mat_tmp1 );
  QDP_destroy_M( tmat );
  for(i=0; i<8; i++) QDP_destroy_M(stmp[i]);
  for(i=0;i<=MAX_PATH_LENGTH;i++){
    QDP_destroy_M( oprod_along_path[i] );
  }
  for(i=1;i<=MAX_PATH_LENGTH;i++){
    QDP_destroy_M( mats_along_path[i] );
  }

  info->final_flop = ((double)nflops)*QDP_sites_on_node;

  return;
}//hisq_force_multi_smearing_fnmat