/* Returns the projected matrix A and the error of each eigenvector */ void constructArray_qdp(QDP_ColorVector **eigVec, Matrix *A, QLA_Real *err, QDP_Subset subset) { QLA_Complex cc; QLA_Real rr; QDP_ColorVector *res, *grad; int i, j, Nvecs; Nvecs = A->N; res = QDP_create_V(); grad = QDP_create_V(); for(i=0; i<Nvecs; i++) { Matrix_Vec_mult_qdp(eigVec[i], res, subset); QDP_r_eq_re_V_dot_V(&rr, res, eigVec[i], subset); A->M[i][i].real = rr; A->M[i][i].imag = 0.0; QDP_V_eq_V(grad, res, subset); QDP_V_meq_r_times_V(grad, &rr, eigVec[i], subset); QDP_r_eq_norm2_V(&rr, grad, subset); err[i] = sqrt(rr); for(j=i+1; j<Nvecs; j++) { QDP_c_eq_V_dot_V(&cc, res, eigVec[j], subset); A->M[i][j].real = QLA_real(cc); A->M[i][j].imag = QLA_imag(cc); A->M[j][i].real = QLA_real(cc); A->M[j][i].imag = -QLA_imag(cc); } } QDP_destroy_V(grad); QDP_destroy_V(res); }
/* deallocates the tags and the temporaries the Matrix_Vec_mult needs */ void cleanup_Matrix() { int i; QDP_destroy_V(temp0); for(i=0; i<16; i++) { QDP_destroy_V(temp1[i]); QDP_destroy_V(temp2[i]); } }
void RotateBasis_qdp(QDP_ColorVector **eigVec, Matrix *V, QDP_Subset subset) { QLA_Complex z; QDP_ColorVector **Tmp; int i, j, N; N = V->N; /* Allocate the temporary vectors needed */ Tmp = malloc(N*sizeof(QDP_ColorVector *)); for(i=0; i<N; i++) Tmp[i] = QDP_create_V(); for(i=0; i<N; i++) { QDP_V_eq_zero(Tmp[i], subset); for(j=0; j<N; j++) { QLA_real(z) = V->M[j][i].real; QLA_imag(z) = V->M[j][i].imag; QDP_V_peq_c_times_V(Tmp[i], &z, eigVec[j], subset); } } /* Copy rotated basis to the eigVec and free temporaries */ for(i=0; i<N; i++) { QDP_V_eq_V(eigVec[i], Tmp[i], subset); normalize_qdp(eigVec[i], subset); QDP_destroy_V(Tmp[i]); } free(Tmp); }
int Kalkreuter(su3_vector **eigVec, double *eigVal, Real Tolerance, Real RelTol, int Nvecs, int MaxIter, int Restart, int Kiters, int parity, ferm_links_t *fn) { //QLA_Real *ev; QDP_ColorVector **vec; QDP_Subset subset; int i, its; su3_matrix *t_fatlink; su3_matrix *t_longlink; #ifdef DEBUG if(QDP_this_node==0) printf("begin Kalkreuter\n"); #endif if(parity==EVEN) subset = QDP_even; else if(parity==ODD) subset = QDP_odd; else subset = QDP_all; t_longlink = fn->lng; t_fatlink = fn->fat; set4_M_from_field(FATLINKS, t_fatlink, EVENANDODD); set4_M_from_field(LONGLINKS, t_longlink, EVENANDODD); //ev = malloc(Nvecs*sizeof(QLA_Real)); vec = malloc(Nvecs*sizeof(QDP_ColorVector *)); for(i=0; i<Nvecs; i++) { vec[i] = QDP_create_V(); //ev[i] = eigVal[i]; set_V_from_field(vec[i], eigVec[i],EVENANDODD); } its = Kalkreuter_qdp(vec, eigVal, Tolerance, RelTol, Nvecs, MaxIter, Restart, Kiters, subset); for(i=0; i<Nvecs; i++) { //eigVal[i] = ev[i]; set_field_from_V(eigVec[i], vec[i],EVENANDODD); QDP_destroy_V(vec[i]); } free(vec); //free(ev); #ifdef DEBUG if(QDP_this_node==0) printf("end Kalkreuter\n"); #endif return its; }
void QOP_asqtad_force_multi_asvec_qdp(QOP_info_t *info, QDP_ColorMatrix *links[], QDP_ColorMatrix *force[], QOP_asqtad_coeffs_t *coef, REAL eps[], QDP_ColorVector *xin[], int nsrc) { #define NC QDP_get_nc(xin[0]) REAL coeff[nsrc]; REAL OneLink[nsrc], Lepage[nsrc], Naik[nsrc], FiveSt[nsrc], ThreeSt[nsrc], SevenSt[nsrc]; REAL mNaik[nsrc], mLepage[nsrc], mFiveSt[nsrc], mThreeSt[nsrc], mSevenSt[nsrc]; QDP_ColorVector *P3[8][nsrc]; QDP_ColorVector *P5[8][nsrc]; QDP_ColorVector *P5tmp[8][8][nsrc]; QDP_ColorVector *P5s[4][nsrc]; QDP_ColorVector *P5tmps[4][8][nsrc]; //QDP_ColorVector *xin[nsrc]; QDP_ColorVector *xintmp[8][nsrc]; QDP_ColorVector *Pmu[nsrc]; QDP_ColorVector *Pmutmp[8][nsrc]; QDP_ColorVector *Pnumu[nsrc]; QDP_ColorVector *Pnumutmp[8][nsrc]; QDP_ColorVector *Prhonumu[nsrc]; QDP_ColorVector *Prhonumutmp[8][nsrc]; QDP_ColorVector *P7[nsrc]; QDP_ColorVector *P7tmp[8][nsrc]; QDP_ColorVector *P7rho[nsrc]; QDP_ColorVector *ttv[nsrc]; int i, dir; int mu, nu, rho, sig; double nflop1 = 253935; double nflop2 = 433968; double nflop = nflop1 + (nflop2-nflop1)*(nsrc-1); double dtime; dtime = -QOP_time(); ASQTAD_FORCE_BEGIN; QOP_trace("test 1\n"); /* setup parallel transport */ QDP_ColorMatrix *tmpmat = QDP_create_M(); for(i=0; i<QOP_common.ndim; i++) { fbshift[i] = QDP_neighbor[i]; fbshiftdir[i] = QDP_forward; fblink[i] = links[i]; fbshift[OPP_DIR(i)] = QDP_neighbor[i]; fbshiftdir[OPP_DIR(i)] = QDP_backward; fblink[OPP_DIR(i)] = QDP_create_M(); QDP_M_eq_sM(tmpmat, fblink[i], QDP_neighbor[i], QDP_backward, QDP_all); QDP_M_eq_Ma(fblink[OPP_DIR(i)], tmpmat, QDP_all); } tv = ttv; for(i=0; i<nsrc; i++) { tv[i] = QDP_create_V(); } QOP_trace("test 2\n"); /* Allocate temporary vectors */ for(i=0; i<nsrc; i++) { Pmu[i] = QDP_create_V(); Pnumu[i] = QDP_create_V(); Prhonumu[i] = QDP_create_V(); P7[i] = QDP_create_V(); P7rho[i] = QDP_create_V(); for(dir=0; dir<8; dir++) { xintmp[dir][i] = QDP_create_V(); Pmutmp[dir][i] = QDP_create_V(); Pnumutmp[dir][i] = QDP_create_V(); Prhonumutmp[dir][i] = QDP_create_V(); P7tmp[dir][i] = QDP_create_V(); } #if 1 for(mu=0; mu<4; mu++) { P5s[mu][i] = QDP_create_V(); for(dir=0; dir<8; dir++) { P5tmps[mu][dir][i] = QDP_create_V(); } } #else for(mu=0; mu<8; mu++) { P5[mu][i] = QDP_create_V(); for(dir=0; dir<8; dir++) { P5tmp[mu][dir][i] = QDP_create_V(); //printf("%p %p\n", P5tmp[mu][dir][i], &(P5tmp[mu][dir][i])); fflush(stdout); if(P5tmp[mu][dir][i]==NULL) { fprintf(stderr, "error: can't create V\n"); QDP_abort(); } } } #endif } //printf("%p\n", P5tmp[0][4][0]); fflush(stdout); for(mu=0; mu<8; mu++) { for(i=0; i<nsrc; i++) { P3[mu][i] = QDP_create_V(); //P5[mu][i] = QDP_create_V(); } } for(mu=0; mu<4; mu++) { tempmom_qdp[mu] = force[mu]; QDP_M_eqm_M(tempmom_qdp[mu], tempmom_qdp[mu], QDP_odd); } /* Path coefficients times fermion epsilon */ /* Load path coefficients from table */ for(i=0; i<nsrc; i++) { OneLink[i] = coef->one_link * eps[i]; Naik[i] = coef->naik * eps[i]; mNaik[i] = -Naik[i]; ThreeSt[i] = coef->three_staple * eps[i]; mThreeSt[i] = -ThreeSt[i]; FiveSt[i] = coef->five_staple * eps[i]; mFiveSt[i] = -FiveSt[i]; SevenSt[i] = coef->seven_staple * eps[i]; mSevenSt[i] = -SevenSt[i]; Lepage[i] = coef->lepage * eps[i]; mLepage[i] = -Lepage[i]; } #if 0 printf("nsrc = %i\n", nsrc); printf("coeffs = %g %g %g %g %g %g\n", OneLink[0], ThreeSt[0], FiveSt[0], SevenSt[0], Lepage[0], Naik[0]); #endif /* *************************************** */ QOP_trace("start force loop\n"); for(mu=0; mu<8; mu++) { //u_shift_hw_fermion(temp_x_qdp, Pmu, OPP_DIR(mu), temp_hw[OPP_DIR(mu)]); u_shift_color_vecs(xin, Pmu, OPP_DIR(mu), nsrc, xintmp[OPP_DIR(mu)]); for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) ) { //u_shift_hw_fermion(Pmu, P3[sig], sig, temp_hw[sig]); u_shift_color_vecs(Pmu, P3[sig], sig, nsrc, Pmutmp[sig]); if(GOES_FORWARDS(sig)) { /* Add the force F_sig[x+mu]: x--+ * * | | * * o o * * the 1 link in the path: - (numbering starts form 0) */ add_forces_to_mom(P3[sig], Pmu, sig, mThreeSt, nsrc); } } for(nu=0; nu<8; nu++) if( (nu!=mu)&&(nu!=OPP_DIR(mu)) ) { int nP5 = 0; //Pnumu = hw_qdp[OPP_DIR(nu)]; //u_shift_hw_fermion(Pmu, Pnumu, OPP_DIR(nu), temp_hw[OPP_DIR(nu)]); u_shift_color_vecs(Pmu, Pnumu, OPP_DIR(nu), nsrc, Pmutmp[OPP_DIR(nu)]); //QDP_V_veq_V(Pnumu, P3[OPP_DIR(nu)], QDP_all, nsrc); for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) && (sig!=nu)&&(sig!=OPP_DIR(nu)) ) { #if 1 for(i=0; i<nsrc; i++) { P5[sig][i] = P5s[nP5][i]; for(dir=0; dir<8; dir++) P5tmp[sig][dir][i] = P5tmps[nP5][dir][i]; } #endif nP5++; //u_shift_hw_fermion(Pnumu, P5[sig], sig, temp_hw[sig]); u_shift_color_vecs(Pnumu, P5[sig], sig, nsrc, Pnumutmp[sig]); if(GOES_FORWARDS(sig)) { /* Add the force F_sig[x+mu+nu]: x--+ * * | | * * o o * * the 2 link in the path: + (numbering starts form 0) */ add_forces_to_mom(P5[sig], Pnumu, sig, FiveSt, nsrc); } } QOP_trace("test 4\n"); for(rho=0; rho<8; rho++) if( (rho!=mu)&&(rho!=OPP_DIR(mu)) && (rho!=nu)&&(rho!=OPP_DIR(nu)) ) { //Prhonumu = hw_qdp[OPP_DIR(rho)]; //u_shift_hw_fermion(Pnumu, Prhonumu, OPP_DIR(rho), // temp_hw[OPP_DIR(rho)] ); u_shift_color_vecs(Pnumu, Prhonumu, OPP_DIR(rho), nsrc, Pnumutmp[OPP_DIR(rho)]); //QDP_V_veq_V(Prhonumu, P5[OPP_DIR(rho)], QDP_all, nsrc); for(sig=0; sig<8; sig++) if( (sig!=mu )&&(sig!=OPP_DIR(mu )) && (sig!=nu )&&(sig!=OPP_DIR(nu )) && (sig!=rho)&&(sig!=OPP_DIR(rho)) ) { /* Length 7 paths */ //P7 = hw_qdp[sig]; //u_shift_hw_fermion(Prhonumu, P7, sig, temp_hw[sig] ); QOP_trace("test 43\n"); u_shift_color_vecs(Prhonumu, P7, sig, nsrc, Prhonumutmp[sig]); QOP_trace("test 44\n"); //QDP_V_eq_r_times_V(P7[0], &SevenSt[0], P7[0], QDP_all); //QDP_V_eq_r_times_V(P7[1], &SevenSt[1], P7[1], QDP_all); if(GOES_FORWARDS(sig)) { /* Add the force F_sig[x+mu+nu+rho]: x--+ * * | | * * o o * * the 3 link in the path: - (numbering starts form 0) */ QOP_trace("test 45\n"); add_forces_to_mom(P7, Prhonumu, sig, mSevenSt, nsrc); QOP_trace("test 46\n"); //mom_meq_force(P7, Prhonumu, sig); } /* Add the force F_rho the 2(4) link in the path: + */ //P7rho = hw_qdp[rho]; //u_shift_hw_fermion(P7, P7rho, rho, temp_hw[rho]); QOP_trace("test 47\n"); u_shift_color_vecs(P7, P7rho, rho, nsrc, P7tmp[rho]); QOP_trace("test 48\n"); side_link_forces(rho,sig,SevenSt,Pnumu,P7,Prhonumu,P7rho, nsrc); QOP_trace("test 49\n"); //side_link_3f_force2(rho,sig,Pnumu,P7,Prhonumu,P7rho); /* Add the P7rho vector to P5 */ for(i=0; i<nsrc; i++) { if(FiveSt[i]!=0) coeff[i] = SevenSt[i]/FiveSt[i]; else coeff[i] = 0; QOP_trace("test 410\n"); QDP_V_peq_r_times_V(P5[sig][i], &coeff[i], P7rho[i], QDP_all); QOP_trace("test 411\n"); } } /* sig */ } /* rho */ QOP_trace("test 5\n"); #define P5nu P7 for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) && (sig!=nu)&&(sig!=OPP_DIR(nu)) ) { /* Length 5 paths */ /* Add the force F_nu the 1(3) link in the path: - */ //P5nu = hw_qdp[nu]; //u_shift_hw_fermion(P5[sig], P5nu, nu, temp_hw[nu]); u_shift_color_vecs(P5[sig], P5nu, nu, nsrc, P5tmp[sig][nu]); side_link_forces(nu, sig, mFiveSt, Pmu, P5[sig], Pnumu, P5nu, nsrc); /* Add the P5nu vector to P3 */ for(i=0; i<nsrc; i++) { if(ThreeSt[i]!=0) coeff[i] = FiveSt[i]/ThreeSt[i]; else coeff[i] = 0; QDP_V_peq_r_times_V(P3[sig][i], &coeff[i], P5nu[i], QDP_all); } } /* sig */ } /* nu */ #define Pmumu Pnumu #define Pmumutmp Pnumutmp #define P5sig Prhonumu #define P5sigtmp Prhonumutmp #define P3mu P7 #define Popmu P7 #define Pmumumu P7 /* Now the Lepage term... It is the same as 5-link paths with nu=mu and FiveSt=Lepage. */ //u_shift_hw_fermion(Pmu, Pmumu, OPP_DIR(mu), temp_hw[OPP_DIR(mu)] ); u_shift_color_vecs(Pmu, Pmumu, OPP_DIR(mu), nsrc, Pmutmp[OPP_DIR(mu)]); for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) ) { //P5sig = hw_qdp[sig]; //u_shift_hw_fermion(Pmumu, P5sig, sig, temp_hw[sig]); u_shift_color_vecs(Pmumu, P5sig, sig, nsrc, Pmumutmp[sig]); if(GOES_FORWARDS(sig)) { /* Add the force F_sig[x+mu+nu]: x--+ * * | | * * o o * * the 2 link in the path: + (numbering starts form 0) */ add_forces_to_mom(P5sig, Pmumu, sig, Lepage, nsrc); } /* Add the force F_nu the 1(3) link in the path: - */ //P5nu = hw_qdp[mu]; //u_shift_hw_fermion(P5sig, P5nu, mu, temp_hw[mu]); u_shift_color_vecs(P5sig, P5nu, mu, nsrc, P5sigtmp[mu]); side_link_forces(mu, sig, mLepage, Pmu, P5sig, Pmumu, P5nu, nsrc); /* Add the P5nu vector to P3 */ for(i=0; i<nsrc; i++) { if(ThreeSt[i]!=0) coeff[i] = Lepage[i]/ThreeSt[i]; else coeff[i] = 0; QDP_V_peq_r_times_V(P3[sig][i], &coeff[i], P5nu[i], QDP_all); } /* Length 3 paths (Not the Naik term) */ /* Add the force F_mu the 0(2) link in the path: + */ if(GOES_FORWARDS(mu)) { //P3mu = hw_qdp[mu]; /* OK to clobber P5nu */ //u_shift_hw_fermion(P3[sig], P3mu, mu, temp_hw[mu]); //u_shift_color_vecs(P3[sig], P3mu, mu, 2, temp_hw[mu]); for(i=0; i<nsrc; i++) { QDP_V_eq_V(P5sig[i], P3[sig][i], QDP_all); } u_shift_color_vecs(P5sig, P3mu, mu, nsrc, P5sigtmp[mu]); } /* The above shift is not needed if mu is backwards */ side_link_forces(mu, sig, ThreeSt, xin, P3[sig], Pmu, P3mu, nsrc); } /* Finally the OneLink and the Naik term */ if(GOES_BACKWARDS(mu)) { /* Do only the forward terms in the Dslash */ /* Because I have shifted with OPP_DIR(mu) Pmu is a forward * * shift. */ /* The one link */ add_forces_to_mom(Pmu, xin, OPP_DIR(mu), OneLink, nsrc); /* For the same reason Pmumu is the forward double link */ /* Popmu is a backward shift */ //Popmu = hw_qdp[mu]; /* OK to clobber P3mu */ //u_shift_hw_fermion(xin, Popmu, mu, temp_hw[mu]); u_shift_color_vecs(xin, Popmu, mu, nsrc, xintmp[mu]); /* The Naik */ /* link no 1: - */ add_forces_to_mom(Pmumu, Popmu, OPP_DIR(mu), mNaik, nsrc); /* Pmumumu can overwrite Popmu which is no longer needed */ //Pmumumu = hw_qdp[OPP_DIR(mu)]; //u_shift_hw_fermion(Pmumu, Pmumumu, OPP_DIR(mu), temp_hw[OPP_DIR(mu)]); u_shift_color_vecs(Pmumu, Pmumumu, OPP_DIR(mu), nsrc, Pmumutmp[OPP_DIR(mu)]); /* link no 0: + */ add_forces_to_mom(Pmumumu, xin, OPP_DIR(mu), Naik, nsrc); } else { /* The rest of the Naik terms */ //Popmu = hw_qdp[mu]; /* OK to clobber P3mu */ //u_shift_hw_fermion(xin, Popmu, mu, temp_hw[mu]); u_shift_color_vecs(xin, Popmu, mu, nsrc, xintmp[mu]); /* link no 2: + */ /* Pmumu is double backward shift */ add_forces_to_mom(Popmu, Pmumu, mu, Naik, nsrc); } /* Here we have to do together the Naik term and the one link term */ }/* mu */ QOP_trace("test 6\n"); QOP_trace("test 7\n"); for(mu=0; mu<4; mu++) { QDP_M_eq_M(tmpmat, tempmom_qdp[mu], QDP_even); QDP_M_eqm_M(tmpmat, tempmom_qdp[mu], QDP_odd); QDP_M_eq_antiherm_M(tempmom_qdp[mu], tmpmat, QDP_all); } QDP_destroy_M(tmpmat); //printf("%p\n", P5tmp[0][4][0]); fflush(stdout); //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } /* Free temporary vectors */ for(i=0; i<nsrc; i++) { QDP_destroy_V(Pmu[i]); QDP_destroy_V(Pnumu[i]); QDP_destroy_V(Prhonumu[i]); QDP_destroy_V(P7[i]); QDP_destroy_V(P7rho[i]); //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } for(dir=0; dir<8; dir++) { QDP_destroy_V(xintmp[dir][i]); QDP_destroy_V(Pmutmp[dir][i]); QDP_destroy_V(Pnumutmp[dir][i]); QDP_destroy_V(Prhonumutmp[dir][i]); QDP_destroy_V(P7tmp[dir][i]); } //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } for(mu=0; mu<4; mu++) { //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } QDP_destroy_V(P5s[mu][i]); //QDP_destroy_V(P5[mu][i]); //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } for(dir=0; dir<8; dir++) { //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } QDP_destroy_V(P5tmps[mu][dir][i]); //printf("%p\n", P5tmp[mu][dir][i]); fflush(stdout); //QDP_destroy_V(P5tmp[mu][dir][i]); //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } } //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } } //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } } //if(QDP_this_node==0) { printf("here3\n"); fflush(stdout); } for(mu=0; mu<8; mu++) { for(i=0; i<nsrc; i++) { QDP_destroy_V(P3[mu][i]); } //QDP_destroy_V(P5[mu][0]); //QDP_destroy_V(P5[mu][1]); } for(i=0; i<nsrc; i++) { QDP_destroy_V(tv[i]); } //if(QDP_this_node==0) { printf("here4\n"); fflush(stdout); } for(i=4; i<8; i++) { QDP_destroy_M(fblink[i]); } dtime += QOP_time(); info->final_sec = dtime; info->final_flop = nflop*QDP_sites_on_node; info->status = QOP_SUCCESS; ASQTAD_FORCE_END; #undef NC }
void asqtadInvert(QOP_info_t *info, QOP_FermionLinksAsqtad *fla, QOP_invert_arg_t *invarg, QOP_resid_arg_t *residarg[], QLA_Real *masses, int nm, QDP_ColorVector *prop[], QDP_ColorVector *source) { #define NC QDP_get_nc(source) QDP_Subset sub=QDP_all; if(invarg->evenodd==QOP_EVEN) { sub = QDP_even; } else if(invarg->evenodd==QOP_ODD) { sub = QDP_odd; } QOP_F_FermionLinksAsqtad *ffla = QOP_FD_asqtad_create_L_from_L(fla); QLA_F_Real m[nm]; QDP_F_ColorVector *x[nm], *b; for(int i=0; i<nm; i++) { m[i] = masses[i]; x[i] = QDP_F_create_V(); QDP_F_V_eq_zero(x[i], sub); residarg[i]->final_iter = 0; residarg[i]->final_restart = 0; } b = QDP_F_create_V(); QDP_FD_V_eq_V(b, source, sub); double flops=0, secs=0; int its=0; double preres = 1e-4; if(nm>1) { QOP_resid_arg_t **ra = residarg; QLA_F_Real *mp = m; QDP_F_ColorVector **xp = x; switch(QOP_Nc) { #ifdef HAVE_NC1 case 1: QOP_F1_asqtad_invert_multi_qdp(info, (QOP_F1_FermionLinksAsqtad*)ffla, invarg, &ra, &mp, &nm, (QDP_F1_ColorVector***)&xp, (QDP_F1_ColorVector**)&b, 1); break; #endif #ifdef HAVE_NC2 case 2: QOP_F2_asqtad_invert_multi_qdp(info, (QOP_F2_FermionLinksAsqtad*)ffla, invarg, &ra, &mp, &nm, (QDP_F2_ColorVector***)&xp, (QDP_F2_ColorVector**)&b, 1); break; #endif #ifdef HAVE_NC3 case 3: QOP_F3_asqtad_invert_multi_qdp(info, (QOP_F3_FermionLinksAsqtad*)ffla, invarg, &ra, &mp, &nm, (QDP_F3_ColorVector***)&xp, (QDP_F3_ColorVector**)&b, 1); break; #endif default: QOP_F_asqtad_invert_multi_qdp(info, ffla, invarg, &ra, &mp, &nm, &xp, &b, 1); } for(int i=0; i<nm; i++) { QDP_DF_V_eq_V(prop[i], x[i], sub); } its += residarg[0]->final_iter; secs += info->final_sec; flops += info->final_flop; } QLA_Real norm2in; QDP_r_eq_norm2_V(&norm2in, source, sub); QDP_ColorVector *Dprop, *r; r = QDP_create_V(); Dprop = QDP_create_V(); for(int i=0; i<nm; i++) { QLA_Real rsq, rsqmin; int iters=0, restarts=0; QOP_resid_arg_t res_arg = *residarg[i]; QLA_Real rsqstop = res_arg.rsqmin*norm2in; while(1) { get_resid(r, prop[i], source, fla, masses[i], invarg->evenodd, Dprop); QDP_r_eq_norm2_V(&rsq, r, sub); //printf0("its: %i resid[%i]/in = %g\n", its, i, sqrt(rsq/norm2in)); if(rsq<rsqstop) break; if(restarts>invarg->max_restarts) break; if(iters>0) restarts++; rsqmin = preres*preres; if(rsqstop/rsq>rsqmin) rsqmin = rsqstop/rsq; rsqmin *= 0.99; res_arg.rsqmin = rsqmin; if(1) { QDP_FD_V_eq_V(b, r, sub); QDP_F_V_eq_zero(x[i], sub); switch(QOP_Nc) { #ifdef HAVE_NC1 case 1: QOP_F1_asqtad_invert_qdp(info, (QOP_F1_FermionLinksAsqtad*)ffla, invarg, &res_arg, m[i], (QDP_F1_ColorVector*)x[i], (QDP_F1_ColorVector*)b); break; #endif #ifdef HAVE_NC2 case 2: QOP_F2_asqtad_invert_qdp(info, (QOP_F2_FermionLinksAsqtad*)ffla, invarg, &res_arg, m[i], (QDP_F2_ColorVector*)x[i], (QDP_F2_ColorVector*)b); break; #endif #ifdef HAVE_NC3 case 3: QOP_F3_asqtad_invert_qdp(info, (QOP_F3_FermionLinksAsqtad*)ffla, invarg, &res_arg, m[i], (QDP_F3_ColorVector*)x[i], (QDP_F3_ColorVector*)b); break; #endif default: QOP_F_asqtad_invert_qdp(info, ffla, invarg, &res_arg, m[i], x[i], b); } QDP_DF_V_eq_V(Dprop, x[i], sub); } else { QDP_V_eq_zero(Dprop, sub); QOP_asqtad_invert_qdp(info, fla, invarg, &res_arg, masses[i], Dprop, r); } residarg[i]->final_restart++; residarg[i]->final_iter += res_arg.final_iter; its += res_arg.final_iter; iters += res_arg.final_iter; secs += info->final_sec; flops += info->final_flop; QDP_V_peq_V(prop[i], Dprop, sub); } //printf0("iters = %4i secs = %8f mflops = %.0f resid = %g\n", its, //secs, 1e-6*flops/secs, sqrt(rsq/norm2in)); } info->final_sec = secs; info->final_flop = flops; QDP_destroy_V(r); QDP_destroy_V(Dprop); QDP_F_destroy_V(b); for(int i=0; i<nm; i++) { QDP_F_destroy_V(x[i]); } QOP_F_asqtad_destroy_L(ffla); #undef NC }
int Kalkreuter_qdp(QDP_ColorVector **eigVec, double *eigVal, Real Tolerance, Real RelTol, int Nvecs, int MaxIter, int Restart, int Kiters, QDP_Subset subset) { QLA_Real max_error = 1.0e+10; QLA_Real min_grad; QLA_Real *grad, *err; Matrix Array, V; QDP_ColorVector *vec; int total_iters=0; int i, j; int iter = 0; #ifdef DEBUG if(QDP_this_node==0) printf("begin Kalkreuter_qdp\n"); #endif prepare_Matrix(); Array = AllocateMatrix(Nvecs); /* Allocate the array */ V = AllocateMatrix(Nvecs); /* Allocate the Eigenvector matrix */ vec = QDP_create_V(); grad = malloc(Nvecs*sizeof(QLA_Real)); err = malloc(Nvecs*sizeof(QLA_Real)); /* Initiallize all the eigenvectors to a random vector */ for(j=0; j<Nvecs; j++) { grad[j] = 1.0e+10; QDP_V_eq_gaussian_S(eigVec[j], rand_state, QDP_all); eigVal[j] = 1.0e+16; //project_out_qdp(eigVec[j], eigVec, j, subset); //normalize_qdp(eigVec[j], subset); } #if 0 constructArray_qdp(eigVec, &Array, grad, subset); Jacobi(&Array, &V, JACOBI_TOL); sort_eigenvectors(&Array, &V); RotateBasis_qdp(eigVec, &V, subset); #endif while( (max_error>Tolerance) && (iter<Kiters) ) { iter++; min_grad = grad[0]/eigVal[0]; for(i=1; i<Nvecs; i++) { if(grad[i]<min_grad*eigVal[i]) min_grad = grad[i]/eigVal[i]; } RelTol = 0.3; for(j=0; j<Nvecs; j++) { if(grad[j]>Tolerance*eigVal[j]) { QLA_Real rt; rt = RelTol*min_grad*eigVal[j]/grad[j]; //rt = 1e-5/grad[j]; if(rt>RelTol) rt = RelTol; //rt = RelTol; QDP_V_eq_V(vec, eigVec[j], QDP_all); total_iters += Rayleigh_min_qdp(vec, eigVec, Tolerance, rt, j, MaxIter, Restart, subset); QDP_V_eq_V(eigVec[j], vec, QDP_all); } } constructArray_qdp(eigVec, &Array, grad, subset); for(i=0; i<Nvecs; i++) node0_printf("quot(%i) = %g +/- %8e |grad|=%g\n", i, Array.M[i][i].real, err[i], grad[i]); #ifdef DEBUG node0_printf("Eigenvalues before diagonalization\n"); for(i=0;i<Nvecs;i++) node0_printf("quot(%i) = %g |grad|=%g\n",i,Array.M[i][i].real,grad[i]); #endif Jacobi(&Array, &V, JACOBI_TOL); sort_eigenvectors(&Array, &V); RotateBasis_qdp(eigVec, &V, subset); constructArray_qdp(eigVec, &Array, grad, subset); /* find the maximum error */ max_error = 0.0; for(i=0; i<Nvecs; i++) { err[i] = eigVal[i]; eigVal[i] = Array.M[i][i].real; err[i] = fabs(err[i] - eigVal[i])/(1.0 - RelTol*RelTol); if(eigVal[i]>1e-10) { #ifndef STRICT_CONVERGENCE if(err[i]/eigVal[i]>max_error) max_error = err[i]/eigVal[i]; #else if(grad[i]/eigVal[i]>max_error) max_error = grad[i]/eigVal[i]; #endif } } node0_printf("\nEigenvalues after diagonalization at iteration %i\n",iter); for(i=0; i<Nvecs; i++) node0_printf("quot(%i) = %g +/- %8e |grad|=%g\n", i, eigVal[i], err[i], grad[i]); } node0_printf("BEGIN RESULTS\n"); for(i=0;i<Nvecs;i++){ node0_printf("Eigenvalue(%i) = %g +/- %8e\n", i,eigVal[i],err[i]); } #if 0 node0_printf("BEGIN EIGENVALUES\n"); for(i=0; i<Nvecs; i++) { double ev, er; ev = sqrt(eigVal[i]); er = err[i]/(2*ev); node0_printf("%.8g\t%g\n", ev, er); } node0_printf("END EIGENVALUES\n"); { QDP_Writer *qw; QDP_String *md; char evstring[100], *fn="eigenvecs.out"; md = QDP_string_create(); sprintf(evstring, "%i", Nvecs); QDP_string_set(md, evstring); qw = QDP_open_write(md, fn, QDP_SINGLEFILE); for(i=0; i<Nvecs; i++) { double ev, er; ev = sqrt(eigVal[i]); er = err[i]/(2*ev); sprintf(evstring, "%.8g\t%g", ev, er); QDP_string_set(md, evstring); QDP_write_V(qw, md, eigVec[i]); } QDP_close_write(qw); QDP_string_destroy(md); } #endif /** Deallocate the arrays **/ deAllocate(&V) ; deAllocate(&Array) ; free(err); free(grad); QDP_destroy_V(vec); cleanup_Matrix(); #ifdef DEBUG if(QDP_this_node==0) printf("end Kalkreuter_qdp\n"); #endif return total_iters; }
int Rayleigh_min_qdp(QDP_ColorVector *vec, QDP_ColorVector **eigVec, Real Tolerance, Real RelTol, int Nvecs, int MaxIter, int Restart, QDP_Subset subset) { QLA_Complex cc; QLA_Real beta, cos_theta, sin_theta; QLA_Real quot, P_norm, theta, real_vecMp, pMp; QLA_Real g_norm, old_g_norm, start_g_norm; QDP_ColorVector *Mvec, *grad, *P, *MP; int iter; #ifdef DEBUG if(QDP_this_node==0) printf("begin Rayleigh_min_qdp\n"); #endif Mvec = QDP_create_V(); grad = QDP_create_V(); //oldgrad = QDP_create_V(); P = QDP_create_V(); MP = QDP_create_V(); project_out_qdp(vec, eigVec, Nvecs, subset); normalize_qdp(vec, subset); Matrix_Vec_mult_qdp(vec, Mvec, subset); project_out_qdp(Mvec, eigVec, Nvecs, subset); /* Compute the quotient quot=vev*M*vec */ QDP_r_eq_re_V_dot_V(", vec, Mvec, subset); /* quot is real since M is hermitian. quot = vec*M*vec */ #ifdef DEBUG if(QDP_this_node==0) printf("Rayleigh_min: Start -- quot=%g\n", quot); #endif /* Compute the grad=M*vec - quot*vec */ QDP_V_eq_V(grad, Mvec, QDP_all); QDP_V_meq_r_times_V(grad, ", vec, subset); /* set P (the search direction) equal to grad */ QDP_V_eq_V(P, grad, QDP_all); /* compute the norms of P and grad */ QDP_r_eq_norm2_V(&P_norm, P, subset); P_norm = sqrt(P_norm); QDP_r_eq_norm2_V(&g_norm, grad, subset); g_norm = sqrt(g_norm); start_g_norm = g_norm; //QDP_V_eq_V(oldgrad, grad, subset); #ifdef DEBUG if(QDP_this_node==0) printf("Rayleigh_min: Start -- g_norm=%g\n", g_norm); #endif iter = 0; while( (g_norm>Tolerance*quot) && ( ((iter<MaxIter)&&(g_norm/start_g_norm>RelTol)) || (iter<MINITER) ) ) { iter++; Matrix_Vec_mult_qdp(P, MP, subset); QDP_r_eq_re_V_dot_V(&real_vecMp, vec, MP, subset); QDP_r_eq_re_V_dot_V(&pMp, P, MP, subset); theta = 0.5*atan(2.0*real_vecMp/(quot*P_norm - pMp/P_norm)); sin_theta = sin(theta); cos_theta = cos(theta); if(sin_theta*cos_theta*real_vecMp>0) { theta = theta - 0.5*M_PI; /* chose the minimum not the maximum */ sin_theta = sin(theta); /* the sin,cos calls can be avoided */ cos_theta = cos(theta); } sin_theta = sin_theta/P_norm; /* vec = cos(theta)*vec +sin(theta)*P/p_norm */ //dax_p_by_qdp(cos_theta, vec, sin_theta, P, subset); QDP_V_eq_r_times_V(vec, &cos_theta, vec, subset); QDP_V_peq_r_times_V(vec, &sin_theta, P, subset); /* Mvec = cos(theta)*Mvec +sin(theta)*MP/p_norm */ //dax_p_by_qdp(cos_theta, Mvec, sin_theta, MP, subset); QDP_V_eq_r_times_V(Mvec, &cos_theta, Mvec, subset); QDP_V_peq_r_times_V(Mvec, &sin_theta, MP, subset); /* renormalize vec ... */ if( iter%Restart == 0 ) { #ifdef DEBUG { QLA_Real vec_norm; if(QDP_this_node==0) printf("Renormalizing..."); QDP_r_eq_norm2_V(&vec_norm, vec, subset); if(QDP_this_node==0) printf(" norm: %g\n", sqrt(vec_norm)); } #endif /* Project vec on the orthogonal complement of eigVec */ project_out_qdp(vec, eigVec, Nvecs, subset); normalize_qdp(vec, subset); Matrix_Vec_mult_qdp(vec, Mvec, subset); /* Recompute the quotient */ QDP_r_eq_re_V_dot_V(", vec, Mvec, subset); /* Recompute the grad */ QDP_V_eq_V(grad, Mvec, QDP_all); QDP_V_meq_r_times_V(grad, ", vec, subset); //QDP_r_eq_norm2_V(&g_norm, grad, subset); //printf("g_norm = %g\n", g_norm); /* Project P on the orthogonal complement of eigVec */ //QDP_r_eq_norm2_V(&P_norm, P, subset); //printf("P_norm = %g\n", P_norm); project_out_qdp(P, eigVec, Nvecs, subset); //QDP_r_eq_norm2_V(&P_norm, P, subset); //printf("P_norm = %g\n", P_norm); /* make P orthogonal to vec */ QDP_c_eq_V_dot_V(&cc, vec, P, subset); //printf("cc = %g\n", QLA_real(cc)); QDP_V_meq_c_times_V(P, &cc, vec, subset); //QDP_r_eq_norm2_V(&P_norm, P, subset); //printf("P_norm = %g\n", P_norm); /* make P orthogonal to grad */ QDP_c_eq_V_dot_V(&cc, grad, P, subset); //printf("cc = %g\n", QLA_real(cc)); QDP_V_meq_c_times_V(P, &cc, grad, subset); QDP_r_eq_norm2_V(&P_norm, P, subset); P_norm = sqrt(P_norm); } QDP_r_eq_re_V_dot_V(", vec, Mvec, subset); #ifdef DEBUG node0_printf("Rayleigh_min: %i, quot=%8g g=%8g b=%6g P:%6g\n", iter, quot, g_norm, beta, P_norm); #endif old_g_norm = g_norm; QDP_V_eq_V(grad, Mvec, QDP_all); QDP_V_meq_r_times_V(grad, ", vec, subset); //QDP_V_meq_V(oldgrad, grad, subset); //QDP_r_eq_re_V_dot_V(&g_norm, oldgrad, grad, subset); //QDP_V_eq_V(oldgrad, grad, subset); QDP_r_eq_norm2_V(&g_norm, grad, subset); g_norm = sqrt(g_norm); beta = cos_theta*g_norm*g_norm/(old_g_norm*old_g_norm); if( beta>2.0 ) beta = 2.0; /* Cut off beta */ QDP_c_eq_V_dot_V(&cc, vec, P, subset); QLA_real(cc) *= beta; QLA_imag(cc) *= beta; QDP_V_eq_r_times_V_plus_V(P, &beta, P, grad, subset); QDP_V_meq_c_times_V(P, &cc, vec, subset); QDP_r_eq_norm2_V(&P_norm, P, subset); P_norm = sqrt(P_norm); } project_out_qdp(vec, eigVec, Nvecs, subset); normalize_qdp(vec, subset); QDP_destroy_V(MP); QDP_destroy_V(P); //QDP_destroy_V(oldgrad); QDP_destroy_V(grad); QDP_destroy_V(Mvec); iter++; #ifdef DEBUG if(QDP_this_node==0) printf("end Rayleigh_min_qdp\n"); #endif return iter; }
/* Smearing level 0 */ static void QOP_hisq_force_multi_smearing0_fnmat(QOP_info_t *info, REAL *residues, QDP_ColorVector *x[], int nterms, QDP_ColorMatrix *force_accum[4], QDP_ColorMatrix *force_accum_naik[4]) { int term; int i,k; int dir; REAL coeff; QDP_ColorMatrix *tmat; QDP_ColorMatrix *oprod_along_path[MAX_PATH_LENGTH+1]; QDP_ColorMatrix *mat_tmp0; QDP_ColorVector *tsrc[2], *vec_tmp[2]; size_t nflops = 0; if( nterms==0 )return; mat_tmp0 = QDP_create_M(); tmat = QDP_create_M(); tsrc[0] = QDP_create_V(); tsrc[1] = QDP_create_V(); vec_tmp[0] = QDP_create_V(); vec_tmp[1] = QDP_create_V(); for(i=0;i<=MAX_PATH_LENGTH;i++){ oprod_along_path[i] = QDP_create_M(); } // clear force accumulators for(dir=XUP;dir<=TUP;dir++) QDP_M_eq_zero(force_accum[dir], QDP_all); for(dir=XUP;dir<=TUP;dir++){ //AB loop on directions, path table is not needed k=0; // which vec_tmp we are using (0 or 1) QDP_V_eq_V(tsrc[k], x[0], QDP_all); QDP_V_eq_sV(vec_tmp[k], tsrc[k], fnshift(OPP_DIR(dir)), fndir(OPP_DIR(dir)), QDP_all); QDP_M_eq_zero(oprod_along_path[0], QDP_all); for(term=0;term<nterms;term++){ if(term<nterms-1) { QDP_V_eq_V(tsrc[1-k], x[term+1], QDP_all); QDP_V_eq_sV(vec_tmp[1-k], tsrc[1-k], fnshift(OPP_DIR(dir)), fndir(OPP_DIR(dir)), QDP_all); } //QDP_M_eq_V_times_Va(tmat, x[term], vec_tmp[k], QDP_all); QDP_M_eq_V_times_Va(tmat, tsrc[k], vec_tmp[k], QDP_all); nflops += 54; QDP_discard_V(vec_tmp[k]); QDP_M_peq_r_times_M(oprod_along_path[0], &residues[term], tmat, QDP_all); nflops += 36; k=1-k; // swap 0 and 1 } // end loop over terms in rational function expansion link_gather_connection_qdp(oprod_along_path[1], oprod_along_path[0], tmat, dir ); coeff = 1.; QDP_M_peq_r_times_M(force_accum[dir],&coeff,oprod_along_path[1],QDP_all); nflops += 36; } // end of loop on directions // // *** Naik part *** / // clear force accumulators for(dir=XUP;dir<=TUP;dir++) QDP_M_eq_zero(force_accum_naik[dir], QDP_all); for(dir=XUP;dir<=TUP;dir++){ //AB loop on directions, path table is not needed k=0; // which vec_tmp we are using (0 or 1) QDP_V_eq_V(tsrc[k], x[0], QDP_all); QDP_V_eq_sV(vec_tmp[k], tsrc[k], fnshift(OPP_3_DIR( DIR3(dir) )), fndir(OPP_3_DIR( DIR3(dir) )), QDP_all); QDP_M_eq_zero(oprod_along_path[0], QDP_all); for(term=0;term<nterms;term++){ if(term<nterms-1) { QDP_V_eq_V(tsrc[1-k], x[term+1], QDP_all); QDP_V_eq_sV(vec_tmp[1-k], tsrc[1-k], fnshift(OPP_3_DIR( DIR3(dir) )), fndir(OPP_3_DIR( DIR3(dir) )), QDP_all); } //QDP_M_eq_V_times_Va(tmat, x[term], vec_tmp[k], QDP_all); QDP_M_eq_V_times_Va(tmat, tsrc[k], vec_tmp[k], QDP_all); nflops += 54; QDP_discard_V(vec_tmp[k]); QDP_M_peq_r_times_M(oprod_along_path[0], &residues[term], tmat, QDP_all); nflops += 36; k=1-k; // swap 0 and 1 } // end loop over terms in rational function expansion link_gather_connection_qdp(oprod_along_path[1], oprod_along_path[0], tmat, DIR3(dir) ); coeff = 1; // fermion_eps is outside this routine in "wrapper" routine QDP_M_peq_r_times_M(force_accum_naik[dir],&coeff, oprod_along_path[1],QDP_all); nflops += 36; } // end of loop on directions QDP_destroy_V( tsrc[0] ); QDP_destroy_V( tsrc[1] ); QDP_destroy_V( vec_tmp[0] ); QDP_destroy_V( vec_tmp[1] ); QDP_destroy_M( mat_tmp0 ); QDP_destroy_M( tmat ); for(i=0;i<=MAX_PATH_LENGTH;i++){ QDP_destroy_M( oprod_along_path[i] ); } info->final_flop = ((double)nflops)*QDP_sites_on_node; return; } //hisq_force_multi_smearing0_fnmat
/* Smearing level i*/ static void QOP_hisq_force_multi_smearing_fnmat(QOP_info_t *info, QDP_ColorMatrix * gf[4], REAL *residues, QDP_ColorVector *x[], int nterms, QDP_ColorMatrix *force_accum[4], QDP_ColorMatrix *force_accum_old[4], QDP_ColorMatrix *force_accum_naik_old[4], int internal_num_q_paths, Q_path *internal_q_paths_sorted, int *internal_netbackdir_table) { int i,j,k,lastdir=-99,ipath,ilink; int length,dir,odir; REAL coeff; QDP_ColorMatrix *tmat; QDP_ColorMatrix *oprod_along_path[MAX_PATH_LENGTH+1]; QDP_ColorMatrix *mats_along_path[MAX_PATH_LENGTH+1]; QDP_ColorMatrix *mat_tmp0,*mat_tmp1, *stmp[8];; QDP_ColorVector *vec_tmp[2]; int netbackdir; size_t nflops = 0; // table of net path displacements (backwards from usual convention) Q_path *this_path; // pointer to current path /* Allocate fields */ for(i=0;i<=MAX_PATH_LENGTH;i++){ oprod_along_path[i] = QDP_create_M(); } for(i=1;i<=MAX_PATH_LENGTH;i++){ // 0 element is never used (it's unit matrix) mats_along_path[i] = QDP_create_M(); } mat_tmp0 = QDP_create_M(); mat_tmp1 = QDP_create_M(); for(i=0; i<8; i++) stmp[i] = QDP_create_M(); tmat = QDP_create_M(); vec_tmp[0] = QDP_create_V(); vec_tmp[1] = QDP_create_V(); // clear force accumulators for(dir=XUP;dir<=TUP;dir++) QDP_M_eq_zero(force_accum[dir], QDP_all); // loop over paths, and loop over links in path for( ipath=0; ipath<internal_num_q_paths; ipath++ ){ this_path = &(internal_q_paths_sorted[ipath]); if(this_path->forwback== -1)continue; // skip backwards dslash length = this_path->length; netbackdir = internal_netbackdir_table[ipath]; // move f(i-1) force from current site in positive direction, // this corresponds to outer product |X><Y| calculated at the endpoint of the path if( netbackdir<8) { // Not a Naik path link_gather_connection_qdp(oprod_along_path[0] , force_accum_old[OPP_DIR(netbackdir)], tmat, netbackdir ); } else { // Naik path if( NULL==force_accum_naik_old ) { QOP_printf0( "hisq_force_multi_smearing_fnmat: mismatch:\n" ); QOP_printf0( "force_accum_naik_old is NULL, but path table contains Naik paths(!)\n" ); exit(0); } // CONVERSION FROM 3-LINK DIRECTION TO 1-LINK DIRECTION link_gather_connection_qdp(oprod_along_path[0] , force_accum_naik_old[OPP_DIR(netbackdir-8)], tmat, netbackdir ); } // figure out how much of the outer products along the path must be // recomputed. j is last one needing recomputation. k is first one. j=length-1; // default is recompute all if( GOES_BACKWARDS(this_path->dir[0]) ) k=1; else k=0; for(ilink=j;ilink>=k;ilink--){ link_transport_connection_qdp( oprod_along_path[length-ilink], oprod_along_path[length-ilink-1], gf, mat_tmp0, stmp, this_path->dir[ilink] ); nflops += 198; } // maintain an array of transports "to this point" along the path. // Don't recompute beginning parts of path if same as last path ilink=0; // first link where new transport is needed // Sometimes we don't need the matrix for the last link if( GOES_FORWARDS(this_path->dir[length-1]) ) k=length-1; else k=length; for( ; ilink<k; ilink++ ){ if( ilink==0 ){ dir = this_path->dir[0]; if( GOES_FORWARDS(dir) ){ QDP_M_eq_sM(tmat, gf[dir], QDP_neighbor[dir], QDP_backward, QDP_all); QDP_M_eq_Ma(mats_along_path[1], tmat, QDP_all); QDP_discard_M(tmat); } else{ QDP_M_eq_M(mats_along_path[1], gf[OPP_DIR(dir)], QDP_all); } } else { // ilink != 0 dir = OPP_DIR(this_path->dir[ilink]); link_transport_connection_qdp( mats_along_path[ilink+1], mats_along_path[ilink], gf, mat_tmp0, stmp, dir ); nflops += 198; } } // end loop over links // A path has (length+1) points, counting the ends. At first // point, no "down" direction links have their momenta "at this // point". At last, no "up" ... if( GOES_FORWARDS(this_path->dir[length-1]) ) k=length-1; else k=length; for( ilink=0; ilink<=k; ilink++ ){ if(ilink<length)dir = this_path->dir[ilink]; else dir=NODIR; coeff = this_path->coeff; if( (ilink%2)==1 )coeff = -coeff; // add in contribution to the force if( ilink<length && GOES_FORWARDS(dir) ){ link_gather_connection_qdp(mat_tmp1, oprod_along_path[length-ilink-1], tmat, dir ); if(ilink==0) { QDP_M_eq_M(mat_tmp0,mat_tmp1,QDP_all); } else { QDP_M_eq_M_times_Ma(mat_tmp0, mats_along_path[ilink], mat_tmp1, QDP_all); nflops += 198; QDP_M_eq_Ma(mat_tmp1,mat_tmp0,QDP_all); } QDP_M_peq_r_times_M(force_accum[dir],&coeff,mat_tmp1,QDP_all); nflops += 36; } if( ilink>0 && GOES_BACKWARDS(lastdir) ){ odir = OPP_DIR(lastdir); if( ilink==1 ){ QDP_M_eq_M(mat_tmp0,oprod_along_path[length-ilink],QDP_all); QDP_M_eq_Ma(mat_tmp1,mat_tmp0,QDP_all); } else{ link_gather_connection_qdp(mat_tmp1, mats_along_path[ilink-1], tmat, odir ); QDP_M_eq_M_times_Ma(mat_tmp0, oprod_along_path[length-ilink], mat_tmp1, QDP_all); nflops += 198; QDP_M_eq_Ma(mat_tmp1, mat_tmp0, QDP_all); } QDP_M_peq_r_times_M(force_accum[odir],&coeff,mat_tmp1,QDP_all); nflops += 36; } lastdir = dir; } // end loop over links in path // } // end loop over paths // QDP_destroy_V( vec_tmp[0] ); QDP_destroy_V( vec_tmp[1] ); QDP_destroy_M( mat_tmp0 ); QDP_destroy_M( mat_tmp1 ); QDP_destroy_M( tmat ); for(i=0; i<8; i++) QDP_destroy_M(stmp[i]); for(i=0;i<=MAX_PATH_LENGTH;i++){ QDP_destroy_M( oprod_along_path[i] ); } for(i=1;i<=MAX_PATH_LENGTH;i++){ QDP_destroy_M( mats_along_path[i] ); } info->final_flop = ((double)nflops)*QDP_sites_on_node; return; }//hisq_force_multi_smearing_fnmat