/*
 * Lua binding: zero every link matrix of a gauge field.
 *
 * Expects exactly one value on the Lua stack (enforced with qassert); that
 * value is converted to a gauge_t with qopqdp_gauge_check.  Each of the
 * gauge's nd link fields is set to zero on the full subset of the gauge's
 * own lattice.  Pushes nothing back to Lua and returns 0.
 */
static int
qopqdp_gauge_zero(lua_State *L)
{
  qassert(lua_gettop(L)==1);
  gauge_t *gauge = qopqdp_gauge_check(L, -1);
  int dim = 0;
  while(dim < gauge->nd) {
    QDP_M_eq_zero(gauge->links[dim], QDP_all_L(gauge->qlat));
    dim++;
  }
  return 0;
}
/*
 * HISQ fermion force: compute the link derivative and fold it into force[].
 *
 * Four scratch fields are created and zeroed, then filled by
 * QOP_hisq_deriv_multi_fnmat2_qdp (which receives all of this routine's
 * arguments except force).  For each of the 4 directions the derivative is
 * contracted with the link as U_links[dir] * adj(deriv[dir]), passed through
 * QDP_M_eq_antiherm_M, and added to force[dir].
 *
 * info->final_flop is incremented by the cost of the contraction on top of
 * whatever the derivative routine left there; info->final_sec is overwritten
 * with the wall time of this whole call.
 */
void
QOP_hisq_force_multi_fnmat2_qdp(QOP_info_t *info, QOP_FermionLinksHisq *flh,
                                QDP_ColorMatrix *force[],
                                QOP_hisq_coeffs_t *hisq_coeff, REAL *residues,
                                QDP_ColorVector *x[], int *n_orders_naik)
{
#define NC QDP_get_nc(force[0])
  // per-site cost of the contraction below, summed over 4 directions
  const double contraction_flops = 4.*(198+24+18);
  double t_start = QOP_time();
  QDP_ColorMatrix *dmat[4];
  QDP_ColorMatrix *prod;
  int d;

  d = 0;
  while(d < 4) {
    dmat[d] = QDP_create_M();
    QDP_M_eq_zero(dmat[d], QDP_all);
    d++;
  }

  QOP_hisq_deriv_multi_fnmat2_qdp(info, flh, dmat, hisq_coeff, residues, x,
                                  n_orders_naik);

  // The contraction with the link itself happens here, once the
  // contributions from all smearing levels have been accumulated.
  // The antihermitian part is then added to the momentum (force).
  prod = QDP_create_M();
  d = 0;
  while(d < 4) {
    QDP_M_eq_M_times_Ma(prod, flh->U_links[d], dmat[d], QDP_all);
    QDP_M_eq_antiherm_M(dmat[d], prod, QDP_all);
    QDP_M_peq_M(force[d], dmat[d], QDP_all);
    d++;
  }
  info->final_flop += contraction_flops*QDP_sites_on_node;

  QDP_destroy_M(prod);
  for(d=0; d<4; d++) {
    QDP_destroy_M(dmat[d]);
  }

  info->final_sec = QOP_time() - t_start;
  //QOP_printf0("HISQ force flops = %g\n", info->final_flop);
#undef NC
}
/*
 * Symanzik 1-loop improved gauge force ("slow" reference version).
 *
 * For every direction mu the routine accumulates into tempmom_qdp[mu] a sum
 * of link products weighted by the three action coefficients read from
 * coeffs: plaquette (3-link staples), rectangle and parallelogram ("pgm").
 * The result is then combined with the incoming force as
 *     force[mu] = antiherm( eb3 * U_mu * adj(tempmom[mu]) + force[mu] )
 * with eb3 = -eps/3.
 *
 *   info   - receives final_sec, final_flop and status on return
 *   gauge  - gauge field; only gauge->links[] is read
 *   force  - force/momentum field, updated in place
 *   coeffs - plaquette / rectangle / parallelogram coefficients
 *   eps    - step size (enters only through eb3)
 *
 * NOTE(review): fblink is not declared in this function; it is presumably a
 * file-scope array of 8 link pointers (forward links 0..3, backward links
 * reached through OPP_DIR) -- confirm against the rest of the file.
 * NOTE(review): most loops are hard-coded to 4 directions while the fblink
 * setup loop runs to QOP_common.ndim; the two agree only for ndim == 4.
 * The comments below follow the file's convention that a QDP_forward shift
 * along nu fetches the field from x+nu and QDP_backward from x-nu.
 */
void QOPPC(symanzik_1loop_gauge_force1) (QOP_info_t *info, QOP_GaugeField *gauge, QOP_Force *force, QOP_gauge_coeffs_t *coeffs, REAL eps)
{
  REAL Plaq, Rect, Pgm ;
  QDP_ColorMatrix *tempmom_qdp[4];
  QDP_ColorMatrix *Amu[6]; // products of 2 links Unu(x)*Umu(x+nu)
  QDP_ColorMatrix *tmpmat;
  QDP_ColorMatrix *tmpmat1;
  QDP_ColorMatrix *tmpmat2;
  QDP_ColorMatrix *staples;
  QDP_ColorMatrix *tmpmat3;
  QDP_ColorMatrix *tmpmat4;
  int i, k;
  int mu, nu, sig;
  double dtime;
  //REAL eb3 = -eps*beta/3.0;
  REAL eb3 = -eps/3.0;
  // j[i][k]: for the i-th direction nu != mu, the Amu index of the k-th
  // remaining direction sig (the index that is neither mu's slot nor i)
  int j[3][2] = {{1,2}, {0,2}, {0,1}};
  // QOP_printf0("beta: %e, eb3: %e\n", beta, eb3);

  // dtime holds -start; adding QOP_time() at the end gives the elapsed time
  dtime = -QOP_time();

  for(mu=0; mu<4; mu++) {
    tempmom_qdp[mu] = QDP_create_M();
    QDP_M_eq_zero(tempmom_qdp[mu], QDP_all);
  }

  tmpmat = QDP_create_M();
  // forward links alias the gauge field (not owned here); backward links are
  // freshly created as the adjoint of the link shifted backward along i
  for(i=0; i<QOP_common.ndim; i++) {
    fblink[i] = gauge->links[i];
    fblink[OPP_DIR(i)] = QDP_create_M();
    QDP_M_eq_sM(tmpmat, fblink[i], QDP_neighbor[i], QDP_backward, QDP_all);
    QDP_M_eq_Ma(fblink[OPP_DIR(i)], tmpmat, QDP_all);
  }

  for(i=0; i<6; i++) {
    Amu[i] = QDP_create_M();
  }

  staples = QDP_create_M();
  tmpmat1 = QDP_create_M();
  tmpmat2 = QDP_create_M();
  tmpmat3 = QDP_create_M();
  tmpmat4 = QDP_create_M();

  Plaq = coeffs->plaquette;
  Rect = coeffs->rectangle;
  Pgm = coeffs->parallelogram;

  //Construct 3-staples and rectangles
  for(mu=0; mu<4; mu++) {
    // i counts the directions nu != mu (0..2); Amu[i] is the +nu product,
    // Amu[i+3] the -nu product, both rebuilt for every mu
    i=0;
    for(nu=0; nu<4; nu++) {
      if(nu!=mu){
        // tmpmat1 = Umu(x+nu)
        QDP_M_eq_sM(tmpmat1, fblink[mu], QDP_neighbor[nu], QDP_forward, QDP_all);
        QDP_M_eq_M_times_M(Amu[i], fblink[nu], tmpmat1, QDP_all);

        //tmpmat2 = Umu(x-nu)
        QDP_M_eq_sM(tmpmat2, fblink[mu], QDP_neighbor[nu], QDP_backward, QDP_all);
        QDP_M_eq_M_times_M(Amu[i+3], fblink[OPP_DIR(nu)], tmpmat2, QDP_all);

        //tmpmat = U_{nu}(x+mu)
        QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
        // upper (+nu) 3-link staple, added with the plaquette coefficient
        QDP_M_eq_M_times_Ma(staples, Amu[i], tmpmat, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[mu], &Plaq, staples, QDP_all);

        // rectangle terms built from the +nu staple
        //tmpmat = U_{-nu}(x+mu)
        QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all);
        QDP_M_eq_Ma_times_M(tmpmat3, fblink[OPP_DIR(nu)], staples, QDP_all);
        QDP_M_eq_M_times_M(tmpmat4, tmpmat3, tmpmat, QDP_all);
        QDP_M_eq_sM(tmpmat, tmpmat4, QDP_neighbor[nu], QDP_forward, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[mu], &Rect, tmpmat, QDP_all);

        // contribution to the nu momentum, shifted +nu then -mu
        QDP_M_eq_Ma_times_M(tmpmat4, tmpmat2, tmpmat3, QDP_all);
        QDP_M_eq_sM(tmpmat, tmpmat4, QDP_neighbor[nu], QDP_forward, QDP_all);
        QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[mu], QDP_backward, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat3, QDP_all);

        //tmpmat = U_{-nu}(x+mu)
        QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all);
        QDP_M_eq_M_times_Ma(tmpmat3, tmpmat2, tmpmat, QDP_all);
        QDP_M_eq_M_times_Ma(tmpmat, tmpmat3, staples, QDP_all);
        QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[nu], QDP_forward, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat3, QDP_all);

        //tmpmat = U_{-nu}(x+mu)
        QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all);
        // lower (-nu) 3-link staple; note that 'staples' is overwritten here
        QDP_M_eq_M_times_Ma(staples, Amu[i+3], tmpmat, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[mu], &Plaq, staples, QDP_all);

        // rectangle terms built from the -nu staple
        QDP_M_eq_Ma_times_M(tmpmat3, fblink[nu], staples, QDP_all);
        //tmpmat = U_{nu}(x+mu)
        QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
        QDP_M_eq_M_times_M(tmpmat4, tmpmat3, tmpmat, QDP_all);
        QDP_M_eq_sM(tmpmat, tmpmat4, QDP_neighbor[nu], QDP_backward, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[mu], &Rect, tmpmat, QDP_all);

        // tmpmat1 still holds Umu(x+nu) from the top of this iteration
        QDP_M_eq_Ma_times_M(tmpmat, tmpmat3, tmpmat1, QDP_all);
        QDP_M_eq_sM(tmpmat4, tmpmat, QDP_neighbor[mu], QDP_backward, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat4, QDP_all);

        //tmpmat = U_{nu}(x+mu)
        QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
        QDP_M_eq_M_times_M(tmpmat3, staples, tmpmat, QDP_all);
        QDP_M_eq_M_times_Ma(tmpmat4, tmpmat3, tmpmat1, QDP_all);
        QDP_M_peq_r_times_M(tempmom_qdp[nu], &Rect, tmpmat4, QDP_all);
        i++;
      }
    }

    // Construct the pgm staples and add them to force
    // (relies on Amu[0..5] computed for this mu in the loop above)
    QDP_M_eq_zero(staples, QDP_all);
    i=0;
    for(nu=0; nu<4; nu++){
      if(nu!=mu){
        k=0;
        for(sig=0; sig<4;sig ++){
          if(sig!=mu && nu!=sig){
            // the nu_sig_mu ... staple and 3 reflections

            // --- (+nu, +sig) ---
            //tmpmat = Amu["sig"](x+nu)
            QDP_M_eq_sM(tmpmat, Amu[j[i][k]], QDP_neighbor[nu], QDP_forward, QDP_all);
            //tmpmat1 = Unu(x)*Amu["sig"](x+nu)
            QDP_M_eq_M_times_M(tmpmat1, fblink[nu], tmpmat, QDP_all);
            //tmpmat3 = Unu(x+mu+sig)
            QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
            QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_forward, QDP_all); // HERE?
            //tmpmat2 = Unu(x)*Amu["sig"](x+nu)*adj(Unu(x+mu+sig))
            QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all);
            //tmpmat = Usig(x+mu)
            QDP_M_eq_sM(tmpmat, fblink[sig], QDP_neighbor[mu], QDP_forward, QDP_all);
            //tmpmat1 = Unu(x)*Amu["sig"](x+nu)*adj(Unu(x+mu+sig))*adj(Usig(x+mu))
            QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all);
            QDP_M_peq_M(staples, tmpmat1, QDP_all);

            // --- (-nu, +sig) ---
            //tmpmat = Amu["sig"](x-nu)
            QDP_M_eq_sM(tmpmat, Amu[j[i][k]], QDP_neighbor[nu], QDP_backward, QDP_all);
            //tmpmat1 = U_{-nu}(x)*Amu["sig"](x-nu)
            QDP_M_eq_M_times_M(tmpmat1, fblink[OPP_DIR(nu)], tmpmat, QDP_all);
            //tmpmat3 = U_{-nu}(x+mu+sig)
            QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all);
            QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_forward, QDP_all); // HERE?
            //tmpmat2 = U_{-nu}(x)*Amu["sig"](x-nu)*adj(U_{-nu}(x+mu+sig))
            QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all);
            //tmpmat = Usig(x+mu)
            QDP_M_eq_sM(tmpmat, fblink[sig], QDP_neighbor[mu], QDP_forward, QDP_all);
            //tmpmat1 = U_{-nu}(x)*Amu["sig"](x-nu)*adj(U_{-nu}(x+mu+sig))*adj(Usig(x+mu))
            QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all);
            QDP_M_peq_M(staples, tmpmat1, QDP_all);

            // --- (-nu, -sig) ---
            //tmpmat = Amu["-sig"](x-nu)
            QDP_M_eq_sM(tmpmat, Amu[j[i][k]+3], QDP_neighbor[nu], QDP_backward, QDP_all);
            //tmpmat1 = U_{-nu}(x)*Amu["-sig"](x-nu)
            QDP_M_eq_M_times_M(tmpmat1, fblink[OPP_DIR(nu)], tmpmat, QDP_all);
            //tmpmat3 = U_{-nu}(x+mu-sig)
            QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(nu)], QDP_neighbor[mu], QDP_forward, QDP_all);
            QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_backward, QDP_all); // HERE?
            //tmpmat2 = U_{-nu}(x)*Amu["-sig"](x-nu)*adj(U_{-nu}(x+mu-sig))
            QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all);
            //tmpmat = U_{-sig}(x+mu)
            QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(sig)], QDP_neighbor[mu], QDP_forward, QDP_all);
            //tmpmat1 = U_{-nu}(x)*Amu["-sig"](x-nu)*adj(U_{-nu}(x+mu-sig))*adj(U_{-sig}(x+mu))
            QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all);
            QDP_M_peq_M(staples, tmpmat1, QDP_all);

            // --- (+nu, -sig) ---
            //tmpmat = Amu["-sig"](x+nu)
            QDP_M_eq_sM(tmpmat, Amu[j[i][k]+3], QDP_neighbor[nu], QDP_forward, QDP_all);
            //tmpmat1 = Unu(x)*Amu["-sig"](x+nu)
            QDP_M_eq_M_times_M(tmpmat1, fblink[nu], tmpmat, QDP_all);
            //tmpmat3 = Unu(x+mu-sig)
            QDP_M_eq_sM(tmpmat, fblink[nu], QDP_neighbor[mu], QDP_forward, QDP_all);
            QDP_M_eq_sM(tmpmat3, tmpmat, QDP_neighbor[sig], QDP_backward, QDP_all); // HERE?
            //tmpmat2 = Unu(x)*Amu["-sig"](x+nu)*adj(Unu(x+mu-sig))
            QDP_M_eq_M_times_Ma(tmpmat2, tmpmat1, tmpmat3, QDP_all);
            //tmpmat = U_{-sig}(x+mu)
            QDP_M_eq_sM(tmpmat, fblink[OPP_DIR(sig)], QDP_neighbor[mu], QDP_forward, QDP_all);
            //tmpmat1 = Unu(x)*Amu["-sig"](x+nu)*adj(Unu(x+mu-sig))*adj(U_{-sig}(x+mu))
            QDP_M_eq_M_times_Ma(tmpmat1, tmpmat2, tmpmat, QDP_all);
            QDP_M_peq_M(staples, tmpmat1, QDP_all);
            k++;
          }//close if sig!=nu ...
        }//close sig loop
        i++;
      }// close if nu!=mu
    }//close the pgm nu loop
    QDP_M_peq_r_times_M(tempmom_qdp[mu], &Pgm, staples, QDP_all);
  }// closes the mu loop

  // optional debugging checksum: sum of det(global sum of force[mu])
#ifdef CHKSUM
  QLA_ColorMatrix qcm;
  QLA_Complex det, chk;
  QLA_c_eq_r(chk, 0);
#endif
  // force[mu] = antiherm( eb3 * U_mu * adj(tempmom[mu]) + force[mu] )
  for(mu=0; mu<4; mu++){
    QDP_M_eq_M_times_Ma(tmpmat, fblink[mu], tempmom_qdp[mu], QDP_all); // HERE?
    QDP_M_eq_r_times_M_plus_M( tempmom_qdp[mu], &eb3, tmpmat, force->force[mu], QDP_all);// HERE?
    QDP_M_eq_antiherm_M(force->force[mu], tempmom_qdp[mu], QDP_all);// HERE
#ifdef CHKSUM
    QDP_m_eq_sum_M(&qcm, force->force[mu], QDP_all);
    QLA_C_eq_det_M(&det, &qcm);
    QLA_c_peq_c(chk, det);
#endif
  }
#ifdef CHKSUM
  QOP_printf0("chksum: %g %g\n", QLA_real(chk), QLA_imag(chk));
#endif

  //DESTROY various fields
  QDP_destroy_M(tmpmat);
  QDP_destroy_M(tmpmat1);
  QDP_destroy_M(tmpmat2);
  QDP_destroy_M(tmpmat3);
  QDP_destroy_M(staples);
  QDP_destroy_M(tmpmat4);
  for(mu=0; mu<4; mu++){
    QDP_destroy_M(tempmom_qdp[mu]);
  }
  for(i=0; i<6; i++) {
    QDP_destroy_M(Amu[i]);
  }
  // only the backward links were created here; fblink[0..3] alias the gauge
  // field.  NOTE(review): assumes OPP_DIR maps 0..3 onto 4..7 -- confirm.
  for(i=4; i<8; i++) {
    QDP_destroy_M(fblink[i]);
  }

  dtime += QOP_time();
  // hard-coded flop count per site for this routine
  double nflop = 96720;
  info->final_sec = dtime;
  info->final_flop = nflop*QDP_sites_on_node;
  info->status = QOP_SUCCESS;
  //QOP_printf0("Time in slow g_force: %e\n", info->final_sec);
}
/* Smearing level 0 */

/*
 * Helper for smearing level 0: handle one direction.
 *
 * Builds sum_term residues[term] * x[term] * adj(shifted x[term]) in
 * oprod[0], where the shift is the one selected by fnshift(shiftdir) /
 * fndir(shiftdir), then moves the result with link_gather_connection_qdp
 * along linkdir into oprod[1] and adds it (coefficient 1) to accum.
 *
 * tsrc[] and vec_tmp[] are used as double buffers: the shift for term+1 is
 * started before the outer product for term is formed, and the consumed
 * shifted vector is discarded right after use.
 *
 * Returns the number of flops per site counted for this direction.
 * Requires nterms >= 1 (x[0] is read unconditionally).
 */
static size_t
hisq_smearing0_one_dir_fnmat(REAL *residues, QDP_ColorVector *x[], int nterms,
                             int shiftdir, int linkdir,
                             QDP_ColorVector *tsrc[2],
                             QDP_ColorVector *vec_tmp[2],
                             QDP_ColorMatrix *oprod[2],
                             QDP_ColorMatrix *tmat,
                             QDP_ColorMatrix *accum)
{
  size_t nflops = 0;
  REAL coeff = 1.;
  int term;
  int k = 0; // which tsrc/vec_tmp buffer is current (0 or 1)

  QDP_V_eq_V(tsrc[k], x[0], QDP_all);
  QDP_V_eq_sV(vec_tmp[k], tsrc[k], fnshift(shiftdir), fndir(shiftdir),
              QDP_all);
  QDP_M_eq_zero(oprod[0], QDP_all);

  for(term=0; term<nterms; term++) {
    if(term<nterms-1) {
      // start the shift for the next term into the other buffer
      QDP_V_eq_V(tsrc[1-k], x[term+1], QDP_all);
      QDP_V_eq_sV(vec_tmp[1-k], tsrc[1-k], fnshift(shiftdir),
                  fndir(shiftdir), QDP_all);
    }
    QDP_M_eq_V_times_Va(tmat, tsrc[k], vec_tmp[k], QDP_all);
    nflops += 54;
    QDP_discard_V(vec_tmp[k]);
    QDP_M_peq_r_times_M(oprod[0], &residues[term], tmat, QDP_all);
    nflops += 36;
    k = 1-k; // swap 0 and 1
  } // end loop over terms in rational function expansion

  link_gather_connection_qdp(oprod[1], oprod[0], tmat, linkdir);
  QDP_M_peq_r_times_M(accum, &coeff, oprod[1], QDP_all);
  nflops += 36;

  return nflops;
}

/*
 * Smearing level 0 of the HISQ force: outer products of the source vectors.
 *
 *   info             - info->final_flop is set to the flops of this call
 *   residues         - nterms coefficients of the rational expansion
 *   x                - nterms source vectors
 *   nterms           - number of terms
 *   force_accum      - [out] one-link part, one field per direction XUP..TUP
 *   force_accum_naik - [out] Naik (3-link) part, one field per direction
 *
 * Both accumulators are always cleared first.  With no terms they are left
 * at zero and info->final_flop is set to 0; previously that case returned
 * without writing either, leaving the caller with whatever the fields and
 * info happened to contain.
 *
 * The Naik pass differs from the one-link pass only in the directions used:
 * OPP_3_DIR(DIR3(dir)) for the shift and DIR3(dir) for the gather.  The
 * overall Naik epsilon factor is applied by the calling wrapper, not here.
 */
static void
QOP_hisq_force_multi_smearing0_fnmat(QOP_info_t *info, REAL *residues, QDP_ColorVector *x[], int nterms, QDP_ColorMatrix *force_accum[4], QDP_ColorMatrix *force_accum_naik[4])
{
  int dir;
  QDP_ColorMatrix *tmat;
  QDP_ColorMatrix *oprod[2]; // [0]: local outer product, [1]: gathered copy
  QDP_ColorVector *tsrc[2], *vec_tmp[2];
  size_t nflops = 0;

  // clear force accumulators
  for(dir=XUP;dir<=TUP;dir++) {
    QDP_M_eq_zero(force_accum[dir], QDP_all);
    QDP_M_eq_zero(force_accum_naik[dir], QDP_all);
  }

  if( nterms<=0 ) {
    info->final_flop = 0;
    return;
  }

  tmat = QDP_create_M();
  tsrc[0] = QDP_create_V();
  tsrc[1] = QDP_create_V();
  vec_tmp[0] = QDP_create_V();
  vec_tmp[1] = QDP_create_V();
  oprod[0] = QDP_create_M();
  oprod[1] = QDP_create_M();

  // one-link part; loop on directions, path table is not needed
  for(dir=XUP;dir<=TUP;dir++){
    nflops += hisq_smearing0_one_dir_fnmat(residues, x, nterms,
                                           OPP_DIR(dir), dir,
                                           tsrc, vec_tmp, oprod, tmat,
                                           force_accum[dir]);
  }

  // *** Naik part ***
  for(dir=XUP;dir<=TUP;dir++){
    nflops += hisq_smearing0_one_dir_fnmat(residues, x, nterms,
                                           OPP_3_DIR( DIR3(dir) ), DIR3(dir),
                                           tsrc, vec_tmp, oprod, tmat,
                                           force_accum_naik[dir]);
  }

  QDP_destroy_V( tsrc[0] );
  QDP_destroy_V( tsrc[1] );
  QDP_destroy_V( vec_tmp[0] );
  QDP_destroy_V( vec_tmp[1] );
  QDP_destroy_M( tmat );
  QDP_destroy_M( oprod[0] );
  QDP_destroy_M( oprod[1] );

  info->final_flop = ((double)nflops)*QDP_sites_on_node;
  return;
} //hisq_force_multi_smearing0_fnmat
void QOP_hisq_force_multi_wrapper_fnmat(QOP_info_t *info, QOP_FermionLinksHisq *flh, QOP_Force *Force, QOP_hisq_coeffs_t *hisq_coeff, REAL *residues, QDP_ColorVector *x[], int *n_orders_naik) { double dtime = QDP_time(); int i, ipath, dir; REAL coeff_mult; double *eps_naik = hisq_coeff->eps_naik; int n_naiks = hisq_coeff->n_naiks; QOP_hisq_unitarize_method_t umethod = hisq_coeff->umethod; // Quark paths sorted by net displacement and last directions static Q_path *q_paths_sorted_1 = NULL; static Q_path *q_paths_sorted_2 = NULL; static Q_path *q_paths_sorted_3 = NULL; static int *netbackdir_table_1 = NULL; static int *netbackdir_table_2 = NULL; static int *netbackdir_table_3 = NULL; static int first_force = 1; if(first_force == 1) QOP_make_paths_and_dirs_hisq(hisq_coeff, umethod); int num_q_paths_1 = qop_get_num_q_paths_1(); int num_q_paths_2 = qop_get_num_q_paths_2(); int num_q_paths_3 = qop_get_num_q_paths_3(); Q_path *q_paths_1 = qop_get_q_paths_1(); Q_path *q_paths_2 = qop_get_q_paths_2(); Q_path *q_paths_3 = qop_get_q_paths_3(); Q_path *q_paths_sorted_current = NULL; int *netbackdir_table_current = NULL; int inaik; int n_naik_shift; double final_flop = 0.; size_t nflops = 0; QDP_ColorMatrix * force[4] = {Force->force[0], Force->force[1], Force->force[2], Force->force[3]}; int num_q_paths_current,n_orders_naik_current;//==nterms QDP_ColorMatrix *force_accum_0[4]; QDP_ColorMatrix *force_accum_0_naik[4]; QDP_ColorMatrix *force_accum_1[4]; QDP_ColorMatrix *force_accum_1u[4]; QDP_ColorMatrix *force_accum_2[4]; QDP_ColorMatrix *force_final[4]; QDP_ColorMatrix *Ugf[4], *Vgf[4], *Wgf[4]; int nterms = 0, n_order_naik_total; for(inaik = 0; inaik < n_naiks; inaik++) nterms += n_orders_naik[inaik]; n_order_naik_total = nterms; for(i=0;i<4;i++) { Ugf[i] = flh->U_links[i]; Vgf[i] = flh->V_links[i]; Wgf[i] = flh->W_unitlinks[i]; } QDP_ColorMatrix *tmat; QDP_ColorMatrix *mat_tmp0; REAL treal; if( first_force==1 ){ if( q_paths_sorted_1==NULL ) q_paths_sorted_1 = (Q_path 
*)malloc( num_q_paths_1*sizeof(Q_path) ); if(netbackdir_table_1==NULL ) netbackdir_table_1 = (int *)malloc( num_q_paths_1*sizeof(int) ); if( q_paths_sorted_2==NULL ) q_paths_sorted_2 = (Q_path *)malloc( num_q_paths_2*sizeof(Q_path) ); if(netbackdir_table_2==NULL ) netbackdir_table_2 = (int *)malloc( num_q_paths_2*sizeof(int) ); if( q_paths_sorted_3==NULL ) q_paths_sorted_3 = (Q_path *)malloc( num_q_paths_3*sizeof(Q_path) ); if(netbackdir_table_3==NULL ) netbackdir_table_3 = (int *)malloc( num_q_paths_3*sizeof(int) ); else{QOP_printf0("WARNING: remaking sorted path tables\n"); exit(0); } // make sorted tables sort_quark_paths_hisq( q_paths_1, q_paths_sorted_1, num_q_paths_1, 8 ); for( ipath=0; ipath<num_q_paths_1; ipath++ ) netbackdir_table_1[ipath] = find_backwards_gather( &(q_paths_sorted_1[ipath]) ); sort_quark_paths_hisq( q_paths_2, q_paths_sorted_2, num_q_paths_2, 16 ); for( ipath=0; ipath<num_q_paths_2; ipath++ ) netbackdir_table_2[ipath] = find_backwards_gather( &(q_paths_sorted_2[ipath]) ); sort_quark_paths_hisq( q_paths_3, q_paths_sorted_3, num_q_paths_3, 16 ); for( ipath=0; ipath<num_q_paths_3; ipath++ ) netbackdir_table_3[ipath] = find_backwards_gather( &(q_paths_sorted_3[ipath]) ); first_force=0; } tmat = QDP_create_M(); mat_tmp0 = QDP_create_M(); for(i=XUP;i<=TUP;i++){ force_accum_0[i] = QDP_create_M(); force_accum_0_naik[i] = QDP_create_M(); force_accum_1[i] = QDP_create_M(); force_accum_1u[i] = QDP_create_M(); force_accum_2[i] = QDP_create_M(); force_final[i] = QDP_create_M(); } for(dir=XUP;dir<=TUP;dir++) QDP_M_eq_zero(force_accum_2[dir], QDP_all); // loop on different naik masses n_naik_shift = 0; for( inaik=0; inaik<n_naiks; inaik++ ) { // smearing level 0 if( 0==inaik ) { n_orders_naik_current = n_order_naik_total; } else { n_orders_naik_current = n_orders_naik[inaik]; } QOP_hisq_force_multi_smearing0_fnmat(info,residues+n_naik_shift, x+n_naik_shift, n_orders_naik_current, force_accum_0, force_accum_0_naik); final_flop += info->final_flop; // 
smearing level 2 if( 0==inaik ) { q_paths_sorted_current = q_paths_sorted_2; num_q_paths_current = num_q_paths_2; netbackdir_table_current = netbackdir_table_2; } else { q_paths_sorted_current = q_paths_sorted_3; num_q_paths_current = num_q_paths_3; netbackdir_table_current = netbackdir_table_3; } QOP_hisq_force_multi_smearing_fnmat( info,Wgf,residues+n_naik_shift, x+n_naik_shift, n_orders_naik_current, force_accum_1, force_accum_0, force_accum_0_naik, num_q_paths_current, q_paths_sorted_current, netbackdir_table_current ); //QOP_printf0("HISQ smear0 flops = %g\n", info->final_flop); final_flop += info->final_flop; if( 0==inaik ) { coeff_mult = 1.0; } else { coeff_mult = eps_naik[inaik]; } for(dir=XUP;dir<=TUP;dir++) { QDP_M_peq_r_times_M(force_accum_2[dir],&coeff_mult, force_accum_1[dir],QDP_all); nflops += 36; } n_naik_shift += n_orders_naik[inaik]; } if ( umethod==QOP_UNITARIZE_NONE ){ // smearing level 1 QOP_hisq_force_multi_smearing_fnmat( info,Ugf,residues, x, nterms, force_accum_1, force_accum_2, NULL, num_q_paths_1, q_paths_sorted_1, netbackdir_table_1 ); final_flop += info->final_flop; } else if ( umethod==QOP_UNITARIZE_RATIONAL ){ // reunitarization QOP_hisq_force_multi_reunit(info,Vgf,force_accum_1u, force_accum_2); //QOP_printf0("reunit flops = %g\n", info->final_flop); final_flop += info->final_flop; // smearing level 1 QOP_hisq_force_multi_smearing_fnmat( info,Ugf,residues, x, nterms, force_accum_1, force_accum_1u, NULL, num_q_paths_1, q_paths_sorted_1, netbackdir_table_1 ); //QOP_printf0("HISQ smear1 flops = %g\n", info->final_flop); final_flop += info->final_flop; } else { QOP_printf0("Unknown or unsupported unitarization method\n"); exit(1); } // contraction with the link in question should be done here, // after contributions from all levels of smearing are taken into account for(dir=XUP;dir<=TUP;dir++){ QDP_M_eq_M_times_M(force_final[dir],Ugf[dir],force_accum_1[dir],QDP_all); nflops += 198; } // take into account even/odd parity (it is NOT done 
in "smearing" routine) //eps multiplication done outside QOP for(dir=XUP;dir<=TUP;dir++){ QDP_M_eq_M(tmat,force_final[dir],QDP_all); treal = 2.0; QDP_M_eq_r_times_M(force_final[dir],&treal,tmat,QDP_even); treal = -2.0; QDP_M_eq_r_times_M(force_final[dir],&treal,tmat,QDP_odd); nflops += 18; } // Put antihermitian traceless part into momentum // add force to momentum for(dir=XUP; dir<=TUP; dir++){ QDP_M_eq_antiherm_M(mat_tmp0, force_final[dir], QDP_all); QDP_M_peq_M(force[dir], mat_tmp0, QDP_all); nflops += 24+18; //QDP_M_peq_M(force_final[dir], force[dir], QDP_all); //QDP_M_eq_antiherm_M(force[dir], force_final[dir], QDP_all); } for(i=XUP;i<=TUP;i++){ QDP_destroy_M( force_accum_0[i] ); QDP_destroy_M( force_accum_0_naik[i] ); QDP_destroy_M( force_accum_1[i] ); QDP_destroy_M( force_accum_1u[i] ); QDP_destroy_M( force_accum_2[i] ); QDP_destroy_M( force_final[i] ); } QDP_destroy_M( tmat ); QDP_destroy_M( mat_tmp0 ); final_flop += ((double)nflops)*QDP_sites_on_node; info->final_sec = QDP_time() - dtime; info->final_flop = final_flop; info->status = QOP_SUCCESS; //QOP_printf0("HISQ force flops = %g\n", info->final_flop); } //hisq_force_multi_wrapper_fnmat
/* Smearing level i*/ static void QOP_hisq_force_multi_smearing_fnmat(QOP_info_t *info, QDP_ColorMatrix * gf[4], REAL *residues, QDP_ColorVector *x[], int nterms, QDP_ColorMatrix *force_accum[4], QDP_ColorMatrix *force_accum_old[4], QDP_ColorMatrix *force_accum_naik_old[4], int internal_num_q_paths, Q_path *internal_q_paths_sorted, int *internal_netbackdir_table) { int i,j,k,lastdir=-99,ipath,ilink; int length,dir,odir; REAL coeff; QDP_ColorMatrix *tmat; QDP_ColorMatrix *oprod_along_path[MAX_PATH_LENGTH+1]; QDP_ColorMatrix *mats_along_path[MAX_PATH_LENGTH+1]; QDP_ColorMatrix *mat_tmp0,*mat_tmp1, *stmp[8];; QDP_ColorVector *vec_tmp[2]; int netbackdir; size_t nflops = 0; // table of net path displacements (backwards from usual convention) Q_path *this_path; // pointer to current path /* Allocate fields */ for(i=0;i<=MAX_PATH_LENGTH;i++){ oprod_along_path[i] = QDP_create_M(); } for(i=1;i<=MAX_PATH_LENGTH;i++){ // 0 element is never used (it's unit matrix) mats_along_path[i] = QDP_create_M(); } mat_tmp0 = QDP_create_M(); mat_tmp1 = QDP_create_M(); for(i=0; i<8; i++) stmp[i] = QDP_create_M(); tmat = QDP_create_M(); vec_tmp[0] = QDP_create_V(); vec_tmp[1] = QDP_create_V(); // clear force accumulators for(dir=XUP;dir<=TUP;dir++) QDP_M_eq_zero(force_accum[dir], QDP_all); // loop over paths, and loop over links in path for( ipath=0; ipath<internal_num_q_paths; ipath++ ){ this_path = &(internal_q_paths_sorted[ipath]); if(this_path->forwback== -1)continue; // skip backwards dslash length = this_path->length; netbackdir = internal_netbackdir_table[ipath]; // move f(i-1) force from current site in positive direction, // this corresponds to outer product |X><Y| calculated at the endpoint of the path if( netbackdir<8) { // Not a Naik path link_gather_connection_qdp(oprod_along_path[0] , force_accum_old[OPP_DIR(netbackdir)], tmat, netbackdir ); } else { // Naik path if( NULL==force_accum_naik_old ) { QOP_printf0( "hisq_force_multi_smearing_fnmat: mismatch:\n" ); QOP_printf0( 
"force_accum_naik_old is NULL, but path table contains Naik paths(!)\n" ); exit(0); } // CONVERSION FROM 3-LINK DIRECTION TO 1-LINK DIRECTION link_gather_connection_qdp(oprod_along_path[0] , force_accum_naik_old[OPP_DIR(netbackdir-8)], tmat, netbackdir ); } // figure out how much of the outer products along the path must be // recomputed. j is last one needing recomputation. k is first one. j=length-1; // default is recompute all if( GOES_BACKWARDS(this_path->dir[0]) ) k=1; else k=0; for(ilink=j;ilink>=k;ilink--){ link_transport_connection_qdp( oprod_along_path[length-ilink], oprod_along_path[length-ilink-1], gf, mat_tmp0, stmp, this_path->dir[ilink] ); nflops += 198; } // maintain an array of transports "to this point" along the path. // Don't recompute beginning parts of path if same as last path ilink=0; // first link where new transport is needed // Sometimes we don't need the matrix for the last link if( GOES_FORWARDS(this_path->dir[length-1]) ) k=length-1; else k=length; for( ; ilink<k; ilink++ ){ if( ilink==0 ){ dir = this_path->dir[0]; if( GOES_FORWARDS(dir) ){ QDP_M_eq_sM(tmat, gf[dir], QDP_neighbor[dir], QDP_backward, QDP_all); QDP_M_eq_Ma(mats_along_path[1], tmat, QDP_all); QDP_discard_M(tmat); } else{ QDP_M_eq_M(mats_along_path[1], gf[OPP_DIR(dir)], QDP_all); } } else { // ilink != 0 dir = OPP_DIR(this_path->dir[ilink]); link_transport_connection_qdp( mats_along_path[ilink+1], mats_along_path[ilink], gf, mat_tmp0, stmp, dir ); nflops += 198; } } // end loop over links // A path has (length+1) points, counting the ends. At first // point, no "down" direction links have their momenta "at this // point". At last, no "up" ... 
if( GOES_FORWARDS(this_path->dir[length-1]) ) k=length-1; else k=length; for( ilink=0; ilink<=k; ilink++ ){ if(ilink<length)dir = this_path->dir[ilink]; else dir=NODIR; coeff = this_path->coeff; if( (ilink%2)==1 )coeff = -coeff; // add in contribution to the force if( ilink<length && GOES_FORWARDS(dir) ){ link_gather_connection_qdp(mat_tmp1, oprod_along_path[length-ilink-1], tmat, dir ); if(ilink==0) { QDP_M_eq_M(mat_tmp0,mat_tmp1,QDP_all); } else { QDP_M_eq_M_times_Ma(mat_tmp0, mats_along_path[ilink], mat_tmp1, QDP_all); nflops += 198; QDP_M_eq_Ma(mat_tmp1,mat_tmp0,QDP_all); } QDP_M_peq_r_times_M(force_accum[dir],&coeff,mat_tmp1,QDP_all); nflops += 36; } if( ilink>0 && GOES_BACKWARDS(lastdir) ){ odir = OPP_DIR(lastdir); if( ilink==1 ){ QDP_M_eq_M(mat_tmp0,oprod_along_path[length-ilink],QDP_all); QDP_M_eq_Ma(mat_tmp1,mat_tmp0,QDP_all); } else{ link_gather_connection_qdp(mat_tmp1, mats_along_path[ilink-1], tmat, odir ); QDP_M_eq_M_times_Ma(mat_tmp0, oprod_along_path[length-ilink], mat_tmp1, QDP_all); nflops += 198; QDP_M_eq_Ma(mat_tmp1, mat_tmp0, QDP_all); } QDP_M_peq_r_times_M(force_accum[odir],&coeff,mat_tmp1,QDP_all); nflops += 36; } lastdir = dir; } // end loop over links in path // } // end loop over paths // QDP_destroy_V( vec_tmp[0] ); QDP_destroy_V( vec_tmp[1] ); QDP_destroy_M( mat_tmp0 ); QDP_destroy_M( mat_tmp1 ); QDP_destroy_M( tmat ); for(i=0; i<8; i++) QDP_destroy_M(stmp[i]); for(i=0;i<=MAX_PATH_LENGTH;i++){ QDP_destroy_M( oprod_along_path[i] ); } for(i=1;i<=MAX_PATH_LENGTH;i++){ QDP_destroy_M( mats_along_path[i] ); } info->final_flop = ((double)nflops)*QDP_sites_on_node; return; }//hisq_force_multi_smearing_fnmat
void QOP_hisq_deriv_multi_fnmat2_qdp(QOP_info_t *info, QOP_FermionLinksHisq *flh, QDP_ColorMatrix *deriv[], QOP_hisq_coeffs_t *hisq_coeff, REAL *residues, QDP_ColorVector *x[], int *n_orders_naik) { #define NC QDP_get_nc(deriv[0]) if(!QOP_asqtad.inited) QOP_asqtad_invert_init(); double dtime = QDP_time(); double totalflops = 0; int siteflops = 0; QOP_info_t tinfo; QDP_ColorMatrix *Ugf[4], *Vgf[4], *Wgf[4]; for(int i=0; i<4; i++) { Ugf[i] = flh->U_links[i]; Vgf[i] = flh->V_links[i]; Wgf[i] = flh->W_unitlinks[i]; } QDP_ColorMatrix *force_accum_0[4]; QDP_ColorMatrix *force_accum_0_naik[4]; QDP_ColorMatrix *force_accum_1[4]; QDP_ColorMatrix *force_accum_1u[4]; QDP_ColorMatrix *force_accum_2[4]; QDP_ColorMatrix *force_final[4]; QDP_ColorMatrix *tmat = QDP_create_M(); for(int i=0; i<4; i++) { force_accum_0[i] = QDP_create_M(); force_accum_0_naik[i] = QDP_create_M(); force_accum_1[i] = QDP_create_M(); force_accum_1u[i] = QDP_create_M(); force_accum_2[i] = QDP_create_M(); force_final[i] = QDP_create_M(); QDP_M_eq_zero(force_accum_2[i], QDP_all); } int n_naiks = hisq_coeff->n_naiks; int nterms = 0; for(int inaik = 0; inaik < n_naiks; inaik++) nterms += n_orders_naik[inaik]; // loop on different naik masses int n_naik_shift = 0; for(int inaik=0; inaik<n_naiks; inaik++) { int n_orders_naik_current; if( inaik==0 ) { n_orders_naik_current = nterms; } else { n_orders_naik_current = n_orders_naik[inaik]; } QOP_get_mid(&tinfo, force_accum_0, QDP_neighbor, 4, residues+n_naik_shift, 1, x+n_naik_shift, n_orders_naik_current); totalflops += tinfo.final_flop; QOP_get_mid(&tinfo, force_accum_0_naik, QOP_common.neighbor3, 4, residues+n_naik_shift, 1, x+n_naik_shift, n_orders_naik_current); totalflops += tinfo.final_flop; // compensate for -1 on odd sites here instead of at end for(int dir=0; dir<4; dir++) { QDP_M_eqm_M(force_accum_0[dir], force_accum_0[dir], QDP_odd); QDP_M_eqm_M(force_accum_0_naik[dir], force_accum_0_naik[dir], QDP_odd); } // smearing level 0 for(int i=0; i<4; i++) 
QDP_M_eq_zero(force_accum_1[i], QDP_all); if(inaik==0) { QOP_asqtad_coeffs_t acoef; acoef.one_link = hisq_coeff->asqtad_one_link; acoef.three_staple = hisq_coeff->asqtad_three_staple; acoef.five_staple = hisq_coeff->asqtad_five_staple; acoef.seven_staple = hisq_coeff->asqtad_seven_staple; acoef.lepage = hisq_coeff->asqtad_lepage; acoef.naik = hisq_coeff->asqtad_naik; QOP_asqtad_deriv(&tinfo, Wgf, force_accum_1, &acoef, force_accum_0, force_accum_0_naik); //QOP_printf0("HISQ smear0 flops = %g\n", tinfo.final_flop); totalflops += tinfo.final_flop; } else { QOP_asqtad_coeffs_t acoef; acoef.one_link = hisq_coeff->difference_one_link; acoef.three_staple = 0; acoef.five_staple = 0; acoef.seven_staple = 0; acoef.lepage = 0; acoef.naik = hisq_coeff->difference_naik; QOP_asqtad_deriv(&tinfo, Wgf, force_accum_1, &acoef, force_accum_0, force_accum_0_naik); totalflops += tinfo.final_flop; } QLA_Real coeff_mult; if( inaik==0 ) { coeff_mult = 1.0; } else { coeff_mult = hisq_coeff->eps_naik[inaik]; } for(int dir=0; dir<4; dir++) { QDP_M_peq_r_times_M(force_accum_2[dir], &coeff_mult, force_accum_1[dir], QDP_all); } siteflops += 4*36; n_naik_shift += n_orders_naik[inaik]; } // smearing level 1 QOP_asqtad_coeffs_t acoef; acoef.one_link = hisq_coeff->fat7_one_link; acoef.three_staple = hisq_coeff->fat7_three_staple; acoef.five_staple = hisq_coeff->fat7_five_staple; acoef.seven_staple = hisq_coeff->fat7_seven_staple; acoef.lepage = 0; acoef.naik = 0; if(QOP_hisq_links.use_fat7_lepage) { acoef.lepage = hisq_coeff->fat7_lepage; } QOP_hisq_unitarize_method_t umethod = hisq_coeff->umethod; if ( umethod==QOP_UNITARIZE_NONE ){ for(int dir=0; dir<4; dir++) QDP_M_eq_zero(force_accum_1[dir], QDP_all); QOP_asqtad_deriv(&tinfo, Ugf, force_accum_1, &acoef, force_accum_2, NULL); totalflops += tinfo.final_flop; } else if ( umethod==QOP_UNITARIZE_RATIONAL ) { for(int mu=0; mu<4; mu++) QDP_M_eq_Ma(force_accum_1u[mu], force_accum_2[mu], QDP_all); // reunitarization #if QOP_Colors == 3 
QOP_hisq_force_multi_reunit(&tinfo, Vgf, force_accum_2, force_accum_1u); #else for(int mu=0; mu<4; mu++) { QOP_projectU_deriv_qdp(&tinfo, force_accum_2[mu], Wgf[mu], Vgf[mu], force_accum_1u[mu]); } #endif //QOP_printf0("reunit flops = %g\n", tinfo.final_flop); for(int mu=0; mu<4; mu++) QDP_M_eq_Ma(force_accum_1u[mu], force_accum_2[mu], QDP_all); totalflops += tinfo.final_flop; for(int dir=0; dir<4; dir++) QDP_M_eq_zero(force_accum_1[dir], QDP_all); QOP_asqtad_deriv(&tinfo, Ugf, force_accum_1, &acoef, force_accum_1u, NULL); //QOP_printf0("HISQ smear1 flops = %g\n", tinfo.final_flop); totalflops += tinfo.final_flop; } else { QOP_printf0("Unknown or unsupported unitarization method\n"); exit(1); } // take into account even/odd parity (it is NOT done in "smearing" routine) // eps multiplication done outside QOP // extra factor of 2 for(int dir=0; dir<4; dir++) { QLA_Real treal = 2; //QDP_M_peq_r_times_M(deriv[dir], &treal, force_accum_1[dir], QDP_even); //QDP_M_meq_r_times_M(deriv[dir], &treal, force_accum_1[dir], QDP_odd); QDP_M_peq_r_times_M(deriv[dir], &treal, force_accum_1[dir], QDP_all); } siteflops += 4*36; for(int i=0; i<4; i++) { QDP_destroy_M( force_accum_0[i] ); QDP_destroy_M( force_accum_0_naik[i] ); QDP_destroy_M( force_accum_1[i] ); QDP_destroy_M( force_accum_1u[i] ); QDP_destroy_M( force_accum_2[i] ); QDP_destroy_M( force_final[i] ); } QDP_destroy_M( tmat ); totalflops += ((double)siteflops)*QDP_sites_on_node; info->final_sec = QDP_time() - dtime; info->final_flop = totalflops; info->status = QOP_SUCCESS; #undef NC }
/*
 * Heatbath / overrelaxation update of a gauge field for the Symanzik
 * 1-loop gauge action.
 *
 *   info   - receives final_sec, final_flop and status; it is also handed to
 *            the staple routine on every link update
 *   links  - the gauge links, updated in place
 *   beta   - coupling; beta/QLA_Nc is stored in the file-scope 'fac'
 *   coeffs - action coefficients; a non-zero adjoint_plaquette is reported
 *            through QOP_error
 *   rs0    - random state; exposed into the file-scope 'rs' for the duration
 *            of the update and reset afterwards
 *   nup    - number of outer iterations
 *   nhb    - heatbath sweeps per outer iteration
 *   nover  - overrelaxation sweeps per outer iteration
 *
 * With only a plaquette term the lattice is swept in the 2 even/odd
 * subsets; if the rectangle or parallelogram coefficient is non-zero the 32
 * subsets from QOP_get_sub32 are used instead.  For each subset and
 * direction the staple is computed, v = U * adj(staple) is passed through
 * hb_func or over_func, and the link is replaced by v * U on that subset.
 *
 * The local flop counter is never incremented, so info->final_flop is
 * always set to 0 here.
 */
void QOP_symanzik_1loop_gauge_heatbath_qdp(QOP_info_t *info, QDP_ColorMatrix *links[], QLA_Real beta, QOP_gauge_coeffs_t *coeffs, QDP_RandomState *rs0, int nup, int nhb, int nover)
{
#define NC QDP_get_nc(links[0])
  double t_begin = QOP_time();
  double flop_count = 0;

  if(coeffs->adjoint_plaquette) {
    QOP_error("%s: adj plaq not supported\n", __func__);
  }
  fac = beta/QLA_Nc;

  QDP_Lattice *lattice = QDP_get_lattice_M(links[0]);
  int ndims = QDP_ndim_L(lattice);

  // default: even/odd checkerboard; improved actions need 32 subsets
  QDP_Subset *subsets = QDP_even_and_odd_L(lattice);
  int nsubsets = 2;
  int improved = (coeffs->rectangle!=0)||(coeffs->parallelogram!=0);
  if(improved) {
    nsubsets = 32;
    subsets = QOP_get_sub32(lattice);
  }

  QDP_ColorMatrix *staple = QDP_create_M_L(lattice);
  QDP_ColorMatrix *v = QDP_create_M_L(lattice);
  QDP_ColorMatrix *tmp = QDP_create_M_L(lattice);

  rs = QDP_expose_S(rs0);

  int iter = 0;
  while(iter < nup) {

    // heatbath sweeps
    int sweep = 0;
    while(sweep < nhb) {
      for(int is=0; is<nsubsets; is++) {
        QDP_Subset subset = subsets[is];
        for(int d=0; d<ndims; d++) {
          QDP_M_eq_zero(staple, subset);
          QOP_symanzik_1loop_gauge_staple_qdp(info, links, staple, d,
                                              coeffs, subsets, is);
          QDP_M_eq_M_times_Ma(v, links[d], staple, subset);
          QDP_M_eq_funcit(v, hb_func, subset);
          QDP_M_eq_M_times_M(tmp, v, links[d], subset);
          QDP_M_eq_M(links[d], tmp, subset);
        }
      }
      sweep++;
    }

    // overrelaxation sweeps
    sweep = 0;
    while(sweep < nover) {
      for(int is=0; is<nsubsets; is++) {
        QDP_Subset subset = subsets[is];
        for(int d=0; d<ndims; d++) {
          QDP_M_eq_zero(staple, subset);
          QOP_symanzik_1loop_gauge_staple_qdp(info, links, staple, d,
                                              coeffs, subsets, is);
          QDP_M_eq_M_times_Ma(v, links[d], staple, subset);
          QDP_M_eq_funcit(v, over_func, subset);
          QDP_M_eq_M_times_M(tmp, v, links[d], subset);
          QDP_M_eq_M(links[d], tmp, subset);
        }
      }
      sweep++;
    }

    iter++;
  }

  QDP_reset_S(rs0);

  QDP_destroy_M(tmp);
  QDP_destroy_M(v);
  QDP_destroy_M(staple);

  info->final_sec = QOP_time() - t_begin;
  info->final_flop = flop_count*QDP_sites_on_node;
  info->status = QOP_SUCCESS;
#undef NC
}
void start(void) { double mf, best_mf; QLA_Real plaq; QDP_ColorMatrix **u; int i, bs, bsi, best_bs; u = (QDP_ColorMatrix **) malloc(ndim*sizeof(QDP_ColorMatrix *)); for(i=0; i<ndim; i++) u[i] = QDP_create_M(); get_random_links(u, ndim, 0.3); plaq = get_plaq(u); if(QDP_this_node==0) printf("plaquette = %g\n", plaq); QOP_layout_t qoplayout = QOP_LAYOUT_ZERO; qoplayout.latdim = ndim; qoplayout.latsize = (int *) malloc(ndim*sizeof(int)); for(i=0; i<ndim; i++) { qoplayout.latsize[i] = lattice_size[i]; } qoplayout.machdim = -1; if(QDP_this_node==0) { printf("begin init\n"); fflush(stdout); } QOP_init(&qoplayout); gauge = QOP_create_G_from_qdp(u); QOP_Force *force; QDP_ColorMatrix *cm[4]; for(i=0; i<4; i++) { cm[i] = QDP_create_M(); QDP_M_eq_zero(cm[i], QDP_all); } QOP_gauge_coeffs_t gcoeffs = QOP_GAUGE_COEFFS_ZERO; gcoeffs.plaquette = 0.2; gcoeffs.rectangle = 0.2; gcoeffs.parallelogram = 0.2; gcoeffs.adjoint_plaquette = 0.2; force = QOP_create_F_from_qdp(cm); mf = bench_action(&gcoeffs, force); QOP_destroy_F(force); printf0("action: sec%7.4f mflops = %g\n", secs, mf); if(QDP_this_node==0) { printf("begin force\n"); fflush(stdout); } best_mf = 0; best_bs = bsa[0]; for(bsi=0; bsi<bsn; bsi++) { bs = bsa[bsi]; QDP_set_block_size(bs); force = QOP_create_F_from_qdp(cm); mf = bench_force(&gcoeffs, force); QOP_destroy_F(force); printf0("GF: bs%5i sec%7.4f mflops = %g\n", bs, secs, mf); if(mf>best_mf) { best_mf = mf; best_bs = bs; } } QDP_set_block_size(best_bs); QDP_profcontrol(1); force = QOP_create_F_from_qdp(cm); mf = bench_force(&gcoeffs, force); QDP_profcontrol(0); printf0("prof: GF: bs%5i sec%7.4f mflops = %g\n", best_bs, secs, mf); printf0("best: GF: bs%5i mflops = %g\n", best_bs, best_mf); if(QDP_this_node==0) { printf("begin unload links\n"); fflush(stdout); } //QOP_asqtad_invert_unload_links(); if(QDP_this_node==0) { printf("begin finalize\n"); fflush(stdout); } QOP_finalize(); }