static int bicgilu_cl_qop_generic( QOP_FermionLinksWilson *qop_links, QOP_invert_arg_t *qop_invert_arg, QOP_resid_arg_t ***qop_resid_arg, MYREAL *kappas[], int nkappa[], QOP_DiracFermion **qop_sol[], QOP_DiracFermion *qop_src[], int nsrc, int *final_restart, Real *final_rsq_ptr ) { int isrc, ikappa; int iters; QOP_info_t info; if(nsrc == 1 && nkappa[0] == 1) QOP_wilson_invert( &info, qop_links, qop_invert_arg, qop_resid_arg[0][0], kappas[0][0], qop_sol[0][0], qop_src[0] ); else QOP_wilson_invert_multi( &info, qop_links, qop_invert_arg, qop_resid_arg, kappas, nkappa, qop_sol, qop_src, nsrc ); /* For now we return the largest value and total iterations */ *final_rsq_ptr = 0; *final_restart = 0; iters = 0; for(isrc = 0; isrc < nsrc; isrc++) for(ikappa = 0; ikappa < nkappa[isrc]; ikappa++){ if(*final_rsq_ptr < qop_resid_arg[isrc][ikappa]->final_rsq) *final_rsq_ptr = qop_resid_arg[isrc][ikappa]->final_rsq; if(*final_restart < qop_resid_arg[isrc][ikappa]->final_restart) *final_restart = qop_resid_arg[isrc][ikappa]->final_restart; iters += qop_resid_arg[isrc][ikappa]->final_iter; #ifdef CG_DEBUG if(nsrc > 1 || nkappa[isrc] > 1) node0_printf("CONGRAD5(src %d,kappa %d): iters = %d resid = %e\n", isrc, ikappa, qop_resid_arg[isrc][ikappa]->final_iter, qop_resid_arg[isrc][ikappa]->final_rsq); #endif } #ifdef CGTIME node0_printf("CGTIME: time = %e (wilson_qop %s) ", info.final_sec,qop_prec[QOP_Precision-1]); for(isrc = 0; isrc < nsrc; isrc++) node0_printf("nkappa[%d] = %d iters = %d ", isrc,nkappa[isrc],qop_resid_arg[isrc][0]->final_iter); node0_printf("mflops = %e\n", info.final_flop/(1.0e6*info.final_sec) ); fflush(stdout); #endif return iters; }
/*
 * Time repeated Wilson inversions and return the measured Mflop rate.
 *
 * `out` is zeroed once, then QOP_wilson_invert() is called nit+1 times on the
 * same source/solution pair.  Pass 0 is treated as a warm-up: its numbers are
 * printed but excluded from the totals.  On return:
 *
 *   res_arg->final_iter  = iterations per timed pass (integer average),
 *   info->final_sec      = seconds per timed pass, averaged over nodes,
 *   info->final_flop     = flops per timed pass, averaged over nodes,
 *   return value         = info->final_flop / (1e6 * info->final_sec).
 *
 * If nit <= 0 no timed pass was run; the three outputs are set to zero and 0
 * is returned instead of dividing by zero.
 *
 * The even-subset norm of `out` from the first call is remembered in a static
 * variable, and a message is printed when a later call differs from it by
 * more than a relative 1e-3.
 *
 * Uses the file-level `nit`, `flw` and `kappa`.
 */
double
bench_inv(QOP_info_t *info, QOP_invert_arg_t *inv_arg,
	  QOP_resid_arg_t *res_arg, QDP_DiracFermion *out,
	  QDP_DiracFermion *in)
{
  static QLA_Real r2s=-1;  /* reference norm; negative until the first call */
  QLA_Real r2;
  double sec=0, flop=0, mf=0;
  int i, iter=0;
  QOP_DiracFermion *qopout, *qopin;

  QDP_D_eq_zero(out, QDP_all);
  qopout = QOP_create_D_from_qdp(out);
  qopin = QOP_create_D_from_qdp(in);
  for(i=0; i<=nit; i++) {
    QMP_barrier();
    QOP_wilson_invert(info, flw, inv_arg, res_arg, kappa, qopout, qopin);
    QMP_barrier();
    /* The flop count is printed as a floating value: converting it to int
       is undefined once it exceeds INT_MAX.  For in-range integer-valued
       counts the printed digits are the same as before. */
    printf("%i\t%i\t%g\t%.0f\n", i, res_arg->final_iter, info->final_sec,
	   (double)info->final_flop);
    if(i>0) {  /* skip the warm-up pass */
      iter += res_arg->final_iter;
      sec += info->final_sec;
      flop += info->final_flop;
    }
  }
  QOP_destroy_D(qopout);
  QOP_destroy_D(qopin);

  QDP_r_eq_norm2_D(&r2, out, QDP_even);
  if(r2s<0) r2s = r2;
  if(fabs(1-r2/r2s)>1e-3) {
    printf0("first norm = %g this norn = %g\n", r2s, r2);
  }

  /* Summing 1 across nodes yields the node count used for averaging
     (assumes QMP_sum_double is a global sum over all nodes). */
  mf = 1;
  QMP_sum_double(&mf);
  QMP_sum_double(&sec);
  QMP_sum_double(&flop);

  if(nit <= 0) {
    /* No timed passes: avoid the integer division by zero in iter/nit. */
    res_arg->final_iter = 0;
    info->final_sec = 0;
    info->final_flop = 0;
    return 0;
  }

  res_arg->final_iter = iter/nit;
  info->final_sec = sec/(mf*nit);
  info->final_flop = flop/(mf*nit);
  mf = info->final_flop/(1e6*info->final_sec);
  return mf;
}