/*
 * congrad_w -- conjugate-gradient iteration on M_adjoint*M, written with
 * QDP fields.
 *
 * Parameters:
 *   niter          number of iterations between restarts; the routine
 *                  gives up once the iteration count reaches 3*niter.
 *   rsqmin         convergence threshold relative to |chi|^2: the loop
 *                  stops when |r|^2 <= rsqmin * |chi|^2.
 *   final_rsq_ptr  receives the last computed |r|^2.
 *
 * Returns the number of multiplications by M_adjoint*M that were done
 * (the restart evaluation of the residual counts as one).
 *
 * Reads the site fields link[], psi and chi into QDP fields at entry and
 * writes psi back to the site structure before every restart and on
 * convergence inside the main loop.
 *
 * With LU defined the work is done on even sites with mkappa = -kappa^2;
 * otherwise on all sites with mkappa = -kappa.
 * NOTE(review): MYSUBSET is defined outside this block -- presumably
 * QDP_even under LU and QDP_all otherwise; confirm.
 */
int congrad_w(int niter, Real rsqmin, Real *final_rsq_ptr) {
    int i;
    int iteration;            /* counter for iterations */
    double source_norm;
    double rsqstop;
    QLA_Real a, b;
    double rsq, oldrsq, pkp;  /* Sugar's a,b,resid**2,previous resid*2 */
                              /* pkp = cg_p.K.cg_p */
    QLA_Real mkappa;
    QLA_Real sum;
#ifdef CGTIME
    double dtime;
#endif

#ifdef LU
    mkappa = -kappa*kappa;
#else
    mkappa = -kappa;
#endif

    setup_cg();

    /* Copy the gauge links, current solution and source from the site
       structure into the QDP fields used below. */
    for(i=0; i<4; i++) {
        set_M_from_site(gaugelink[i], F_OFFSET(link[i]),EVENANDODD);
    }
    set_D_from_site(psi, F_OFFSET(psi),EVENANDODD);
    set_D_from_site(chi, F_OFFSET(chi),EVENANDODD);

#ifdef PRESHIFT_LINKS
    {
        /* gaugelink[i+4] <- adjoint of gaugelink[i] shifted from the
           backward neighbor in direction i. */
        QDP_ColorMatrix *tcm;
        tcm = QDP_create_M();
        for(i=0; i<4; i++) {
            QDP_M_eq_sM(tcm, gaugelink[i], QDP_neighbor[i], QDP_backward, QDP_all);
            QDP_M_eq_Ma(gaugelink[i+4], tcm, QDP_all);
        }
        QDP_destroy_M(tcm);
    }
#endif

#ifdef CGTIME
    dtime = -dclock();
#endif

    iteration=0;

start:
    /* mp <- M_adjoint*M*psi
       r,p <- chi - mp
       rsq = |r|^2
       source_norm = |chi|^2
    */
    rsq = source_norm = 0.0;
#ifdef LU
    QDP_D_eq_D(cgp, psi, QDP_even);
    dslash_special_qdp(tt1, cgp, 1, QDP_odd, temp1);
    dslash_special_qdp(ttt, tt1, 1, QDP_even, temp2);
    QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_even);

    dslash_special_qdp(tt2, ttt, -1, QDP_odd, temp3);
    dslash_special_qdp(mp, tt2, -1, QDP_even, temp4);
    QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_even);

    QDP_D_eq_D_minus_D(cgr, chi, mp, QDP_even);
    QDP_D_eq_D(cgp, cgr, QDP_even);

    QDP_r_eq_norm2_D(&sum, chi, QDP_even);
    source_norm = sum;
    QDP_r_eq_norm2_D(&sum, cgr, QDP_even);
    rsq = sum;
#else
    /* Every operation in this branch acts on all sites, so psi must be
       copied into cgp on all sites too.  Copying only the even sites
       would leave stale odd-site data in cgp (the old search direction
       after a restart) and corrupt the recomputed residual. */
    QDP_D_eq_D(cgp, psi, QDP_all);
    dslash_special_qdp(ttt, cgp, 1, QDP_all, temp1);
    QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_all);

    dslash_special_qdp(mp, ttt, -1, QDP_all, temp1);
    QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_all);

    QDP_D_eq_D_minus_D(cgr, chi, mp, QDP_all);
    QDP_D_eq_D(cgp, cgr, QDP_all);

    QDP_r_eq_norm2_D(&sum, chi, QDP_all);
    source_norm = sum;
    QDP_r_eq_norm2_D(&sum, cgr, QDP_all);
    rsq = sum;
#endif

    iteration++ ;  /* iteration counts number
                      of multiplications by M_adjoint*M */
    total_iters++;
    /**if(this_node==0)printf("congrad2: source_norm = %e\n",source_norm);
    if(this_node==0)printf("congrad2: iter %d, rsq %e, pkp %e, a %e\n",
    iteration,(double)rsq,(double)pkp,(double)a );**/
    rsqstop = rsqmin * source_norm;
    if( rsq <= rsqstop ){
        /* Already converged; psi in the site structure is current
           (either untouched, or written back just before the restart). */
        *final_rsq_ptr= (Real)rsq;
        return (iteration);
    }

    /* main loop - do until convergence or time to restart */
    /*
       oldrsq <- rsq
       mp <- M_adjoint*M*p
       pkp <- p.M_adjoint*M.p
       a <- rsq/pkp
       psi <- psi + a*p
       r <- r - a*mp
       rsq <- |r|^2
       b <- rsq/oldrsq
       p <- r + b*p
    */
    do {
        oldrsq = rsq;
#ifdef LU
        dslash_special_qdp(tt1, cgp, 1, QDP_odd, temp1);
        dslash_special_qdp(ttt, tt1, 1, QDP_even, temp2);
        QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_even);

        dslash_special_qdp(tt2, ttt, -1, QDP_odd, temp3);
        dslash_special_qdp(mp, tt2, -1, QDP_even, temp4);
        QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_even);

        QDP_r_eq_re_D_dot_D(&sum, cgp, mp, QDP_even);
        pkp = sum;
#else
        dslash_special_qdp(ttt, cgp, 1, QDP_all, temp1);
        QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_all);

        dslash_special_qdp(mp, ttt, -1, QDP_all, temp1);
        QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_all);

        QDP_r_eq_re_D_dot_D(&sum, cgp, mp, QDP_all);
        pkp = sum;
#endif
        iteration++;
        total_iters++;

        a = rsq / pkp;
        QDP_D_peq_r_times_D(psi, &a, cgp, MYSUBSET);
        QDP_D_meq_r_times_D(cgr, &a, mp, MYSUBSET);
        QDP_r_eq_norm2_D(&sum, cgr, MYSUBSET);
        rsq = sum;

        /**if(this_node==0)printf("congrad2: iter %d, rsq %e, pkp %e, a %e\n",
        iteration,(double)rsq,(double)pkp,(double)a );**/
        if( rsq <= rsqstop ){
            *final_rsq_ptr= (Real)rsq;
#ifdef CGTIME
            dtime += dclock();
            if(this_node==0)
                printf("CONGRAD2: time = %.2e size_r= %.2e iters= %d MF = %.1f\n",
                       dtime,rsq,iteration,
                       (double)6480*iteration*even_sites_on_node/(dtime*1e6));
            //(double)5616*iteration*even_sites_on_node/(dtime*1e6));
#endif
            set_site_from_D(F_OFFSET(psi), psi,EVENANDODD);
            return (iteration);
        }

        b = rsq / oldrsq;
        QDP_D_eq_r_times_D_plus_D(cgp, &b, cgp, cgr, MYSUBSET);
    } while( iteration%niter != 0);

    /* Write the current solution back before restarting or giving up. */
    set_site_from_D(F_OFFSET(psi), psi,EVENANDODD);

    if( iteration < 3*niter ) goto start;

    /* Not converged within 3*niter iterations: report the last residual. */
    *final_rsq_ptr= (Real)rsq;
    return(iteration);
}
// Record whether conjugate gradient is to be used and the tolerance for it.
// The tolerance is stored as its absolute value, so the sign of `tol` is
// ignored. When `use` is true, setup_cg() is called after both members
// have been updated.
void MultivariateFNormalSufficient::set_use_cg(bool use, double tol) {
  use_cg_ = use;
  cg_tol_ = std::abs(tol);
  if (!use) {
    return;
  }
  setup_cg();
}