Пример #1
0
/*
 * congrad_w -- restarted conjugate-gradient solve using QDP fields.
 *
 * Iterates on psi so that the residual r = chi - M_adjoint*M*psi becomes
 * small.  The gauge links, psi and chi are copied from the site structure
 * into QDP fields on entry; psi is copied back to the site structure when
 * the main loop converges and at every restart.
 *
 * With LU defined the operator is applied on the even subset with
 * mkappa = -kappa*kappa; otherwise it is applied on all sites with
 * mkappa = -kappa.
 *
 * Arguments:
 *   niter          restart interval: the main loop runs until the
 *                  iteration count is a multiple of niter, then the true
 *                  residual is recomputed from psi.  Must be nonzero (it
 *                  is used as a modulus).  The solve gives up once the
 *                  count has reached 3*niter.
 *   rsqmin         relative stopping criterion: iteration stops when
 *                  |r|^2 <= rsqmin * |chi|^2.
 *   final_rsq_ptr  receives the last |r|^2 (not normalized by |chi|^2).
 *
 * Returns the iteration count, i.e. the number of M_adjoint*M
 * applications counted (the global total_iters is advanced in step).
 *
 * Under CGTIME the timing line is printed only when convergence is
 * detected inside the main loop.
 */
int
congrad_w(int niter, Real rsqmin, Real *final_rsq_ptr) 
{
  int i;
  int iteration;	/* counter for iterations */
  double source_norm;
  double rsqstop;
  QLA_Real a, b;
  double rsq,oldrsq,pkp;	/* Sugar's a,b,resid**2,previous resid*2 */
				/* pkp = cg_p.K.cg_p */
  QLA_Real mkappa;
  QLA_Real sum;
#ifdef CGTIME
  double dtime;
#endif
#ifdef LU
  mkappa = -kappa*kappa;
#else
  mkappa = -kappa;
#endif

  setup_cg();

  /* Load the four gauge links and the two spinor fields from the site
     structure into their QDP counterparts. */
  for(i=0; i<4; i++) {
    set_M_from_site(gaugelink[i], F_OFFSET(link[i]),EVENANDODD);
  }
  set_D_from_site(psi, F_OFFSET(psi),EVENANDODD);
  set_D_from_site(chi, F_OFFSET(chi),EVENANDODD);

#ifdef PRESHIFT_LINKS
  /* Fill gaugelink[4..7] with the adjoint of each link shifted with
     QDP_backward along its own direction. */
  {
    QDP_ColorMatrix *tcm;
    tcm = QDP_create_M();
    for(i=0; i<4; i++) {
      QDP_M_eq_sM(tcm, gaugelink[i], QDP_neighbor[i], QDP_backward, QDP_all);
      QDP_M_eq_Ma(gaugelink[i+4], tcm, QDP_all);
    }
    QDP_destroy_M(tcm);
  }
#endif

#ifdef CGTIME
  dtime = -dclock();
#endif

  iteration=0;
 start:
  /* mp <-  M_adjoint*M*psi
     r,p <- chi - mp
     rsq = |r|^2
     source_norm = |chi|^2
  */
  rsq = source_norm = 0.0;

#ifdef LU

  QDP_D_eq_D(cgp, psi, QDP_even);
  dslash_special_qdp(tt1, cgp, 1, QDP_odd, temp1);
  dslash_special_qdp(ttt, tt1, 1, QDP_even, temp2);
  QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_even);

  dslash_special_qdp(tt2, ttt, -1, QDP_odd, temp3);
  dslash_special_qdp(mp, tt2, -1, QDP_even, temp4);
  QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_even);
  QDP_D_eq_D_minus_D(cgr, chi, mp, QDP_even);
  QDP_D_eq_D(cgp, cgr, QDP_even);

  QDP_r_eq_norm2_D(&sum, chi, QDP_even);
  source_norm = sum;
  QDP_r_eq_norm2_D(&sum, cgr, QDP_even);
  rsq = sum;

#else

  /* Every operation in this branch runs on QDP_all and reads cgp on all
     sites, so psi must be copied on all sites as well.  Copying only on
     QDP_even left the odd-site half of cgp holding whatever it contained
     before (the previous search direction after a restart). */
  QDP_D_eq_D(cgp, psi, QDP_all);
  dslash_special_qdp(ttt, cgp, 1, QDP_all, temp1);
  QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_all);

  dslash_special_qdp(mp, ttt, -1, QDP_all, temp1);
  QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_all);

  QDP_D_eq_D_minus_D(cgr, chi, mp, QDP_all);
  QDP_D_eq_D(cgp, cgr, QDP_all);

  QDP_r_eq_norm2_D(&sum, chi, QDP_all);
  source_norm = sum;
  QDP_r_eq_norm2_D(&sum, cgr, QDP_all);
  rsq = sum;

#endif

  iteration++ ;	/* iteration counts number of multiplications
		   by M_adjoint*M */
  total_iters++;
  /**if(this_node==0)printf("congrad2: source_norm = %e\n",source_norm);
     if(this_node==0)printf("congrad2: iter %d, rsq %e, pkp %e, a %e\n",
     iteration,(double)rsq,(double)pkp,(double)a );**/
  rsqstop = rsqmin * source_norm;
  /* Already converged for the current psi.  No copy back to the site
     structure is done here: psi was either just loaded from the site
     (first pass) or already written back before the restart. */
  if( rsq <= rsqstop ){
    *final_rsq_ptr= (Real)rsq;
    return (iteration);
  }

  /* main loop - do until convergence or time to restart */
  /* 
     oldrsq <- rsq
     mp <- M_adjoint*M*p
     pkp <- p.M_adjoint*M.p
     a <- rsq/pkp
     psi <- psi + a*p
     r <- r - a*mp
     rsq <- |r|^2
     b <- rsq/oldrsq
     p <- r + b*p
  */
  do {
    oldrsq = rsq;
#ifdef LU
    dslash_special_qdp(tt1, cgp, 1, QDP_odd, temp1);
    dslash_special_qdp(ttt, tt1, 1, QDP_even, temp2);
    QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_even);

    dslash_special_qdp(tt2, ttt, -1, QDP_odd, temp3);
    dslash_special_qdp(mp, tt2, -1, QDP_even, temp4);
    QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_even);

    QDP_r_eq_re_D_dot_D(&sum, cgp, mp, QDP_even);
    pkp = sum;
#else
    dslash_special_qdp(ttt, cgp, 1, QDP_all, temp1);
    QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_all);

    dslash_special_qdp(mp, ttt, -1, QDP_all, temp1);
    QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_all);

    QDP_r_eq_re_D_dot_D(&sum, cgp, mp, QDP_all);
    pkp = sum;
#endif
    iteration++;
    total_iters++;

    /* Step along p: update the solution and the residual. */
    a = rsq / pkp;
    QDP_D_peq_r_times_D(psi, &a, cgp, MYSUBSET);
    QDP_D_meq_r_times_D(cgr, &a, mp, MYSUBSET);
    QDP_r_eq_norm2_D(&sum, cgr, MYSUBSET);
    rsq = sum;

    /**if(this_node==0)printf("congrad2: iter %d, rsq %e, pkp %e, a %e\n",
       iteration,(double)rsq,(double)pkp,(double)a );**/
    if( rsq <= rsqstop ){
      *final_rsq_ptr= (Real)rsq;
#ifdef CGTIME
      dtime += dclock();
      if(this_node==0)
	printf("CONGRAD2: time = %.2e size_r= %.2e iters= %d MF = %.1f\n",
	       dtime,rsq,iteration,
	       (double)6480*iteration*even_sites_on_node/(dtime*1e6));
      //(double)5616*iteration*even_sites_on_node/(dtime*1e6));
#endif
      /* Converged: publish the solution to the site structure. */
      set_site_from_D(F_OFFSET(psi), psi,EVENANDODD);
      return (iteration);
    }

    /* New search direction. */
    b = rsq / oldrsq;
    QDP_D_eq_r_times_D_plus_D(cgp, &b, cgp, cgr, MYSUBSET);

  } while( iteration%niter != 0);

  /* Restart point reached: save the current psi, then either recompute
     the true residual from it or give up. */
  set_site_from_D(F_OFFSET(psi), psi,EVENANDODD);

  if( iteration < 3*niter ) goto start;
  *final_rsq_ptr= (Real)rsq;
  return(iteration);
}
/// Switch conjugate-gradient mode on or off and store its tolerance.
///
/// @param use  new value of the use_cg_ flag; when true, setup_cg() is
///             invoked after both members have been updated.
/// @param tol  tolerance; its absolute value is stored in cg_tol_, so the
///             sign of the argument is ignored.
void MultivariateFNormalSufficient::set_use_cg(bool use, double tol)
{
    cg_tol_ = std::abs(tol);
    use_cg_ = use;

    // Nothing to prepare when the mode is being turned off.
    if (!use) {
        return;
    }
    setup_cg();
}