Beispiel #1
0
/*
 * Brute-force force estimate for the link variable Ax[i].
 *
 * Shifts Ax[i] by -step and +step around its current value, re-runs the CG
 * solver for each shifted configuration and returns the central finite
 * difference of scalar_prod_r(g_fermion, g_temp2) with respect to Ax[i].
 *
 * Side effects: overwrites g_temp2, adds the CG iteration counts to
 * g_cgiterations1, and recomputes the link variables three times (the last
 * call is made after Ax[i] has been shifted back).
 */
double stupid_fermion_force_x(const int i) {
	const double step = 0.001;
	double value_minus, value_plus;

	/* evaluation at Ax[i] - step */
	Ax[i] -= step;
	calculatelinkvars();
	g_cgiterations1 += cg(g_temp2, g_fermion, ITER_MAX, DELTACG, &fermion_sqr);
	value_minus = scalar_prod_r(g_fermion, g_temp2);

	/* evaluation at Ax[i] + step (two steps up from the lowered value) */
	Ax[i] += 2*step;
	calculatelinkvars();
	g_cgiterations1 += cg(g_temp2, g_fermion, ITER_MAX, DELTACG, &fermion_sqr);
	value_plus = scalar_prod_r(g_fermion, g_temp2);

	/* shift back and refresh the link variables before returning */
	Ax[i] -= step;
	calculatelinkvars();

	return (value_plus - value_minus)/(2.0*step);
}
Beispiel #2
0
/*
 * Acceptance-step evaluation for the rational monomial with index id.
 *
 * Runs the multi-shift solver cg_mms_tm on the pseudo-fermion field pf,
 * builds w_fields[0] = pf + sum_j rmu[j] * g_chi_up_spinor_field[j] and
 * stores scalar_prod_r(pf, w_fields[0]) in energy1.
 *
 * Side effects: sets g_mu and g_mu3 to zero, calls boundary(kappa), and for
 * CLOVERRAT monomials sets g_c_sw and recomputes/inverts the clover term.
 * The solver's return value is added to the monomial's iter0 counter.
 *
 * Returns energy1 - energy0.
 */
double rat_acc(const int id, hamiltonian_field_t * const hf) {
  monomial * const m = &monomial_list[id];
  solver_pm_t params;
  double reached_prec;  /* filled in by the solver, not inspected here */
  double t_stop, dH;
  const double t_start = gettime();

  // only for non-twisted operators
  g_mu = 0.;
  g_mu3 = 0.;
  boundary(m->kappa);
  if(m->type == CLOVERRAT) {
    g_c_sw = m->c_sw;
    sw_term((const su3**) hf->gaugefield, m->kappa, m->c_sw);
    sw_invert(EE, 0.);
  }
  m->energy1 = 0.;

  // multi-shift solver setup
  params.type = CGMMS;
  params.M_psi = m->Qsq;
  params.max_iter = m->maxiter;
  params.squared_solver_prec = m->accprec;
  params.rel_prec = g_relative_precision_flag;
  params.no_shifts = m->rat.np;
  params.shifts = m->rat.mu;
  params.sdim = VOLUME/2;
  m->iter0 += cg_mms_tm(g_chi_up_spinor_field, m->pf, &params, &reached_prec);

  // apply R to the pseudo-fermion fields, adding the shifts from the last
  // one down to the first
  assign(m->w_fields[0], m->pf, VOLUME/2);
  {
    int j = m->rat.np;
    while(j-- > 0) {
      assign_add_mul_r(m->w_fields[0], g_chi_up_spinor_field[j],
                       m->rat.rmu[j], VOLUME/2);
    }
  }

  m->energy1 = scalar_prod_r(m->pf, m->w_fields[0], VOLUME/2, 1);
  t_stop = gettime();
  dH = m->energy1 - m->energy0;

  if(g_proc_id == 0 && g_debug_level > 1) {
    printf("# Time for %s monomial acc step: %e s\n", m->name, t_stop-t_start);
  }
  if(g_proc_id == 0 && g_debug_level > 0) { // should be 3
    printf("called rat_acc for id %d dH = %1.10e\n", id, dH);
  }
  return(dH);
}
Beispiel #3
0
/*
 * One Hybrid Monte Carlo update step.
 *
 * Sequence, as implemented below:
 *   1. draw Gaussian momenta gp1/gp2 and accumulate the old Hamiltonian
 *      ham_old, starting from the stored gauge action s_g_old;
 *   2. draw complex Gaussian fields into g_X, add their square norm to
 *      ham_old and build the pseudo-fermion field g_fermion (and g_fermion2
 *      when more than two timescales are in use);
 *   3. run the molecular dynamics integration;
 *   4. recompute the gauge action s_g and the new Hamiltonian ham, including
 *      the pseudo-fermion parts obtained from CG solves;
 *   5. call accept(exp(ham_old - ham)), copy gauge1/gauge2 into their *_old
 *      counterparts and store s_g in s_g_old.
 *
 * Works entirely on file/global state (ham, ham_old, s_g, s_g_old, gp1, gp2,
 * g_X, g_fermion, g_fermion2, g_gam5DX, g_temp, gauge fields).
 *
 * The result depends on the order of the gauss() calls, so statements must
 * not be reordered.
 *
 * Returns the value returned by accept().
 */
int update() //Basic HMC update step
{
  double squnrm;
  int i, acc;
  double exphdiff;
  
  /* the new momenta and the 'generator' of the arbitrary pseudofield */
  /* calculate the hamiltonian of this state: new momenta + action */
  /* g_X is ab-used a bit - here it is \xi = (gamma5 D)^{-1} \phi */
  
  /* old Hamiltonian: stored gauge action plus kinetic term of fresh momenta */
  ham_old = s_g_old;
  for(i=0; i<GRIDPOINTS; i++) {
    gp1[i] = gauss();
    gp2[i] = gauss();
    ham_old += 0.5*(gp1[i]*gp1[i] + gp2[i]*gp2[i]);
  }
  
  /* Now create the field and calculate its contributions to the action (end of the 'misuse') */
  /* squnrm is the fermion part of the action : */
  /*   S = R^dagger * R  =  g_fermion^dag * D^{-1 dag} * D^{-1} * g_fermion = g_fermion Q^-1 g_fermion */

  /* PF1 det(1/(Q^2 + mu^2)) */
  for(i=0; i<GRIDPOINTS; i++) {
    g_X[i].s1 = (gauss() + I*gauss())/sqrt(2); //Gaussian fields R
    g_X[i].s2 = (gauss() + I*gauss())/sqrt(2);
  }
  squnrm = square_norm(g_X);
  
  // step iv): g_fermion = \phi = K^dag * g_X = K^dag * \xi
  // NOTE(review): presumably assign_diff_mul subtracts i*sqrt(g_musqr)*g_X
  // from g_fermion - confirm against its definition.
  gam5D_wilson(g_fermion, g_X);
  assign_diff_mul(g_fermion, g_X, 0.+I*sqrt(g_musqr));
  ham_old += squnrm;

  /* PF2 det((Q^2 + mu^2)/Q^2) */
  /* only generated when more than two timescales are configured */
  if(no_timescales > 2) {
    for(i=0; i<GRIDPOINTS; i++) {
      g_X[i].s1 = (gauss() + I*gauss())/sqrt(2); //Gaussian fields R
      g_X[i].s2 = (gauss() + I*gauss())/sqrt(2);
    }
    squnrm = square_norm(g_X);

    cg(g_fermion2, g_X, ITER_MAX, DELTACG, &gam5D_SQR_musqr_wilson);    
    gam5D_wilson(g_gam5DX, g_fermion2);
    assign_add_mul(g_gam5DX, g_fermion2, 0.+I*sqrt(g_musqr));
    gam5D_wilson(g_fermion2, g_gam5DX);
    ham_old += squnrm;
  }
  // Add the part for the fermion fields

  // Do the molecular dynamic chain
  /* the simple LF scheme */

  /* the second order minimal norm multi-timescale integrator*/
  /* MN2_integrator(g_steps, 2, g_steps*g_stepsize, 0.2); */

  /* This is the recursive implementation */
  /* it can be found in rec_lf_integrator.c|h */
  /* a single timescale uses the plain leapfrog, otherwise the recursive one */
  if (no_timescales == 1)
    leapfrog(n_steps[0], tau/n_steps[0]);
  else
    integrate_leap_frog(tau/n_steps[no_timescales-1], no_timescales-1, no_timescales, n_steps, 1, up_momenta);
  
  // Calculate the new action and hamiltonian
  ham = 0;
  s_g = 0;
  for (i=0; i<GRIDPOINTS; i++) {
    s_g += S_G(i);
    ham += 0.5*(gp1[i]*gp1[i] + gp2[i]*gp2[i]);
  }
  /* Sum_ij [(g_fermion^*)_i (Q^-1)_ij (g_fermion)_j]  =  Sum_ij [(g_fermion^*)_i (g_X)_i] */
  ham += s_g;
  // add in the part for the fermion fields.
  cg(g_X, g_fermion, ITER_MAX, DELTACG, &gam5D_SQR_musqr_wilson);
  ham += scalar_prod_r(g_fermion, g_X);
  
  /* second pseudo-fermion contribution, mirroring the PF2 block above */
  if(no_timescales > 2) {
    cg(g_gam5DX, g_fermion2, ITER_MAX, DELTACG, &gam5D_SQR_wilson);
    gam5D_SQR_musqr_wilson(g_X, g_temp, g_gam5DX);
    ham += scalar_prod_r(g_fermion2, g_X);
  }

  /* Metropolis test on exp(-(H_new - H_old)) */
  exphdiff = exp(ham_old-ham);
  acc = accept(exphdiff);
 
  /* The copy below is unconditional.
     NOTE(review): presumably accept() has already restored gauge1/gauge2 on
     rejection - confirm against accept(). */
  for(i=0; i<GRIDPOINTS; i++) {
    gauge1_old[i]=gauge1[i];
    gauge2_old[i]=gauge2[i];
  }
 
  s_g_old = s_g;
  return(acc);
}
Beispiel #4
0
/*
 * Multiple-mass-shift conjugate gradient solver.
 *
 * P        output: solution for the base mass (overwritten; the start vector
 *          is always zero)
 * Q        input:  source
 * max_iter maximal number of CG iterations
 * eps_sq   squared target precision
 * rel_prec 0: stop when |r|^2 <= eps_sq,
 *          1: stop when |r|^2 <= eps_sq * |Q|^2
 * N        number of sites the linear algebra routines run over
 * f        operator applied in each iteration
 *
 * The shifted systems use sigma = g_extra_masses[im]^2 - g_mu^2 and are
 * iterated in xs_mms_solver / ps_mms_solver alongside the base system. On
 * convergence every solution (base mass first, then each extra mass) is
 * written to its own propagator file.
 *
 * Returns the number of iterations on convergence, -1 if max_iter was
 * reached without convergence (nothing is written to disk in that case).
 */
int cg_mms_tm(spinor * const P, spinor * const Q, const int max_iter, 
	      double eps_sq, const int rel_prec, const int N, matrix_mult f) {

  /* The CG recursion state is local and re-initialised on every call, so a
     solve can never start from the alpha/beta left behind by a previous one. */
  double normsq, pro, err, squarenorm;
  double alpha_cg = 1., beta_cg = 0.;
  double gamma, alpham1;
  int iteration, im, append = 0, len;
  char filename[100];
  int const cg_mms_default_precision = 32;
  double tmp_mu = g_mu;
  WRITER * writer = NULL;
  paramsInverterInfo *inverterInfo = NULL;
  paramsPropagatorFormat *propagatorFormat = NULL;
  spinor * temp_save; //used to save all the masses
  spinor ** solver_field = NULL;
  const int nr_sf = 5;

  init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  init_mms_tm(g_no_extra_masses);

  /* currently only implemented for P=0 */
  zero_spinor_field(P, N);
  /*  Value of the bare MMS-masses (\mu^2 - \mu_0^2) */
  for(im = 0; im < g_no_extra_masses; im++) {
    sigma[im] = g_extra_masses[im]*g_extra_masses[im] - g_mu*g_mu;
    assign(xs_mms_solver[im], P, N);
    assign(ps_mms_solver[im], Q, N);
    zitam1[im] = 1.0;
    zita[im] = 1.0;
    alphas[im] = 1.0;
    betas[im] = 0.0;
  }

  squarenorm = square_norm(Q, N, 1);
  assign(solver_field[0], P, N);

  /* initialize residue r and search vector p.
     The start vector is zero (see above), hence r = p = Q. */
  assign(solver_field[1], Q, N);
  assign(solver_field[2], Q, N);
  normsq = square_norm(Q, N, 1);

  /* main loop */
  for(iteration = 0; iteration < max_iter; iteration++) {

    /*   Q^2*p and then (p,Q^2*p)  */
    f(solver_field[4], solver_field[2]);
    pro = scalar_prod_r(solver_field[2], solver_field[4], N, 1);

    /* For the update of the coeff. of the shifted pol. we need alpha_cg(i-1) and alpha_cg(i).
       This is the reason why we need this double definition of alpha */
    alpham1 = alpha_cg;

    /* Compute alpha_cg(i+1) */
    alpha_cg = normsq/pro;
    for(im = 0; im < g_no_extra_masses; im++) {

      /* Now gamma is a temp variable that corresponds to zita(i+1) */ 
      gamma = zita[im]*alpham1/(alpha_cg*beta_cg*(1.-zita[im]/zitam1[im]) 
				+ alpham1*(1.+sigma[im]*alpha_cg));

      /* Now zita(i-1) is put equal to the old zita(i) */
      zitam1[im] = zita[im];
      /* Now zita(i+1) is updated */
      zita[im] = gamma;
      /* Update of alphas(i) = alpha_cg(i)*zita(i+1)/zita(i) */ 
      alphas[im] = alpha_cg*zita[im]/zitam1[im];
      /* Compute xs(i+1) = xs(i) + alphas(i)*ps(i) */
      assign_add_mul_r(xs_mms_solver[im], ps_mms_solver[im], alphas[im], N); 
    }

    /*  Compute x_(i+1) = x_i + alpha_cg(i+1) p_i    */
    assign_add_mul_r(solver_field[0], solver_field[2],  alpha_cg, N);
    /*  Compute r_(i+1) = r_i - alpha_cg(i+1) Qp_i   */
    assign_add_mul_r(solver_field[1], solver_field[4], -alpha_cg, N);

    /* Check whether the precision eps_sq is reached */

    err = square_norm(solver_field[1], N, 1);
    if(g_debug_level > 2 && g_proc_id == g_stdio_proc) {
      printf("CGMMS iteration: %d residue: %g\n", iteration, err); fflush( stdout );
    }

    if( ((err <= eps_sq) && (rel_prec == 0)) ||
      ((err <= eps_sq*squarenorm) && (rel_prec == 1)) ) {

      /* recompute the true residue of the base system for the log and the
         inverter info written below */
      assign(P, solver_field[0], N);
      f(solver_field[2], P);
      diff(solver_field[3], solver_field[2], Q, N);
      err = square_norm(solver_field[3], N, 1);
      if(g_debug_level > 0 && g_proc_id == g_stdio_proc) {
        printf("# CG MMS true residue at final iteration (%d) was %g.\n", iteration, err); 
        fflush( stdout);
      }
      g_sloppy_precision = 0;
      g_mu = tmp_mu;

      /* save all the results of (Q^dagger Q)^(-1) \gamma_5 \phi */
      /* here ... */
      /* when im == -1 save the base mass*/
      for(im = -1; im < g_no_extra_masses; im++) {
        if(im==-1) {
          temp_save=solver_field[0];
        } else {
          temp_save=xs_mms_solver[im];
        }

        /* bounded formatting: SourceInfo.basename has arbitrary length and
           must not be able to overrun filename */
        if(SourceInfo.type != 1) {
          if (PropInfo.splitted) {
            len = snprintf(filename, sizeof(filename), "%s.%.4d.%.2d.%.2d.cgmms.%.2d.inverted", SourceInfo.basename, SourceInfo.nstore, SourceInfo.t, SourceInfo.ix, im+1);
          } else {
            len = snprintf(filename, sizeof(filename), "%s.%.4d.%.2d.cgmms.%.2d.inverted", SourceInfo.basename, SourceInfo.nstore, SourceInfo.t, im+1);
          }
        }
        else {
          len = snprintf(filename, sizeof(filename), "%s.%.4d.%.5d.cgmms.%.2d.0", SourceInfo.basename, SourceInfo.nstore, SourceInfo.sample, im+1);
        }
        if((len < 0 || len >= (int)sizeof(filename)) && g_proc_id == g_stdio_proc) {
          fprintf(stderr, "# CG MMS: output file name for mass index %d does not fit into %d characters and was truncated\n",
                  im+1, (int)sizeof(filename)-1);
        }
        if(g_kappa != 0) {
          mul_r(temp_save, (2*g_kappa)*(2*g_kappa), temp_save, N);
        }

        append = !PropInfo.splitted;

        construct_writer(&writer, filename, append);

        if (PropInfo.splitted || SourceInfo.ix == index_start) {
          //Create the inverter info NOTE: always set to TWILSON=12 and 1 flavour (to be adjusted)
          inverterInfo = construct_paramsInverterInfo(err, iteration+1, 12, 1);
          if (im == -1) {
            inverterInfo->cgmms_mass = inverterInfo->mu;
          } else {
            inverterInfo->cgmms_mass = g_extra_masses[im]/(2 * inverterInfo->kappa);
          }
          write_spinor_info(writer, PropInfo.format, inverterInfo, append);
          //Create the propagatorFormat NOTE: always set to 1 flavour (to be adjusted)
          propagatorFormat = construct_paramsPropagatorFormat(cg_mms_default_precision, 1);
          write_propagator_format(writer, propagatorFormat);
          free(inverterInfo);
          free(propagatorFormat);
        }
        /* solver_field[1] and [2] are free at this point and serve as
           scratch space for the conversion; the data is written with the
           same precision that was announced in the propagator format */
        convert_lexic_to_eo(solver_field[2], solver_field[1], temp_save);
        write_spinor(writer, &solver_field[2], &solver_field[1], 1, cg_mms_default_precision);
        destruct_writer(writer);
      }
      finalize_solver(solver_field, nr_sf);
      return(iteration+1);
    }

    /* Compute beta_cg(i+1) = (r(i+1),r(i+1))/(r(i),r(i))
       Compute p(i+1) = r(i+1) + beta(i+1)*p(i)  */
    beta_cg = err/normsq;
    assign_mul_add_r(solver_field[2], beta_cg, solver_field[1], N);
    normsq = err;

    /* Compute betas(i+1) = beta_cg(i)*(zita(i+1)*alphas(i))/(zita(i)*alpha_cg(i))
       Compute ps(i+1) = zita(i+1)*r(i+1) + betas(i+1)*ps(i)  */
    for(im = 0; im < g_no_extra_masses; im++) {
      betas[im] = beta_cg*zita[im]*alphas[im]/(zitam1[im]*alpha_cg);
      assign_mul_add_mul_r(ps_mms_solver[im], solver_field[1], betas[im], zita[im], N);
    }
  }
  /* not converged: hand back the last iterate and leave the globals in the
     same state as the converged exit does */
  assign(P, solver_field[0], N);
  g_sloppy_precision = 0;
  g_mu = tmp_mu;
  finalize_solver(solver_field, nr_sf);
  return(-1);
}
Beispiel #5
0
/* Stopping criterion used for every residual check in solve_cg:
   absolute (rel_prec == 0) or relative to the squared source norm
   (rel_prec == 1); any other rel_prec never signals convergence. */
static int solve_cg_converged(const double res_sq, const double eps_sq,
                              const double src_sq, const int rel_prec) {
  if(rel_prec == 0) {
    return (res_sq <= eps_sq);
  }
  if(rel_prec == 1) {
    return (res_sq <= eps_sq*src_sq);
  }
  return 0;
}

/*
 * Conjugate gradient solve with Qtm_pm_psi on VOLUME/2 sites.
 *
 * k  in/out: start vector on entry, solution on exit
 * l  input:  source
 *
 * With g_sloppy_precision_flag == 1 a mixed-precision scheme is used: up to
 * 20 outer defect-correction steps, each running an inner CG with
 * g_sloppy_precision = 1 on the current defect and adding the result to k.
 * Otherwise a plain CG of at most ITER_MAX_CG iterations is run on k.
 *
 * Uses g_spinor_field[DUM_SOLVER .. DUM_SOLVER+5] as work space and restores
 * g_sloppy_precision before returning.
 *
 * Returns the iteration counter (0 if the start vector already satisfies the
 * stopping criterion in the mixed-precision path).
 */
int solve_cg(spinor * const k, spinor * const l, double eps_sq, const int rel_prec) {

  static double normsq, pro, err, alpha_cg, beta_cg, squarenorm, sqnrm, sqnrm2;
  const int sloppy_on_entry = g_sloppy_precision;
  int iteration = 0, outer, inner;
  double t_start, t_stop, flops;
  spinor *corr, *defect, *applied;
  /* work fields shared by both code paths */
  spinor * const work = g_spinor_field[DUM_SOLVER];    /* Q p, then the new residual */
  spinor * const res  = g_spinor_field[DUM_SOLVER+1];  /* current residual */
  spinor * const dir  = g_spinor_field[DUM_SOLVER+2];  /* search direction */

#ifdef MPI
  t_start = MPI_Wtime();
#else
  t_start = ((double)clock())/((double)(CLOCKS_PER_SEC));
#endif
  squarenorm = square_norm(l, VOLUME/2, 1);

  if(g_sloppy_precision_flag != 1) {
    /* plain CG: initialize residue r and search vector p */
    Qtm_pm_psi(work, k);

    diff(res, l, work, VOLUME/2);
    assign(dir, res, VOLUME/2);
    normsq = square_norm(res, VOLUME/2, 1);

    /* main loop */
    for(iteration = 1; iteration <= ITER_MAX_CG; iteration++) {
      Qtm_pm_psi(work, dir);
      pro = scalar_prod_r(dir, work, VOLUME/2, 1);
      alpha_cg = normsq/pro;
      assign_add_mul_r(k, dir, alpha_cg, VOLUME/2);

      assign_mul_add_r(work, -alpha_cg, res, VOLUME/2);
      err = square_norm(work, VOLUME/2, 1);

      if(g_proc_id == g_stdio_proc && g_debug_level > 1) {
        printf("CG (linsolve): iterations: %d res^2 %e\n", iteration, err);
        fflush(stdout);
      }

      if(solve_cg_converged(err, eps_sq, squarenorm, rel_prec)) {
        break;
      }
      beta_cg = err/normsq;
      assign_mul_add_r(dir, beta_cg, work, VOLUME/2);
      assign(res, work, VOLUME/2);
      normsq = err;
    }
  }
  else {
    /* mixed precision: defect = l - Q k, corrected in outer steps */
    defect = g_spinor_field[DUM_SOLVER+3];
    corr = g_spinor_field[DUM_SOLVER+4];
    applied = g_spinor_field[DUM_SOLVER+5];
    assign(defect, l, VOLUME/2);
    Qtm_pm_psi(applied, k);
    diff(defect, l, applied, VOLUME/2);
    sqnrm = square_norm(defect, VOLUME/2, 1);
    if(solve_cg_converged(sqnrm, eps_sq, squarenorm, rel_prec)) {
      return(0);
    }

    for(outer = 0; outer < 20; outer++) {
      g_sloppy_precision = 1;
      /* main CG loop in lower precision */
      zero_spinor_field(corr, VOLUME/2);
      assign(res, defect, VOLUME/2);
      assign(dir, defect, VOLUME/2);
      sqnrm2 = sqnrm;
      for(inner = 0; inner <= ITER_MAX_CG; inner++) {
        Qtm_pm_psi(work, dir);
        pro = scalar_prod_r(dir, work, VOLUME/2, 1);
        alpha_cg = sqnrm2 / pro;
        assign_add_mul_r(corr, dir, alpha_cg, VOLUME/2);

        assign_mul_add_r(work, -alpha_cg, res, VOLUME/2);
        err = square_norm(work, VOLUME/2, 1);

        if(g_proc_id == g_stdio_proc && g_debug_level > 1) {
          printf("inner CG: %d res^2 %g\n", iteration+inner+1, err);
          fflush(stdout);
        }

        if(solve_cg_converged(err, eps_sq, squarenorm, rel_prec)) {
          break;
        }
        beta_cg = err / sqnrm2;
        assign_mul_add_r(dir, beta_cg, work, VOLUME/2);
        assign(res, work, VOLUME/2);
        sqnrm2 = err;
      }
      /* end main CG loop */
      iteration += inner;
      g_sloppy_precision = 0;
      add(k, k, corr, VOLUME/2);

      /* update the defect in full precision */
      Qtm_pm_psi(applied, corr);
      diff(defect, defect, applied, VOLUME/2);
      sqnrm = square_norm(defect, VOLUME/2, 1);
      if(g_debug_level > 0 && g_proc_id == g_stdio_proc) {
        printf("mixed CG(linsolve): true residue %d\t%g\t\n",iteration, sqnrm); fflush( stdout);
      }

      if(solve_cg_converged(sqnrm, eps_sq, squarenorm, rel_prec)) {
        break;
      }
      iteration++;
    }
  }
#ifdef MPI
  t_stop = MPI_Wtime();
#else
  t_stop = ((double)clock())/((double)(CLOCKS_PER_SEC));
#endif
  /* 2 A + 2 Nc Ns + N_Count ( 2 A + 10 Nc Ns ) */
  /* 2*1320.0 because the linalg is over VOLUME/2 */
  flops = (2*(2*1320.0+2*3*4) + 2*3*4 + iteration*(2.*(2*1320.0+2*3*4) + 10*3*4))*VOLUME/2/1.0e6f;
  if(g_proc_id==0 && g_debug_level > 0) {
    printf("CG: iter: %d eps_sq: %1.4e t/s: %1.4e\n", iteration, eps_sq, t_stop-t_start); 
    printf("CG: flopcount: t/s: %1.4e mflops_local: %.1f mflops: %.1f\n", 
           t_stop-t_start, flops/(t_stop-t_start), g_nproc*flops/(t_stop-t_start));
  }
  g_sloppy_precision = sloppy_on_entry;
  return(iteration);
}
Beispiel #6
0
/*
 * Iterative maximisation of the Ritz functional of the operator obtained by
 * applying Q_psi twice (with offset q_off).
 *
 * rz      output: final value of the Ritz functional (the eigenvalue
 *         estimate)
 * k       index into g_spinor_field of the field holding the vector; it is
 *         overwritten with a random start vector and iterated in place
 * q_off   offset handed to Q_psi
 * eps_sq  the iteration stops once the squared norm of the gradient g is
 *         <= eps_sq
 *
 * Work fields: g_spinor_field[DUM_SOLVER] holds the operator applied to the
 * vector, DUM_SOLVER+1 the search direction p, DUM_SOLVER+2 the gradient g.
 *
 * Returns the iteration counter at exit (ITER_MAX_BCG+1 if the gradient
 * criterion was never met).
 */
int evamax(double *rz, int k, double q_off, double eps_sq) {
  static double ritz,norm0,normg,normg0,beta_cg;
  static double costh,sinth,cosd,sind,aaa,normp,xxx;
  static double xs1,xs2,xs3;
  int iteration;
  /* Initialize k to be gaussian */
  random_spinor_field(g_spinor_field[k], VOLUME/2);
  norm0=square_norm(g_spinor_field[k], VOLUME/2, 1); 
  /*normalize k */
  assign_mul_bra_add_mul_r( g_spinor_field[k], 1./sqrt(norm0),0., g_spinor_field[k], VOLUME/2);
  Q_psi(DUM_SOLVER,k,q_off);
  Q_psi(DUM_SOLVER,DUM_SOLVER,q_off);
  /*compute the ritz functional */
  /*put g on DUM_SOLVER+2 and p on DUM_SOLVER+1*/
  ritz=scalar_prod_r(g_spinor_field[DUM_SOLVER], g_spinor_field[k], VOLUME/2, 1); 
  zero_spinor_field(g_spinor_field[DUM_SOLVER+2],VOLUME/2);
  assign_add_mul_r_add_mul(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER], g_spinor_field[k],
			   1., -ritz, VOLUME/2);
  assign(g_spinor_field[DUM_SOLVER+1], g_spinor_field[DUM_SOLVER+2], VOLUME/2);
  normg0=square_norm(g_spinor_field[DUM_SOLVER+2], VOLUME/2, 1);
  
  /* main loop */
  for(iteration=1;iteration<=ITER_MAX_BCG;iteration++) {
    if(normg0 <= eps_sq) break;
    Q_psi(DUM_SOLVER+2,DUM_SOLVER+1,q_off);
    Q_psi(DUM_SOLVER+2,DUM_SOLVER+2,q_off);
    /*   compute costh and sinth */
    normp=square_norm(g_spinor_field[DUM_SOLVER+1], VOLUME/2, 1);
    xxx=scalar_prod_r(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER+1], VOLUME/2, 1);
    
    xs1=0.5*(ritz+xxx/normp);
    xs2=0.5*(ritz-xxx/normp);
    normp=sqrt(normp);
    xs3=normg0/normp;
    aaa=sqrt(xs2*xs2+xs3*xs3);
    cosd=xs2/aaa;
    sind=xs3/aaa;
    
    /* half-angle formulas; the branch avoids dividing by a small number */
    if(cosd>=0.) { 
      costh=sqrt(0.5*(1.+cosd));
      sinth=0.5*sind/costh;
    }
    else {
      sinth=sqrt(0.5*(1.-cosd));
      costh=0.5*sind/sinth;
    } 
    ritz=xs1+aaa;
    
    /* rotate the vector and the operator applied to it by the same angle */
    assign_add_mul_r_add_mul(g_spinor_field[k], g_spinor_field[k], g_spinor_field[DUM_SOLVER+1], 
			     costh-1., sinth/normp, VOLUME/2);
    assign_add_mul_r_add_mul(g_spinor_field[DUM_SOLVER], g_spinor_field[DUM_SOLVER], g_spinor_field[DUM_SOLVER+2],
			     costh-1., sinth/normp, VOLUME/2);
    
    /*   compute g */
    zero_spinor_field(g_spinor_field[DUM_SOLVER+2],VOLUME/2);
    assign_add_mul_r_add_mul(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER], g_spinor_field[k], 
			     1., -ritz, VOLUME/2);
    
    /*   calculate the norm of g' and beta_cg=costh g'^2/g^2 */
    normg=square_norm(g_spinor_field[DUM_SOLVER+2], VOLUME/2, 1);
    beta_cg=costh*normg/normg0;
    if(beta_cg*costh*normp>20.*sqrt(normg))  beta_cg=0.;
    normg0=normg;    
    /*   compute the new value of p */
    /*   (the trailing 1 belongs to scalar_prod_r, not to assign_add_mul_r) */
    assign_add_mul_r(g_spinor_field[DUM_SOLVER+1], g_spinor_field[k],
		     -scalar_prod_r(g_spinor_field[k], g_spinor_field[DUM_SOLVER+1], VOLUME/2, 1), VOLUME/2);
    assign_mul_add_r(g_spinor_field[DUM_SOLVER+1],beta_cg, g_spinor_field[DUM_SOLVER+2], VOLUME/2);
    /*   restore the state of the iteration */
    if(iteration%20==0) {
      /* readjust x */
      xxx=sqrt(square_norm(g_spinor_field[k], VOLUME/2, 1));
      assign_mul_bra_add_mul_r( g_spinor_field[k], 1./xxx,0., g_spinor_field[k], VOLUME/2);
      Q_psi(DUM_SOLVER,k,q_off);
      Q_psi(DUM_SOLVER,DUM_SOLVER,q_off);
      /*compute the ritz functional */
      ritz=scalar_prod_r(g_spinor_field[DUM_SOLVER], g_spinor_field[k], VOLUME/2, 1);
      /*put g on DUM_SOLVER+2 and p on DUM_SOLVER+1*/
      zero_spinor_field(g_spinor_field[DUM_SOLVER+2],VOLUME/2);
      assign_add_mul_r_add_mul(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER], g_spinor_field[k],
			       1., -ritz, VOLUME/2);
      normg0=square_norm(g_spinor_field[DUM_SOLVER+2], VOLUME/2, 1);
      /*subtract a linear combination of x and g from p to 
	ensure (x,p)=0 and (p,g)=(g,g) */
      cosd=scalar_prod_r(g_spinor_field[k], g_spinor_field[DUM_SOLVER+1], VOLUME/2, 1);
      assign_add_mul_r(g_spinor_field[DUM_SOLVER+1], g_spinor_field[k], -cosd, VOLUME/2);
      cosd=scalar_prod_r(g_spinor_field[DUM_SOLVER+1], g_spinor_field[DUM_SOLVER+2], VOLUME/2, 1)-normg0;
      assign_add_mul_r(g_spinor_field[DUM_SOLVER+1], g_spinor_field[DUM_SOLVER+2], -cosd/sqrt(normg0), VOLUME/2);
    }
  }
  *rz=ritz;
  return iteration;
}
Beispiel #7
0
/*
 * BiCGstab(l) solver for f x = b.
 *
 * x0        in/out: start vector on entry, iterated solution on exit
 * b         input:  right-hand side
 * max_iter  iteration budget; the counter advances by l per outer pass
 * eps_sq    squared target precision
 * rel_prec  0: stop when |r|^2 <= eps_sq,
 *           1: stop when |r|^2 <= eps_sq * |b|^2
 * _l        degree of the minimal-residual polynomial; the fixed-size work
 *           arrays below support 1 <= _l <= 4
 * N         number of sites the linear algebra routines run over
 * f         operator
 *
 * Returns the iteration counter (>= 0) if the stopping criterion is met,
 * -1 if it is not met within the iteration budget or if _l is out of range.
 */
int bicgstabell(spinor * const x0, spinor * const b, const int max_iter, 
		double eps_sq, const int rel_prec, const int _l, const int N, matrix_mult f) {

  enum { max_l = 4 };  /* largest l the fixed-size work arrays can hold */
  double err;
  int i, j, k, l, converged;
  double rho0, rho1, beta, alpha, omega, gamma0 = 0., squarenorm;
  spinor * r[max_l+1], * u[max_l+1], * r0_tilde, * x;
  double tau[max_l+1][max_l+1], gamma[max_l+1], gammap[max_l+1], gammapp[max_l+1], sigma[max_l+1];
  spinor ** solver_field = NULL;
  const int nr_sf = 2*(_l+1)+2;

  /* r[], u[] and tau[][] are indexed up to l, so anything larger would write
     past the end of the arrays; l < 1 leaves the MR part undefined */
  if(_l < 1 || _l > max_l) {
    if(g_proc_id == 0) {
      fprintf(stderr, "bicgstabell: l = %d is not supported (1 <= l <= %d)\n", _l, (int)max_l);
    }
    return(-1);
  }

  l = _l;
  k = -l;

  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }
  r0_tilde = solver_field[0];
  for(i = 0; i <= l; i++){
    r[i] = solver_field[2+2*i];
    u[i] = solver_field[3+2*i];
  }

  /* initial residual r_0 = b - f x; u[0] and r0_tilde serve as scratch here */
  x = x0; 
  assign(u[0], b, N);
  f(r0_tilde, x);
  diff(r[0], u[0], r0_tilde, N);
  zero_spinor_field(solver_field[1], N);
  assign(r0_tilde, r[0], N);
  squarenorm = square_norm(b, N, 1);

  rho0 = 1.;
  alpha = 0.;
  omega = 1.;
  err = square_norm(r0_tilde, N, 1);
  while( k < max_iter && (((err > eps_sq) && (rel_prec == 0)) 
			  || ((err > eps_sq*squarenorm) && (rel_prec == 1)) 
			  )) {
    k+=l;

    /* The BiCG part */

    rho0 *= -omega;
    for(j = 0; j < l; j++) {
      rho1 = scalar_prod_r(r[j], r0_tilde, N, 1);
      beta = (rho1/rho0);
      beta *= alpha; 
      rho0 = rho1;
      for(i = 0; i <= j; i++) {
	/* u_i = r_i - \beta u_i */
	assign_mul_add_r(u[i], -beta, r[i], N);
      }
      f(u[j+1], u[j]);
      gamma0 = scalar_prod_r(u[j+1], r0_tilde, N, 1);
      alpha = rho0/gamma0;
      /* r_i = r_i - \alpha u_{i+1} */
      for(i = 0; i <= j; i++) {
	assign_add_mul_r(r[i], u[i+1], -alpha, N);
      }
      f(r[j+1], r[j]);
      /* x = x + \alpha u_0 */
      assign_add_mul_r(x, u[0], alpha, N);
      err = square_norm(r[j+1], N, 1);
      if(g_proc_id == 0 && g_debug_level > 1) {printf("%d %d err = %e\n", k, j, err);fflush(stdout);}
    }

    /* The MR part */

    /* orthogonalise r_1..r_l against each other and project r_0 on them */
    for(j = 1; j <= l; j++){
      for(i = 1; i < j; i++){
	tau[i][j] = scalar_prod_r(r[j], r[i], N, 1)/sigma[i];
	assign_add_mul_r(r[j], r[i], -tau[i][j], N);
      }
      sigma[j] = scalar_prod_r(r[j], r[j], N, 1);
      gammap[j] = scalar_prod_r(r[0], r[j], N, 1)/sigma[j];
    }
    gamma[l] = gammap[l];
    omega = gamma[l];
    for(j = l-1; j > 0; j--) {
      gamma[j] = gammap[j];
      for(i = j+1; i <= l; i++) {
	gamma[j] -= (tau[j][i]*gamma[i]);
      }
    }
    for(j = 1; j < l; j++) {
      gammapp[j] = gamma[j+1];
      for(i = j+1; i < l; i++){
	gammapp[j] += (tau[j][i]*gamma[i+1]);
      }
    }
    /* update x, r_0 and u_0 with the MR coefficients */
    assign_add_mul_r(x, r[0], gamma[1], N);
    assign_add_mul_r(r[0], r[l], -gammap[l], N);
    for(j = 1; j < l; j++){
      assign_add_mul_r(x, r[j], gammapp[j], N);
      assign_add_mul_r(r[0], r[j], -gammap[j], N);
    }
    assign_add_mul_r(u[0], u[l], -gamma[l], N);
    for(j = 1; j < l; j++){
      assign_add_mul_r(u[0], u[j], -gamma[j], N);
    }
    err = square_norm(r[0], N, 1);
    if(g_proc_id == 0 && g_debug_level > 0){
      printf(" BiCGstabell iterated %d %d, %e rho0 = %e, alpha = %e, gamma0= %e\n", l, k, err, rho0, alpha, gamma0);
      fflush( stdout );
    }
  }
  finalize_solver(solver_field, nr_sf);

  /* Report success or failure from the residual itself. The counter moves in
     steps of l, so it can step past max_iter without ever being equal to it,
     and it can hit max_iter exactly on the pass that converges. A NaN
     residual counts as failure. */
  converged = ((err <= eps_sq) && (rel_prec == 0))
    || ((err <= eps_sq*squarenorm) && (rel_prec == 1));
  if(!converged) return(-1);
  /* k is still -l when the start vector already met the criterion */
  if(k < 0) return(0);
  return(k);
}
Beispiel #8
0
/*
 * Heatbath step for the non-degenerate rational correction monomial id.
 *
 * Draws Gaussian random fields into pf and pf2, stores their combined square
 * norm as energy0, and then adds to pf/pf2 the series
 *   sum_i coefs[i-1] * Z^i R
 * (coefs being the expansion coefficients of (1+x)^(1/4)). Z^i R is built by
 * repeated calls to apply_Z_ndpsi. The series is cut off as soon as the
 * squared estimate of the next-order energy correction drops below accprec,
 * or when the tabulated coefficients are exhausted.
 *
 * Side effects: sets the global parameters through nd_set_global_parameter,
 * sets g_mu3 = 0, resets the monomial's iter0 counter, fills solver_params,
 * and for NDCLOVERRATCOR recomputes the clover term.
 */
void ndratcor_heatbath(const int id, hamiltonian_field_t * const hf) {
  monomial * mnl = &monomial_list[id];
  double atime, etime, delta;
  spinor * up0, * dn0, * up1, * dn1, * tup, * tdn, * Zup, * Zdn;
  double coefs[6] = {1./4., -3./32., 7./128., -77./2048., 231./8192., -1463./65536.}; // series of (1+x)^(1/4)
  double coefs_check[6] = {1./2., -1./8., 1./16., -5./128., 7./256., -21./1024.}; // series of (1+x)^(1/2)
  // highest order available in both tables; the loop below must not go beyond it
  const int n_coefs = (int)(sizeof(coefs)/sizeof(coefs[0]));
  atime = gettime();
  nd_set_global_parameter(mnl);
  g_mu3 = 0.;
  mnl->iter0 = 0;
  if(mnl->type == NDCLOVERRATCOR) {
    init_sw_fields();
    sw_term((const su3**)hf->gaugefield, mnl->kappa, mnl->c_sw); 
    sw_invert_nd(mnl->mubar*mnl->mubar - mnl->epsbar*mnl->epsbar);
    copy_32_sw_fields();
  }
  // we measure before the trajectory!
  // NOTE(review): every other clover check in this function tests for
  // NDCLOVERRATCOR; the comparison against NDCLOVERRAT here looks like it was
  // carried over from another monomial - confirm which operator is intended.
  if((mnl->rec_ev != 0) && (hf->traj_counter%mnl->rec_ev == 0)) {
    if(mnl->type != NDCLOVERRAT) phmc_compute_ev(hf->traj_counter-1, id, &Qtm_pm_ndbipsi);
    else phmc_compute_ev(hf->traj_counter-1, id, &Qsw_pm_ndbipsi);
  }

  // the Gaussian distributed random fields
  mnl->energy0 = 0.;
  random_spinor_field_eo(mnl->pf, mnl->rngrepro, RN_GAUSS);
  mnl->energy0 = square_norm(mnl->pf, VOLUME/2, 1);

  random_spinor_field_eo(mnl->pf2, mnl->rngrepro, RN_GAUSS);
  mnl->energy0 += square_norm(mnl->pf2, VOLUME/2, 1);

  mnl->solver_params.max_iter = mnl->maxiter;
  mnl->solver_params.squared_solver_prec = mnl->accprec;
  mnl->solver_params.no_shifts = mnl->rat.np;
  mnl->solver_params.shifts = mnl->rat.mu;
  mnl->solver_params.type = mnl->solver;
  mnl->solver_params.M_ndpsi = &Qtm_pm_ndpsi;
  mnl->solver_params.M_ndpsi32 = &Qtm_pm_ndpsi_32;    
  if(mnl->type == NDCLOVERRATCOR) {
    mnl->solver_params.M_ndpsi = &Qsw_pm_ndpsi;
    mnl->solver_params.M_ndpsi32 = &Qsw_pm_ndpsi_32;
  }
  mnl->solver_params.sdim = VOLUME/2;
  mnl->solver_params.rel_prec = g_relative_precision_flag;

  // apply B to the random field to generate pseudo-fermion fields
  up0 = mnl->w_fields[0]; dn0 = mnl->w_fields[1];
  up1 = mnl->w_fields[2]; dn1 = mnl->w_fields[3];
  Zup = mnl->w_fields[4]; Zdn = mnl->w_fields[5];

  apply_Z_ndpsi(up0, dn0, mnl->pf, mnl->pf2, id, hf, &(mnl->solver_params));
  // computing correction to energy1
  delta = coefs_check[0]*(scalar_prod_r(mnl->pf, up0, VOLUME/2, 1) + scalar_prod_r(mnl->pf2, dn0, VOLUME/2, 1));
  if(g_debug_level > 2 && g_proc_id == 0)
    printf("# NDRATCOR heatbath: c_%d*(R * Z^%d * R) = %e\n", 1, 1, delta);
  // debug for showing that the old check was giving a smaller delta
  if(g_debug_level > 3) {
    double delta_old = square_norm(up0, VOLUME/2, 1) + square_norm(dn0, VOLUME/2, 1);
    if(g_proc_id == 0) {
      printf("# NDRATCOR old check: || Z^%d * R ||^2 = %e\n", 1, delta_old);
      printf("# NDRATCOR new check: (c_%d*(R * Z^%d * R))^2 = %e\n", 1, 1, delta*delta);
    }
  }

  if(delta*delta > mnl->accprec) {
    assign_add_mul_r(mnl->pf, up0, coefs[0], VOLUME/2);
    assign_add_mul_r(mnl->pf2, dn0, coefs[0], VOLUME/2);
    
    // saving first application
    assign(Zup, up0, VOLUME/2);
    assign(Zdn, dn0, VOLUME/2);
    
    // at the top of iteration i, up0/dn0 hold Z^(i-1) R
    for(int i = 2; i <= n_coefs; i++) {
      // computing next order correction to energy1; each flavour is
      // contracted with its own saved Z R field
      delta = coefs_check[i-1]*(scalar_prod_r(Zup, up0, VOLUME/2, 1) + scalar_prod_r(Zdn, dn0, VOLUME/2, 1)); 
      if(g_debug_level > 2 && g_proc_id == 0)
        printf("# NDRATCOR heatbath: c_%d*(R * Z^%d * R) = %e\n", i, i, delta);
      // debug for showing that the old check was giving a smaller delta
      if(g_debug_level > 3) {
        double delta_old = square_norm(up0, VOLUME/2, 1) + square_norm(dn0, VOLUME/2, 1);
        if(g_proc_id == 0) {
          printf("# NDRATCOR old check: || Z^%d * R ||^2 = %e\n", i-1, delta_old);
          printf("# NDRATCOR new check: (c_%d*(R * Z^%d * R))^2 = %e\n", i, i, delta*delta);
        }
      }
      if(delta*delta < mnl->accprec) break;

      apply_Z_ndpsi(up1, dn1, up0, dn0, id, hf, &(mnl->solver_params));
      
      assign_add_mul_r(mnl->pf, up1, coefs[i-1], VOLUME/2);
      assign_add_mul_r(mnl->pf2, dn1, coefs[i-1], VOLUME/2);

      // swap the buffers so that up0/dn0 hold the newest power of Z
      tup = up0; tdn = dn0;
      up0 = up1; dn0 = dn1;
      up1 = tup; dn1 = tdn;
    }
  }
  etime = gettime();
  if(g_proc_id == 0) {
    if(g_debug_level > 1) {
      printf("# Time for %s monomial heatbath: %e s\n", mnl->name, etime-atime);
    }
    if(g_debug_level > 3) { 
      printf("called ndratcor_heatbath for id %d energy %f\n", id, mnl->energy0);
    }
  }
  return;
}
Beispiel #9
0
/*
 * Acceptance step for the non-degenerate rational correction monomial id.
 *
 * Starts energy1 from |pf|^2 + |pf2|^2 and adds the series
 *   sum_i coefs[i-1] * (phi, Z^i phi).
 * One apply_Z_ndpsi call yields two orders: the even order as the square
 * norm of the current Z power, the odd order as a scalar product with the
 * next one. The series stops once the square of the last term is below
 * accprec, or when the tabulated coefficients are exhausted.
 *
 * Side effects: sets the global parameters through nd_set_global_parameter,
 * sets g_mu3 = 0, fills solver_params, and for NDCLOVERRATCOR recomputes the
 * clover term.
 *
 * Returns energy1 - energy0.
 */
double ndratcor_acc(const int id, hamiltonian_field_t * const hf) {
  monomial * mnl = &monomial_list[id];
  double atime, etime, delta;
  spinor * up0, * dn0, * up1, * dn1, * tup, * tdn;
  double coefs[6] = {-1./2., 3./8., -5./16., 35./128., -63./256., 231./1024.};
  // highest order available in coefs; the loop below must not go beyond it
  const int n_coefs = (int)(sizeof(coefs)/sizeof(coefs[0]));
  atime = gettime();
  nd_set_global_parameter(mnl);
  g_mu3 = 0.;
  if(mnl->type == NDCLOVERRATCOR) {
    sw_term((const su3**) hf->gaugefield, mnl->kappa, mnl->c_sw); 
    sw_invert_nd(mnl->mubar*mnl->mubar - mnl->epsbar*mnl->epsbar);
    copy_32_sw_fields();
  }
  mnl->energy1 = square_norm(mnl->pf, VOLUME/2, 1) + square_norm(mnl->pf2, VOLUME/2, 1);

  mnl->solver_params.max_iter = mnl->maxiter;
  mnl->solver_params.squared_solver_prec = mnl->accprec;
  mnl->solver_params.no_shifts = mnl->rat.np;
  mnl->solver_params.shifts = mnl->rat.mu;
  mnl->solver_params.type = mnl->solver;
  mnl->solver_params.M_ndpsi = &Qtm_pm_ndpsi;
  mnl->solver_params.M_ndpsi32 = &Qtm_pm_ndpsi_32;    
  if(mnl->type == NDCLOVERRATCOR) {
    mnl->solver_params.M_ndpsi = &Qsw_pm_ndpsi;
    mnl->solver_params.M_ndpsi32 = &Qsw_pm_ndpsi_32;
  }
  mnl->solver_params.sdim = VOLUME/2;
  mnl->solver_params.rel_prec = g_relative_precision_flag;

  // apply (Q R)^(-1) to pseudo-fermion fields
  up0 = mnl->w_fields[0]; dn0 = mnl->w_fields[1];
  up1 = mnl->w_fields[2]; dn1 = mnl->w_fields[3];

  apply_Z_ndpsi(up0, dn0, mnl->pf, mnl->pf2, id, hf, &(mnl->solver_params));
  delta = coefs[0]*(scalar_prod_r(mnl->pf, up0, VOLUME/2, 1) + scalar_prod_r(mnl->pf2, dn0, VOLUME/2, 1));
  mnl->energy1 += delta;
  if(g_debug_level > 2 && g_proc_id == 0)
    printf("# NDRATCOR acc step: c_%d*(phi * Z^%d * phi) = %e\n", 1, 1, delta);

  // i is the order of the term about to be added; it advances twice per pass
  for(int i = 2; i <= n_coefs; i++) {
    if(delta*delta < mnl->accprec) break;

    // even order from the field already at hand
    delta = coefs[i-1]*(square_norm(up0, VOLUME/2, 1) + square_norm(dn0, VOLUME/2, 1)); 
    mnl->energy1 += delta;
    if(g_debug_level > 2 && g_proc_id == 0)
      printf("# NDRATCOR acc step: c_%d*(phi * Z^%d * phi) = %e\n", i, i, delta);
    i++; //incrementing i
    // stop on convergence, and also when no coefficient is left for the odd
    // order (this also saves the solve that would feed it)
    if(delta*delta < mnl->accprec || i > n_coefs) break;

    // odd order needs one more application of Z
    apply_Z_ndpsi(up1, dn1, up0, dn0, id, hf, &(mnl->solver_params));
    delta = coefs[i-1]*(scalar_prod_r(up0, up1, VOLUME/2, 1) + scalar_prod_r(dn0, dn1, VOLUME/2, 1));
    mnl->energy1 += delta;
    if(g_debug_level > 2 && g_proc_id == 0)
      printf("# NDRATCOR acc step: c_%d*(phi * Z^%d * phi) = %e\n", i, i, delta);

    // swap the buffers so that up0/dn0 hold the newest power of Z
    tup = up0; tdn = dn0;
    up0 = up1; dn0 = dn1;
    up1 = tup; dn1 = tdn;
  }


  etime = gettime();
  if(g_proc_id == 0) {
    if(g_debug_level > 1) {
      printf("# Time for %s monomial acc step: %e s\n", mnl->name, etime-atime);
    }
    if(g_debug_level > 3) { // should be 3
      printf("called ndratcor_acc for id %d dH = %1.10e\n", id, mnl->energy1 - mnl->energy0);
    }
  }
  return(mnl->energy1 - mnl->energy0);
}
Beispiel #10
0
/*
 * Multi-shift conjugate gradient (CGMMS).
 *
 * One CG recursion is run for the "base" system, i.e. solver_params->M_psi
 * with shifts[0]^2 added, and its coefficients are reused to update the
 * solutions of the other shifted systems, whose offsets relative to the base
 * system are shifts[im]^2 - shifts[0]^2.
 *
 * P                   output, P[im] is the solution for shift im; every P[im]
 *                     is zeroed first, so the start vector is always zero
 * Q                   input, the common source
 * solver_params       sdim (field length), no_shifts, shifts, M_psi,
 *                     max_iter, squared_solver_prec and rel_prec are read
 * cgmms_reached_prec  output, squared residual norm of the base system at the
 *                     moment the iteration stopped
 *
 * Returns the number of iterations performed when the residual criterion was
 * met (also when that happens on the very last permitted iteration), and -1
 * when the iteration budget ran out without meeting it.
 */
int cg_mms_tm(spinor ** const P, spinor * const Q,
		 solver_params_t * solver_params, double * cgmms_reached_prec) {

  static double normsq, pro, err, squarenorm;
  int iteration, N = solver_params->sdim, no_shifts = solver_params->no_shifts;
  static double gamma, alpham1;
  spinor ** solver_field = NULL;
  double atime, etime;
  const int nr_sf = 3;
  /* set only when the residual test passes, so that "ran out of iterations"
     can be told apart from "converged on the last iteration" */
  int converged = 0;

  atime = gettime();
  if(solver_params->sdim == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
    init_mms_tm(no_shifts, VOLUMEPLUSRAND);
  } 
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf); 
    init_mms_tm(no_shifts, VOLUMEPLUSRAND/2);
  } 

  zero_spinor_field(P[0], N);
  alphas[0] = 1.0;
  betas[0] = 0.0;
  sigma[0] = solver_params->shifts[0]*solver_params->shifts[0];
  if(g_proc_id == 0 && g_debug_level > 1) printf("# CGMMS: shift %d is %e\n", 0, sigma[0]);

  for(int im = 1; im < no_shifts; im++) {
    /* shifts are stored relative to the base shift sigma[0] */
    sigma[im] = solver_params->shifts[im]*solver_params->shifts[im] - sigma[0];
    if(g_proc_id == 0 && g_debug_level > 1) printf("# CGMMS: shift %d is %e\n", im, sigma[im]);
    // these will be the result spinor fields
    zero_spinor_field(P[im], N);
    // these are intermediate fields
    assign(ps_mms_solver[im-1], Q, N);
    zitam1[im] = 1.0;
    zita[im] = 1.0;
    alphas[im] = 1.0;
    betas[im] = 0.0;
  }

  /* currently only implemented for P=0 */
  squarenorm = square_norm(Q, N, 1);
  /* if a starting solution vector equal to zero is chosen */
  assign(solver_field[0], Q, N);
  assign(solver_field[1], Q, N);
  normsq = squarenorm;
  /* with a zero start vector the residual is the source itself; this is the
     value reported if the loop below never executes (max_iter <= 0) */
  err = squarenorm;

  /* main loop */
  for(iteration = 0; iteration < solver_params->max_iter; iteration++) {

    /*   Q^2*p and then (p,Q^2*p)  */
    solver_params->M_psi(solver_field[2], solver_field[1]);
    // add the zero's shift
    assign_add_mul_r(solver_field[2], solver_field[1], sigma[0], N);
    pro = scalar_prod_r(solver_field[1], solver_field[2], N, 1);

    /* For the update of the coeff. of the shifted pol. we need alphas[0](i-1) and alpha_cg(i).
       This is the reason why we need this double definition of alpha */
    alpham1 = alphas[0];

    /* Compute alphas[0](i+1) */
    alphas[0] = normsq/pro;
    for(int im = 1; im < no_shifts; im++) {

      /* Now gamma is a temp variable that corresponds to zita(i+1) */ 
      gamma = zita[im]*alpham1/(alphas[0]*betas[0]*(1.-zita[im]/zitam1[im]) 
				+ alpham1*(1.+sigma[im]*alphas[0]));

      // Now zita(i-1) is put equal to the old zita(i)
      zitam1[im] = zita[im];
      // Now zita(i+1) is updated 
      zita[im] = gamma;
      // Update of alphas(i) = alphas[0](i)*zita(i+1)/zita(i) 
      alphas[im] = alphas[0]*zita[im]/zitam1[im];

      // Compute xs(i+1) = xs(i) + alphas(i)*ps(i) 
      assign_add_mul_r(P[im], ps_mms_solver[im-1], alphas[im], N); 
      // in the CG the corrections are decreasing with the iteration number increasing
      // therefore, we can remove shifts when the norm of the correction vector
      // falls below a threshold
      // this is useful for computing time and needed, because otherwise
      // zita might get smaller than DOUBLE_EPS and, hence, zero
      if(iteration > 0 && (iteration % 20 == 0) && (im == no_shifts-1)) {
	double sn = square_norm(ps_mms_solver[im-1], N, 1);
	if(alphas[no_shifts-1]*alphas[no_shifts-1]*sn <= solver_params->squared_solver_prec) {
	  /* only the local copy shrinks; solver_params->no_shifts is untouched */
	  no_shifts--;
	  if(g_debug_level > 2 && g_proc_id == 0) {
	    printf("# CGMMS: at iteration %d removed one shift, %d remaining\n", iteration, no_shifts);
	  }
	}
      }
    }
    
    /*  Compute x_(i+1) = x_i + alphas[0](i+1) p_i    */
    assign_add_mul_r(P[0], solver_field[1],  alphas[0], N);
    /*  Compute r_(i+1) = r_i - alphas[0](i+1) Qp_i   */
    assign_add_mul_r(solver_field[0], solver_field[2], -alphas[0], N);

    /* Check whether the precision eps_sq is reached */

    err = square_norm(solver_field[0], N, 1);

    if(g_debug_level > 2 && g_proc_id == g_stdio_proc) {
      printf("# CGMMS iteration: %d residue: %g\n", iteration, err); fflush( stdout );
    }

    /* absolute criterion for rel_prec == 0, relative to |Q|^2 for rel_prec > 0 */
    converged = ((err <= solver_params->squared_solver_prec) && (solver_params->rel_prec == 0)) ||
                ((err <= solver_params->squared_solver_prec*squarenorm) && (solver_params->rel_prec > 0));
    if(converged || (iteration == solver_params->max_iter -1)) {
      break;
    }

    /* Compute betas[0](i+1) = (r(i+1),r(i+1))/(r(i),r(i))
       Compute p(i+1) = r(i+1) + beta(i+1)*p(i)  */
    betas[0] = err/normsq;
    assign_mul_add_r(solver_field[1], betas[0], solver_field[0], N);
    normsq = err;

    /* Compute betas(i+1) = betas[0](i+1)*(zita(i+1)*alphas(i))/(zita(i)*alphas[0](i))
       Compute ps(i+1) = zita(i+1)*r(i+1) + betas(i+1)*ps(i)  */
    for(int im = 1; im < no_shifts; im++) {
      betas[im] = betas[0]*zita[im]*alphas[im]/(zitam1[im]*alphas[0]);
      assign_mul_add_mul_r(ps_mms_solver[im-1], solver_field[0], betas[im], zita[im], N);
    }
  }
  /* FIXME temporary output of precision until a better solution can be found */
  *cgmms_reached_prec = err;
  etime = gettime();
  g_sloppy_precision = 0;
  /* the loop always leaves via break, so iteration was not yet incremented
     for the pass that ended it */
  if(converged) iteration++;
  else iteration = -1;
  if(g_debug_level > 0 && g_proc_id == 0) {
    printf("# CGMMS (%d shifts): iter: %d eps_sq: %1.4e %1.4e t/s\n", solver_params->no_shifts, iteration, solver_params->squared_solver_prec, etime - atime); 
  }
  
  finalize_solver(solver_field, nr_sf);
  return(iteration);
}
Beispiel #11
0
/*
 * Preconditioned conjugate gradient for the operator f.
 *
 * P         in: start vector, out: the iterate reached when the routine stops
 * Q         in: source
 * max_iter  upper bound on the number of iterations
 * eps_sq    bound on the squared residual norm; taken as absolute for
 *           rel_prec == 0 and relative to |Q|^2 for rel_prec == 1
 * N         field length; N == VOLUME selects full-size work fields,
 *           anything else half-size ones
 * f         matrix multiplication, called as f(out, in)
 *
 * Returns the number of iterations used when the criterion is met. When
 * max_iter is exhausted the current iterate is still copied to P and 1 is
 * returned.
 *
 * The preconditioned residual z is produced once by invert_eigenvalue_part
 * before the loop and is not recomputed inside it.
 */
int pcg_her(spinor * const P, spinor * const Q, const int max_iter, 
	    double eps_sq, const int rel_prec, const int N, matrix_mult f) {
  spinor ** solver_field = NULL;
  const int nr_sf = 5;
  int result = 1;

  init_solver_field(&solver_field, (N == VOLUME) ? VOLUMEPLUSRAND : VOLUMEPLUSRAND/2, nr_sf);

  /* roles of the five work fields */
  spinor * const x  = solver_field[0];   /* current iterate */
  spinor * const r  = solver_field[1];   /* residual */
  spinor * const p  = solver_field[2];   /* search direction, scratch during setup */
  spinor * const z  = solver_field[3];   /* preconditioned residual, holds the source during setup */
  spinor * const Ap = solver_field[4];   /* f applied to p */

  const double src_norm = square_norm(Q, N, 1);

  assign(x, P, N);
  const double guess_norm = square_norm(P, N, 1);

  assign(z, Q, N);
  if(guess_norm != 0) {
    /* non-zero start vector: r0 = b - A x0, with p as scratch for A x0 */
    f(p, x);
    diff(r, z, p, N);
  }
  else {
    /* zero start vector: r0 = b */
    assign(r, z, N);
  }

  /* z0 = M^-1 r0, p0 = z0 */
  invert_eigenvalue_part(z, r, 10, N);
  assign(p, z, N);

  double rz = scalar_prod_r(r, z, N, 1);

  for(int iteration = 0; iteration < max_iter; iteration++) {
    f(Ap, p);

    const double pAp = scalar_prod_r(p, Ap, N, 1);
    const double alpha = rz/pAp;

    /* x += alpha p,  r -= alpha A p */
    assign_add_mul_r(x, p,  alpha, N);
    assign_add_mul_r(r, Ap, -alpha, N);

    const double err = square_norm(r, N, 1);
    if(g_debug_level > 1 && g_proc_id == g_stdio_proc) {
      printf("%d\t%g\n",iteration,err); fflush( stdout);
    }

    const int reached = (rel_prec == 0) ? (err <= eps_sq)
                                        : ((rel_prec == 1) && (err <= eps_sq*src_norm));
    if(reached) {
      result = iteration+1;
      break;
    }
#ifdef _USE_HALFSPINOR
    /* switch to sloppy precision once err^2 passes the bound, or late in the run */
    if(((rel_prec == 0) && (err*err <= eps_sq)) || ((rel_prec == 1) && (err*err <= eps_sq*src_norm)) || iteration > 1400) {
      g_sloppy_precision = 1;
      if(g_debug_level > 2 && g_proc_id == g_stdio_proc) {
	printf("sloppy precision on\n"); fflush( stdout);
      }
    }
#endif
    /* beta = (1/(r,z)_old) * (r,z)_new, then p = z + beta p
       -- presumably assign_mul_add_r(p, c, z) is p = c*p + z; confirm */
    const double inv_rz_old = 1/rz;
    rz = scalar_prod_r(r, z, N, 1);
    assign_mul_add_r(p, inv_rz_old*rz, z, N);
  }

  /* common exit: hand back the iterate and release the work fields */
  assign(P, x, N);
  g_sloppy_precision = 0;
  finalize_solver(solver_field, nr_sf);
  return(result);
}
Beispiel #12
0
/*
 * BiCGstab(2) with a convex-combination polynomial step and reliable
 * residual/solution updates.
 *
 * x0        in: start vector, out: solution estimate
 * b         in: source
 * max_iter  bound checked against the step counter k, which advances by 2
 *           per pass of the outer loop
 * eps_sq    bound on the squared residual norm; absolute for rel_prec == 0,
 *           relative to |b|^2 for rel_prec == 1
 * N         field length; N == VOLUME selects full-size work fields,
 *           anything else half-size ones
 * f         matrix multiplication, called as f(out, in)
 *
 * Returns -1 if k equals max_iter when the loop ends, otherwise k.
 * NOTE(review): k starts at -2 and only takes even values, so the -1 branch
 * cannot be reached for odd max_iter, and a source that already satisfies
 * the criterion yields -2. Left unchanged because callers may rely on it.
 */
int bicgstab2(spinor * const x0, spinor * const b, const int max_iter, 
		double eps_sq, const int rel_prec, const int N, matrix_mult f) {

  const int l = 2;
  double err;
  int i, j, k;
  int update_app = 0, update_res = 0;
  double rho0, rho1, beta, alpha, omega, gamma_hat,
    sigma, kappa0, kappal, rho, zeta0;
  double squarenorm, Mx=0., Mr=0.;
  spinor * r[5], * u[5], * r0_tilde, * u0, * x, * xp, * bp;
  double Z[3][3], y0[3], yl[3], yp[3], ypp[3];
  spinor ** solver_field = NULL;
  const int nr_sf = 10;

  k = -l;
  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }
  /* only entries 0..l of r[] and u[] are used */
  r0_tilde = solver_field[0];
  u0 = solver_field[1];
  r[0] = solver_field[2];
  u[0] = solver_field[3];
  r[1] = solver_field[4];
  u[1] = solver_field[5];
  r[2] = solver_field[6];
  u[2] = solver_field[7];
  bp = solver_field[8];
  /* xp aliases the caller's x0 and accumulates the solution in place;
     x holds the correction since the last "update app" */
  xp = x0;
  x = solver_field[9];

  zero_spinor_field(x, N);
  assign(u[0], b, N);
  /* r[0] = b - f(x0) */
  f(r0_tilde, xp);
  diff(r[0], u[0], r0_tilde, N);
  zero_spinor_field(u0, N);
  assign(r0_tilde, r[0], N); 
  assign(bp, r[0], N);
  squarenorm = square_norm(b, N, 1);

  rho0 = 1.;
  alpha = rho0;
  omega = rho0;
  err = square_norm(r[0], N, 1);
  Mr = err;
  Mx = err;
  zeta0 = err;
  while( k < max_iter && (((err > eps_sq) && (rel_prec == 0)) 
			  || ((err > eps_sq*squarenorm) && (rel_prec == 1)) 
			  )) {
    k+=l;

    /* The BiCG part */
    rho0 *= -omega; 
    for(j = 0; j < l; j++) {
      rho1 = scalar_prod_r(r[j], r0_tilde, N, 1);
      beta = alpha*(rho1/rho0); 
      rho0 = rho1;
      for(i = 0; i <= j; i++) {
	/* u_i = r_i - \beta u_i */
	assign_mul_add_r(u[i], -beta, r[i], N);
      }
      f(u[j+1], u[j]);
      sigma = scalar_prod_r(u[j+1], r0_tilde, N, 1);
      alpha = rho1/sigma;
      /* x = x + \alpha u_0 */
      assign_add_mul_r(x, u[0], alpha, N);
      /* r_i = r_i - \alpha u_{i+1} */
      for(i = 0; i <= j; i++) {
	assign_add_mul_r(r[i], u[i+1], -alpha, N);
      }
      f(r[j+1], r[j]);
      err = square_norm(r[j+1], N, 1);
      if(g_proc_id == 0 && g_debug_level > 1) {printf("%d %d err = %e\n", k, j, err);fflush(stdout);}
      /* track the largest residual norms seen, for the reliable update below */
      if(err > Mr) Mr = err;
      if(err > Mx) Mx = err;
    }

    /* The polynomial part */

    /* Z = R* R, the symmetric matrix of scalar products of r[0..l] */
    for(i = 0; i <= l; i++){
      for(j = 0; j <= i; j++){
	Z[i][j] = scalar_prod_r(r[j], r[i], N, 1);
	Z[j][i] = Z[i][j];
      }
    }

    /* r0tilde and rl_tilde */
    y0[0] = -1;
    y0[2] = 0.;
    y0[1] = Z[1][0]/Z[1][1]; 

    yl[0] = 0.;
    yl[2] = -1.;
    yl[1] = Z[1][2]/Z[1][1]; 

    /* Convex combination */
    for(i = 0; i < l+1; i++){
      yp[i] = 0.;
      ypp[i] = 0.;
      for(j = 0; j < l+1; j++) {
	yp[i] +=Z[i][j]*y0[j];
	ypp[i] +=Z[i][j]*yl[j];
      }
    }
    kappa0 = sqrt( y0[0]*yp[0] + y0[1]*yp[1] + y0[2]*yp[2] );
    kappal = sqrt( yl[0]*ypp[0] + yl[1]*ypp[1] + yl[2]*ypp[2] );
    rho = (yl[0]*yp[0] + yl[1]*yp[1] + yl[2]*yp[2])/kappa0/kappal;
    /* gamma_hat keeps the sign of rho with magnitude max(|rho|, 0.7) */
    if(fabs(rho) > 0.7) {
      gamma_hat = rho;
    }
    else {
      gamma_hat = rho*0.7/fabs(rho);
    }
    for(i = 0; i <= l; i++) {
      y0[i] -= gamma_hat*kappa0*yl[i]/kappal;
    }

    /* Update */
    omega = y0[l];
    for(i = 1; i < l+1; i++) {
      assign_add_mul_r(u[0], u[i], -y0[i], N);
      assign_add_mul_r(x, r[i-1], y0[i], N);
      assign_add_mul_r(r[0], r[i], -y0[i], N);
    }
    /* the loop condition is tested against this estimate */
    err = kappa0*kappa0;
    /* Reliable update part */
    if(err > Mr) Mr = err;
    if(err > Mx) Mx = err;    
    update_app = (err < 1.e-4*zeta0 && zeta0 <= Mx);
    update_res = ((err < 1.e-4*Mr && zeta0 <= Mr) || update_app);
    if(update_res) {
      if(g_proc_id == 0 && g_debug_level > 1) printf("Update res\n");
      /* recompute the residual from the accumulated correction: r0 = bp - f(x) */
      f(r[0], x);
      diff(r[0], bp, r[0], N);
      Mr = err;
      if(update_app) {
	if(g_proc_id == 0  && g_debug_level > 1) printf("Update app\n");
	Mx = err;
	/* fold the correction into x0 and restart it from zero */
	assign_add_mul_r(xp, x, 1., N);
	zero_spinor_field(x, N);
	assign(bp, r[0], N);
      }
    }
    update_app = 0;
    update_res = 0;
    if(g_proc_id == 0 && g_debug_level > 0){
      printf(" BiCGstab(2)convex iterated %d %d, %e rho0 = %e, alpha = %e, gamma_hat= %e\n", 
	     l, k, err, rho0, alpha, gamma_hat);
      fflush( stdout );
    }
  }
  /* x0 = x0 + (correction not yet folded in) */
  assign_add_mul_r(x, xp, 1., N);
  assign(x0, x, N);
  /* release the work fields obtained from init_solver_field; x lives in
     them, so this has to come after the copy into x0 */
  finalize_solver(solver_field, nr_sf);
  if(k == max_iter) return(-1);
  return(k);
}
Beispiel #13
0
/*
 * Conjugate gradient on a pair of fields (up, dn) coupled through f.
 *
 * P_up, P_dn  in: start vectors, out: the iterate reached when the routine stops
 * Q_up, Q_dn  in: sources
 * max_iter    upper bound on the number of iterations
 * eps_sq      bound on the squared residual norm summed over up and dn;
 *             absolute for rel_prec == 0, relative to |Q_up|^2 + |Q_dn|^2
 *             for rel_prec == 1
 * N           field length
 * f           matrix multiplication, called as f(out_up, out_dn, in_up, in_dn)
 *
 * Returns the number of iterations used when the criterion is met, -1 when
 * max_iter is exhausted; the current iterate is copied to P_up/P_dn either way.
 */
int cg_her_nd(spinor * const P_up,spinor * P_dn, spinor * const Q_up, spinor * const Q_dn, 
	      const int max_iter, double eps_sq, const int rel_prec, 
	      const int N, matrix_mult_nd f) {
  spinor ** up_field = NULL;
  spinor ** dn_field = NULL;
  const int nr_sf = 5;
  int result = -1;
  double rr;

  init_solver_field(&up_field, VOLUMEPLUSRAND, nr_sf);
  init_solver_field(&dn_field, VOLUMEPLUSRAND, nr_sf);

  /* roles of the work fields, one set per flavour */
  spinor * const x_up  = up_field[0], * const x_dn  = dn_field[0];   /* iterate */
  spinor * const r_up  = up_field[1], * const r_dn  = dn_field[1];   /* residual */
  spinor * const p_up  = up_field[2], * const p_dn  = dn_field[2];   /* search direction */
  spinor * const t_up  = up_field[3], * const t_dn  = dn_field[3];   /* scratch for A x0 */
  spinor * const Ap_up = up_field[4], * const Ap_dn = dn_field[4];   /* f applied to p */

  double src_norm = square_norm(Q_up, N, 1);
  src_norm += square_norm(Q_dn, N, 1);

  assign(x_up, P_up, N);
  assign(x_dn, P_dn, N);

  double guess_norm = square_norm(P_up, N, 1);
  guess_norm += square_norm(P_dn, N, 1);

  if(guess_norm != 0) {
    /* non-zero start vector: r0 = Q - A x0, p0 = r0 */
    f(t_up, t_dn,
      x_up, x_dn);

    diff(r_up, Q_up, t_up, N);
    diff(r_dn, Q_dn, t_dn, N);
    assign(p_up, r_up, N);
    assign(p_dn, r_dn, N);
    rr  = square_norm(p_up, N, 1);
    rr += square_norm(p_dn, N, 1);
  }
  else {
    /* zero start vector: r0 = p0 = Q */
    assign(r_up, Q_up, N);
    assign(r_dn, Q_dn, N);
    assign(p_up, Q_up, N);
    assign(p_dn, Q_dn, N);
    rr  = square_norm(Q_up, N, 1);
    rr += square_norm(Q_dn, N, 1);
  }

  for(int iteration = 0; iteration < max_iter; iteration++) {
    f(Ap_up, Ap_dn,
      p_up, p_dn);

    double pAp = scalar_prod_r(p_up, Ap_up, N, 1);
    pAp += scalar_prod_r(p_dn, Ap_dn, N, 1);

    const double alpha = rr/pAp;

    /* x += alpha p */
    assign_add_mul_r(x_up, p_up,  alpha, N);
    assign_add_mul_r(x_dn, p_dn,  alpha, N);
    /* r -= alpha A p */
    assign_add_mul_r(r_up, Ap_up, -alpha, N);
    assign_add_mul_r(r_dn, Ap_dn, -alpha, N);

    const double err_up = square_norm(r_up, N, 1);
    const double err_dn = square_norm(r_dn, N, 1);
    const double err = err_up + err_dn;
    if(g_debug_level > 1 && g_proc_id == g_stdio_proc) {
      printf("cg_her_nd : i = %d  esqr  %e = %e + %e \n",iteration,err, err_up, err_dn); fflush( stdout);
    }

    const int reached = (rel_prec == 0) ? (err <= eps_sq)
                                        : ((rel_prec == 1) && (err <= eps_sq*src_norm));
    if(reached) {
      result = iteration+1;
      break;
    }
#ifdef _USE_HALFSPINOR
    /* switch to sloppy precision once err^2 passes the bound */
    if(((rel_prec == 0) && (err*err <= eps_sq)) || ((rel_prec == 1) && (err*err <= eps_sq*src_norm))) {
      g_sloppy_precision = 1;
      if(g_debug_level > 2 && g_proc_id == g_stdio_proc) {
	printf("sloppy precision on\n"); fflush( stdout);
      }
    }
#endif
    /* beta = (r,r)_new/(r,r)_old, then p = r + beta p
       -- presumably assign_mul_add_r(p, c, r) is p = c*p + r; confirm */
    const double beta = err/rr;
    assign_mul_add_r(p_up, beta, r_up, N);
    assign_mul_add_r(p_dn, beta, r_dn, N);
    rr = err;
  }

  /* common exit: hand back the iterate and release both field sets */
  assign(P_up, x_up, N);
  assign(P_dn, x_dn, N);
  g_sloppy_precision = 0;

  finalize_solver(up_field, nr_sf);
  finalize_solver(dn_field, nr_sf);
  return(result);
}