Example #1
int invert_cloverdoublet_eo(spinor * const Even_new_s, spinor * const Odd_new_s, 
			    spinor * const Even_new_c, spinor * const Odd_new_c, 
			    spinor * const Even_s, spinor * const Odd_s,
			    spinor * const Even_c, spinor * const Odd_c,
			    const double precision, const int max_iter,
			    const int solver_flag, const int rel_prec) {
  
  int iter = 0;
  
  
  /* here comes the inversion using even/odd preconditioning */
  if(g_proc_id == 0) {printf("# Using even/odd preconditioning!\n"); fflush(stdout);}
  Msw_ee_inv_ndpsi(Even_new_s, Even_new_c, 
		   Even_s, Even_c);
  Hopping_Matrix(OE, g_spinor_field[DUM_DERI], Even_new_s);
  Hopping_Matrix(OE, g_spinor_field[DUM_DERI+1], Even_new_c);
  
  /* The sign is plus, since in Hopping_Matrix */
  /* the minus is missing                      */
  assign_mul_add_r(g_spinor_field[DUM_DERI], +1., Odd_s, VOLUME/2);
  assign_mul_add_r(g_spinor_field[DUM_DERI+1], +1., Odd_c, VOLUME/2);
  
  /* Do the inversion with the preconditioned  */
  /* matrix to get the odd sites               */
  
  /* Here we invert the hermitean operator squared */
  
  if(g_proc_id == 0) {
    printf("# Using CG for TMWILSON flavour doublet!\n"); 
    fflush(stdout);
  }
  gamma5(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI], VOLUME/2);
  gamma5(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+1], VOLUME/2);
  
  iter = cg_her_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
		   max_iter, precision, rel_prec, 
		   VOLUME/2, &Qsw_pm_ndpsi);
  
  
  Qsw_dagger_ndpsi(Odd_new_s, Odd_new_c,
		   Odd_new_s, Odd_new_c);
  
  /* Reconstruct the even sites                */
  Hopping_Matrix(EO, g_spinor_field[DUM_DERI], Odd_new_s);
  Hopping_Matrix(EO, g_spinor_field[DUM_DERI+1], Odd_new_c);
  Msw_ee_inv_ndpsi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3],
		   g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]);
  
  /* The sign is plus, since in Hopping_Matrix */
  /* the minus is missing                      */
  assign_add_mul_r(Even_new_s, g_spinor_field[DUM_DERI+2], +1., VOLUME/2);
  assign_add_mul_r(Even_new_c, g_spinor_field[DUM_DERI+3], +1., VOLUME/2);
  
  return(iter);
}
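The flow above is standard even/odd (Schur complement) preconditioning; the same pattern recurs in Examples #4 and #11 to #13 below. A sketch of the algebra being implemented, in notation introduced here rather than taken from the code, with $M_{ee}$, $M_{eo}$, $M_{oe}$, $M_{oo}$ the blocks of the full two-flavour operator and $H$ the hopping term, so that $M_{oe} = -H_{oe}$ up to normalisation (which is why the comments stress the plus sign, Hopping_Matrix omitting the minus):

\[
\begin{pmatrix} M_{ee} & M_{eo} \\ M_{oe} & M_{oo} \end{pmatrix}
\begin{pmatrix} x_e \\ x_o \end{pmatrix}
=
\begin{pmatrix} \phi_e \\ \phi_o \end{pmatrix}
\quad\Longrightarrow\quad
\hat M\, x_o = \phi_o - M_{oe} M_{ee}^{-1}\phi_e,
\qquad
\hat M = M_{oo} - M_{oe} M_{ee}^{-1} M_{eo},
\]
\[
x_e = M_{ee}^{-1}\big(\phi_e - M_{eo}\, x_o\big).
\]

Msw_ee_inv_ndpsi supplies $M_{ee}^{-1}$, the Hopping_Matrix/assign_mul_add_r pair builds the odd-site source, cg_her_nd inverts the hermitian positive operator provided as Qsw_pm_ndpsi (the "hermitean operator squared" of the comment) on the $\gamma_5$-rotated source, and the subsequent Qsw_dagger_ndpsi call turns that result into the odd-site solution $x_o$. The final block adds the missing $M_{ee}^{-1}H_{eo}\,x_o$ piece to Even_new, which already holds $M_{ee}^{-1}\phi_e$ from the first step. The exact $\gamma_5$ and normalisation conventions live inside the named operators.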
Example #2
void M_full(spinor * const Even_new, spinor * const Odd_new, 
	    spinor * const Even, spinor * const Odd) {
  /* Even sites */
  Hopping_Matrix(EO, g_spinor_field[DUM_DERI], Odd);
  assign_mul_one_pm_imu(Even_new, Even, 1., VOLUME/2); 
  assign_add_mul_r(Even_new, g_spinor_field[DUM_DERI], -1., VOLUME/2);

  /* Odd sites */
  Hopping_Matrix(OE, g_spinor_field[DUM_DERI], Even);
  assign_mul_one_pm_imu(Odd_new, Odd, 1., VOLUME/2); 
  assign_add_mul_r(Odd_new, g_spinor_field[DUM_DERI], -1., VOLUME/2);
}
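For comparison, M_full is the unpreconditioned operator that such a decomposition splits up. Reading it off the calls, and assuming assign_mul_one_pm_imu with sign $+1$ applies $1 + i\mu\gamma_5$ while $H_{eo}$, $H_{oe}$ stand for the two Hopping_Matrix applications (the explicit $-1.$ restores the sign Hopping_Matrix omits), it acts schematically as

\[
\begin{pmatrix} \chi_e \\ \chi_o \end{pmatrix}
=
\begin{pmatrix} 1 + i\mu\gamma_5 & -H_{eo} \\ -H_{oe} & 1 + i\mu\gamma_5 \end{pmatrix}
\begin{pmatrix} \psi_e \\ \psi_o \end{pmatrix},
\]

with the overall $\kappa$ and normalisation conventions left to those routines.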
Example #3
// applies ((Q_h\tau_1 * R)^2 - 1)
void apply_Z_ndpsi(spinor * const k_up, spinor * const k_dn,
		     spinor * const l_up, spinor * const l_dn,
		     const int id, hamiltonian_field_t * const hf,
		     solver_params_t * solver_params) {
  monomial * mnl = &monomial_list[id];

  mnl->iter0 += solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field,
			                       l_up, l_dn, solver_params);  
  
  // apply R to the pseudo-fermion fields
  assign(k_up, l_up, VOLUME/2);
  assign(k_dn, l_dn, VOLUME/2);
  for(int j = (mnl->rat.np-1); j > -1; j--) {
    assign_add_mul_r(k_up, g_chi_up_spinor_field[j], 
		     mnl->rat.rmu[j], VOLUME/2);
    assign_add_mul_r(k_dn, g_chi_dn_spinor_field[j], 
		     mnl->rat.rmu[j], VOLUME/2);
  }

  // apply R a second time
  mnl->iter0 += solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field,
	       k_up, k_dn,
	       solver_params);
  for(int j = (mnl->rat.np-1); j > -1; j--) {
    assign_add_mul_r(k_up, g_chi_up_spinor_field[j], 
		     mnl->rat.rmu[j], VOLUME/2);
    assign_add_mul_r(k_dn, g_chi_dn_spinor_field[j], 
		     mnl->rat.rmu[j], VOLUME/2);
  }
  mul_r(g_chi_up_spinor_field[mnl->rat.np], mnl->rat.A*mnl->rat.A, 
	k_up, VOLUME/2);
  mul_r(g_chi_dn_spinor_field[mnl->rat.np], mnl->rat.A*mnl->rat.A, 
	k_dn, VOLUME/2);
  // apply Q^2 and compute the residue
  solver_params->M_ndpsi(k_up, k_dn,
		     g_chi_up_spinor_field[mnl->rat.np], g_chi_dn_spinor_field[mnl->rat.np]);
  diff(k_up, k_up, l_up, VOLUME/2);
  diff(k_dn, k_dn, l_dn, VOLUME/2);
  
}
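A sketch of what the header comment amounts to, assuming solve_mms_nd returns the multi-shift solutions $(Q^2+\mu_j)^{-1}$ applied to its input for the shifts stored in mnl->rat.mu (that is how the rmu-weighted accumulation loops read), so that the rational approximation is applied in partial-fraction form

\[
R(Q^2)\,\psi \;=\; A\Big(\psi + \sum_j r_{\mu_j}\,(Q^2+\mu_j)^{-1}\psi\Big).
\]

The routine applies the bracketed sum twice, multiplies once by $A^2$ (rat.A*rat.A), applies $Q^2$ through solver_params->M_ndpsi and subtracts the input, i.e. it builds $k = Q^2 R(Q^2)^2\, l - l$, the deviation of $R$ from $(Q^2)^{-1/2}$; the $\tau_1$ flavour structure mentioned in the comment is carried inside the named operators.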
Example #4
void CGeoSmoother(spinor * const P, spinor * const Q, const int Ncy, const int dummy) {
  spinor ** solver_field = NULL;
  const int nr_sf = 5;
  double musave = g_mu;
  g_mu = g_mu1;
  init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  
  convert_lexic_to_eo(solver_field[0], solver_field[1], Q);
  if(g_c_sw > 0)
    assign_mul_one_sw_pm_imu_inv(EE,solver_field[2], solver_field[0], g_mu);
  else
    assign_mul_one_pm_imu_inv(solver_field[2], solver_field[0], +1., VOLUME/2);
  
  Hopping_Matrix(OE, solver_field[4], solver_field[2]); 
  /* The sign is plus, since in Hopping_Matrix */
  /* the minus is missing                      */
  assign_mul_add_r(solver_field[4], +1., solver_field[1], VOLUME/2);
  /* Do the inversion with the preconditioned  */
  /* matrix to get the odd sites               */
  gamma5(solver_field[4], solver_field[4], VOLUME/2);
  if(g_c_sw > 0) {
    cg_her(solver_field[3], solver_field[4], Ncy, 1.e-8, 1, 
	   VOLUME/2, &Qsw_pm_psi);
    Qsw_minus_psi(solver_field[3], solver_field[3]);
    
    /* Reconstruct the even sites                */
    Hopping_Matrix(EO, solver_field[2], solver_field[3]);
    assign_mul_one_sw_pm_imu_inv(EE,solver_field[4],solver_field[2], g_mu);
  }
  else {
    cg_her(solver_field[3], solver_field[4], Ncy, 1.e-8, 1, 
	   VOLUME/2, &Qtm_pm_psi);
    Qtm_minus_psi(solver_field[3], solver_field[3]);
    
    /* Reconstruct the even sites                */
    Hopping_Matrix(EO, solver_field[4], solver_field[3]);
    mul_one_pm_imu_inv(solver_field[4], +1., VOLUME/2);
  }
  
  /* The sign is plus, since in Hopping_Matrix */
  /* the minus is missing                      */
  assign_add_mul_r(solver_field[2], solver_field[4], +1., VOLUME/2);
  
  convert_eo_to_lexic(P, solver_field[2], solver_field[3]); 
  g_mu = musave;
  finalize_solver(solver_field, nr_sf);
  return;  
}
Example #5
double rat_acc(const int id, hamiltonian_field_t * const hf) {
  solver_pm_t solver_pm;
  monomial * mnl = &monomial_list[id];
  double atime, etime, dummy;
  atime = gettime();
  // only for non-twisted operators
  g_mu = 0.;
  g_mu3 = 0.;
  boundary(mnl->kappa);
  if(mnl->type == CLOVERRAT) {
    g_c_sw = mnl->c_sw;
    sw_term((const su3**) hf->gaugefield, mnl->kappa, mnl->c_sw); 
    sw_invert(EE, 0.);
  }
  mnl->energy1 = 0.;

  solver_pm.max_iter = mnl->maxiter;
  solver_pm.squared_solver_prec = mnl->accprec;
  solver_pm.no_shifts = mnl->rat.np;
  solver_pm.shifts = mnl->rat.mu;
  solver_pm.type = CGMMS;
  solver_pm.M_psi = mnl->Qsq;
  solver_pm.sdim = VOLUME/2;
  solver_pm.rel_prec = g_relative_precision_flag;
  mnl->iter0 += cg_mms_tm(g_chi_up_spinor_field, mnl->pf,
			  &solver_pm, &dummy);

  // apply R to the pseudo-fermion fields
  assign(mnl->w_fields[0], mnl->pf, VOLUME/2);
  for(int j = (mnl->rat.np-1); j > -1; j--) {
    assign_add_mul_r(mnl->w_fields[0], g_chi_up_spinor_field[j], 
		     mnl->rat.rmu[j], VOLUME/2);
  }

  mnl->energy1 = scalar_prod_r(mnl->pf, mnl->w_fields[0], VOLUME/2, 1);
  etime = gettime();
  if(g_proc_id == 0) {
    if(g_debug_level > 1) {
      printf("# Time for %s monomial acc step: %e s\n", mnl->name, etime-atime);
    }
    if(g_debug_level > 0) { // should be 3
      printf("called rat_acc for id %d dH = %1.10e\n", id, mnl->energy1 - mnl->energy0);
    }
  }
  return(mnl->energy1 - mnl->energy0);
}
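Under the same partial-fraction reading as in Example #3 (cg_mms_tm returning $(Q^2+\mu_j)^{-1}\phi$ for the shifts in rat.mu), the accumulation loop forms

\[
w \;=\; \phi + \sum_j r_{\mu_j}\,(Q^2+\mu_j)^{-1}\phi ,
\]

and energy1 is the acceptance contribution $\langle\phi, w\rangle$. Whether the overall constant rat.A of the rational approximation is folded into the residues $r_{\mu_j}$ here or accounted for elsewhere follows the conventions of the monomial's rational-approximation setup.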
Example #6
int bicg(spinor * const k, spinor * const l, const double q_off, double eps_sq) {
  int iteration;
  double xxx;
  xxx=0.0;
  gamma5(g_spinor_field[DUM_SOLVER+1], l, VOLUME/2);
  /* main loop */
  for(iteration=1;iteration<=ITER_MAX_BCG;iteration++) {
    /* compute the residual*/
    M_psi(DUM_SOLVER,k,q_off);
    xxx=diff_and_square_norm(g_spinor_field[DUM_SOLVER], g_spinor_field[DUM_SOLVER+1], VOLUME/2);
    /*apply the solver step for the residual*/
    M_psi(DUM_SOLVER+2,DUM_SOLVER,q_off-(2.+2.*q_off));
    assign_add_mul_r(k,-1./((1.+q_off)*(1.+q_off)),g_spinor_field[DUM_SOLVER+2], VOLUME/2);
    if(xxx <= eps_sq) break;
  }

  if(g_proc_id==0) {
    sout = fopen(solvout, "a");
    fprintf(sout, "%d %e %f\n",iteration,xxx, g_mu);
    fclose(sout);
  }

  /* if the geometric series fails, redo with conjugate gradient */
  if(iteration>=ITER_MAX_BCG) {
    if(ITER_MAX_BCG == 0) {
      iteration = 0;
    }
    zero_spinor_field(k,VOLUME/2);
    iteration += solve_cg(k,l,q_off,eps_sq);
    Q_psi(k,k,q_off);
    if(ITER_MAX_BCG != 0) {
      iteration -= 1000000;
    }
    if(g_proc_id == 0) {
      sout = fopen(solvout, "a");
      fprintf(sout, "%d %e\n",iteration, g_mu);
      fclose(sout);
    }
  }
  
  return iteration;
}
Example #7
int bicgstabell(spinor * const x0, spinor * const b, const int max_iter, 
		double eps_sq, const int rel_prec, const int _l, const int N, matrix_mult f) {

  double err;
  int i, j, k, l;
  double rho0, rho1, beta, alpha, omega, gamma0 = 0., squarenorm;
  spinor * r[5], * u[5], * r0_tilde, * x;
  double tau[5][5], gamma[25], gammap[25], gammapp[25], sigma[25];
  spinor ** solver_field = NULL;
  const int nr_sf = 2*(_l+1)+2;

  l = _l;
  k = -l;

  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }
  r0_tilde = solver_field[0];
  for(i = 0; i <= l; i++){
    r[i] = solver_field[2+2*i];
    u[i] = solver_field[3+2*i];
  }

  x = x0; 
  assign(u[0], b, N);
  f(r0_tilde, x);
  diff(r[0], u[0], r0_tilde, N);
  zero_spinor_field(solver_field[1], N);
  assign(r0_tilde, r[0], N);
  squarenorm = square_norm(b, N, 1);

  rho0 = 1.;
  alpha = 0.;
  omega = 1.;
  err = square_norm(r0_tilde, N, 1);
  while( k < max_iter && (((err > eps_sq) && (rel_prec == 0)) 
			  || ((err > eps_sq*squarenorm) && (rel_prec == 1)) 
			  )) {
    k+=l;

    /* The BiCG part */

    rho0 *= -omega;
    for(j = 0; j < l; j++) {
      rho1 = scalar_prod_r(r[j], r0_tilde, N, 1);
      beta = (rho1/rho0);
      beta *= alpha; 
      rho0 = rho1;
      for(i = 0; i <= j; i++) {
	/* u_i = r_i - \beta u_i */
	assign_mul_add_r(u[i], -beta, r[i], N);
      }
      f(u[j+1], u[j]);
      gamma0 = scalar_prod_r(u[j+1], r0_tilde, N, 1);
      alpha = rho0/gamma0;
      /* r_i = r_i - \alpha u_{i+1} */
      for(i = 0; i <= j; i++) {
	assign_add_mul_r(r[i], u[i+1], -alpha, N);
      }
      f(r[j+1], r[j]);
      /* x = x + \alpha u_0 */
      assign_add_mul_r(x, u[0], alpha, N);
      err = square_norm(r[j+1], N, 1);
      if(g_proc_id == 0 && g_debug_level > 1) {printf("%d %d err = %e\n", k, j, err);fflush(stdout);}
    }

    /* The MR part */

    for(j = 1; j <= l; j++){
      for(i = 1; i < j; i++){
	tau[i][j] = scalar_prod_r(r[j], r[i], N, 1)/sigma[i];
	assign_add_mul_r(r[j], r[i], -tau[i][j], N);
      }
      sigma[j] = scalar_prod_r(r[j], r[j], N, 1);
      gammap[j] = scalar_prod_r(r[0], r[j], N, 1)/sigma[j];
    }
    gamma[l] = gammap[l];
    omega = gamma[l];
    for(j = l-1; j > 0; j--) {
      gamma[j] = gammap[j];
      for(i = j+1; i <= l; i++) {
	gamma[j] -= (tau[j][i]*gamma[i]);
      }
    }
    for(j = 1; j < l; j++) {
      gammapp[j] = gamma[j+1];
      for(i = j+1; i < l; i++){
	gammapp[j] += (tau[j][i]*gamma[i+1]);
      }
    }
    assign_add_mul_r(x, r[0], gamma[1], N);
    assign_add_mul_r(r[0], r[l], -gammap[l], N);
    for(j = 1; j < l; j++){
      assign_add_mul_r(x, r[j], gammapp[j], N);
      assign_add_mul_r(r[0], r[j], -gammap[j], N);
    }
    assign_add_mul_r(u[0], u[l], -gamma[l], N);
    for(j = 1; j < l; j++){
      assign_add_mul_r(u[0], u[j], -gamma[j], N);
    }
    err = square_norm(r[0], N, 1);
    if(g_proc_id == 0 && g_debug_level > 0){
      printf(" BiCGstabell iterated %d %d, %e rho0 = %e, alpha = %e, gamma0= %e\n", l, k, err, rho0, alpha, gamma0);
      fflush( stdout );
    }
  }
  finalize_solver(solver_field, nr_sf);
  if(k == max_iter) return(-1);
  return(k);
}
Example #8
void ndratcor_heatbath(const int id, hamiltonian_field_t * const hf) {
  monomial * mnl = &monomial_list[id];
  double atime, etime, delta;
  spinor * up0, * dn0, * up1, * dn1, * tup, * tdn, * Zup, * Zdn;
  double coefs[6] = {1./4., -3./32., 7./128., -77./2048., 231./8192., -1463./65536.}; // series of (1+x)^(1/4)
  double coefs_check[6] = {1./2., -1./8., 1./16., -5./128., 7./256., -21./1024.}; // series of (1+x)^(1/2)
  atime = gettime();
  nd_set_global_parameter(mnl);
  g_mu3 = 0.;
  mnl->iter0 = 0;
  if(mnl->type == NDCLOVERRATCOR) {
    init_sw_fields();
    sw_term((const su3**)hf->gaugefield, mnl->kappa, mnl->c_sw); 
    sw_invert_nd(mnl->mubar*mnl->mubar - mnl->epsbar*mnl->epsbar);
    copy_32_sw_fields();
  }
  // we measure before the trajectory!
  if((mnl->rec_ev != 0) && (hf->traj_counter%mnl->rec_ev == 0)) {
    if(mnl->type != NDCLOVERRAT) phmc_compute_ev(hf->traj_counter-1, id, &Qtm_pm_ndbipsi);
    else phmc_compute_ev(hf->traj_counter-1, id, &Qsw_pm_ndbipsi);
  }

  // the Gaussian distributed random fields
  mnl->energy0 = 0.;
  random_spinor_field_eo(mnl->pf, mnl->rngrepro, RN_GAUSS);
  mnl->energy0 = square_norm(mnl->pf, VOLUME/2, 1);

  random_spinor_field_eo(mnl->pf2, mnl->rngrepro, RN_GAUSS);
  mnl->energy0 += square_norm(mnl->pf2, VOLUME/2, 1);

  mnl->solver_params.max_iter = mnl->maxiter;
  mnl->solver_params.squared_solver_prec = mnl->accprec;
  mnl->solver_params.no_shifts = mnl->rat.np;
  mnl->solver_params.shifts = mnl->rat.mu;
  mnl->solver_params.type = mnl->solver;
  mnl->solver_params.M_ndpsi = &Qtm_pm_ndpsi;
  mnl->solver_params.M_ndpsi32 = &Qtm_pm_ndpsi_32;    
  if(mnl->type == NDCLOVERRATCOR) {
    mnl->solver_params.M_ndpsi = &Qsw_pm_ndpsi;
    mnl->solver_params.M_ndpsi32 = &Qsw_pm_ndpsi_32;
  }
  mnl->solver_params.sdim = VOLUME/2;
  mnl->solver_params.rel_prec = g_relative_precision_flag;

  // apply B to the random field to generate pseudo-fermion fields
  up0 = mnl->w_fields[0]; dn0 = mnl->w_fields[1];
  up1 = mnl->w_fields[2]; dn1 = mnl->w_fields[3];
  Zup = mnl->w_fields[4]; Zdn = mnl->w_fields[5];

  apply_Z_ndpsi(up0, dn0, mnl->pf, mnl->pf2, id, hf, &(mnl->solver_params));
  // computing correction to energy1
  delta = coefs_check[0]*(scalar_prod_r(mnl->pf, up0, VOLUME/2, 1) + scalar_prod_r(mnl->pf2, dn0, VOLUME/2, 1));
  if(g_debug_level > 2 && g_proc_id == 0)
    printf("# NDRATCOR heatbath: c_%d*(R * Z^%d * R) = %e\n", 1, 1, delta);
  // debug for showing that the old check was giving a smaller delta
  if(g_debug_level > 3) {
    double delta_old = square_norm(up0, VOLUME/2, 1) + square_norm(dn0, VOLUME/2, 1);
    if(g_proc_id == 0) {
      printf("# NDRATCOR old check: || Z^%d * R ||^2 = %e\n", 1, delta_old);
      printf("# NDRATCOR new check: (c_%d*(R * Z^%d * R))^2 = %e\n", 1, 1, delta*delta);
    }
  }

  if(delta*delta > mnl->accprec) {
    assign_add_mul_r(mnl->pf, up0, coefs[0], VOLUME/2);
    assign_add_mul_r(mnl->pf2, dn0, coefs[0], VOLUME/2);
    
    // saving first application
    assign(Zup, up0, VOLUME/2);
    assign(Zdn, dn0, VOLUME/2);
    
    
    for(int i = 2; i < 7; i++) { // coefs[] and coefs_check[] hold six terms; index i-1 must stay within [1,5] here
      // computing next order correction to energy1
      delta = coefs_check[i-1]*(scalar_prod_r(Zup, up0, VOLUME/2, 1) + scalar_prod_r(Zdn, dn0, VOLUME/2, 1)); 
      if(g_debug_level > 2 && g_proc_id == 0)
        printf("# NDRATCOR heatbath: c_%d*(R * Z^%d * R) = %e\n", i, i, delta);
      // debug for showing that the old check was giving a smaller delta
      if(g_debug_level > 3) {
        double delta_old = square_norm(up0, VOLUME/2, 1) + square_norm(dn0, VOLUME/2, 1);
        if(g_proc_id == 0) {
          printf("# NDRATCOR old check: || Z^%d * R ||^2 = %e\n", i, delta_old);
          printf("# NDRATCOR new check: (c_%d*(R * Z^%d * R))^2 = %e\n", i, i, delta*delta);
        }
      }
      if(delta*delta < mnl->accprec) break;

      apply_Z_ndpsi(up1, dn1, up0, dn0, id, hf, &(mnl->solver_params));
      
      assign_add_mul_r(mnl->pf, up1, coefs[i-1], VOLUME/2);
      assign_add_mul_r(mnl->pf2, dn1, coefs[i-1], VOLUME/2);

      tup = up0; tdn = dn0;
      up0 = up1; dn0 = dn1;
      up1 = tup; dn1 = tdn;
    }
  }
  etime = gettime();
  if(g_proc_id == 0) {
    if(g_debug_level > 1) {
      printf("# Time for %s monomial heatbath: %e s\n", mnl->name, etime-atime);
    }
    if(g_debug_level > 3) { 
      printf("called ndratcor_heatbath for id %d energy %f\n", id, mnl->energy0);
    }
  }
  return;
}
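The two coefficient arrays at the top are, as their comments say, the leading terms of the binomial series of (1+x)^(1/4) and (1+x)^(1/2). A small standalone check in plain C, independent of tmLQCD (names here are illustrative only), reproduces them from the recurrence $c_0=a$, $c_k = c_{k-1}\,(a-k)/(k+1)$ for $(1+x)^a = 1 + \sum_{k\ge 0} c_k\,x^{k+1}$:

#include <stdio.h>

/* Reproduce the hard-coded series coefficients used above:
   (1+x)^a = 1 + sum_{k>=0} c_k x^(k+1),  c_0 = a,  c_k = c_{k-1}*(a-k)/(k+1). */
static void print_series(double a, int nterms) {
  double c = a;
  for (int k = 0; k < nterms; k++) {
    printf("  c_%d = %.12f\n", k, c);
    c *= (a - (k + 1)) / (k + 2);
  }
}

int main(void) {
  printf("(1+x)^(1/4):\n");  /* expect 1/4, -3/32, 7/128, -77/2048, 231/8192, -1463/65536 */
  print_series(0.25, 6);
  printf("(1+x)^(1/2):\n");  /* expect 1/2, -1/8, 1/16, -5/128, 7/256, -21/1024 */
  print_series(0.5, 6);
  return 0;
}

Comparing the output against coefs[] and coefs_check[] (for instance -77/2048 = -0.03759765625 and -21/1024 = -0.0205078125) confirms both arrays.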
Example #9
// computes ||(1 - C^dagger R C) phi||^2
void check_C_ndpsi(spinor * const k_up, spinor * const k_dn,
		   spinor * const l_up, spinor * const l_dn,
		   const int id, hamiltonian_field_t * const hf,
		   solver_params_t * solver_params) {
  monomial * mnl = &monomial_list[id];
  mnl->iter0 = solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field,
			     l_up, l_dn, solver_params);

  assign(k_up, l_up, VOLUME/2);
  assign(k_dn, l_dn, VOLUME/2);

  // apply C to the random field to generate pseudo-fermion fields
  for(int j = (mnl->rat.np-1); j > -1; j--) {
    // Q_h * tau^1 - i nu_j
    // this needs phmc_Cpol = 1 to work!
    if(mnl->type == NDCLOVERRATCOR || mnl->type == NDCLOVERRAT) {
      Qsw_tau1_sub_const_ndpsi(g_chi_up_spinor_field[mnl->rat.np], g_chi_dn_spinor_field[mnl->rat.np],
			       g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
			       I*mnl->rat.nu[j], 1., mnl->EVMaxInv);
    }
    else {
      Q_tau1_sub_const_ndpsi(g_chi_up_spinor_field[mnl->rat.np], g_chi_dn_spinor_field[mnl->rat.np],
			     g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
			     I*mnl->rat.nu[j], 1., mnl->EVMaxInv);
    }
    assign_add_mul(k_up, g_chi_up_spinor_field[mnl->rat.np], I*mnl->rat.rnu[j], VOLUME/2);
    assign_add_mul(k_dn, g_chi_dn_spinor_field[mnl->rat.np], I*mnl->rat.rnu[j], VOLUME/2);
  }
  //apply R
  solver_params->shifts = mnl->rat.mu;
  solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field,
	       k_up, k_dn,
	       solver_params);
  for(int j = (mnl->rat.np-1); j > -1; j--) {
    assign_add_mul_r(k_up, g_chi_up_spinor_field[j], 
		     mnl->rat.rmu[j], VOLUME/2);
    assign_add_mul_r(k_dn, g_chi_dn_spinor_field[j], 
		     mnl->rat.rmu[j], VOLUME/2);
  }
  // apply C^dagger
  solver_params->shifts = mnl->rat.nu;
  solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field,
	       k_up, k_dn, solver_params);
  for(int j = (mnl->rat.np-1); j > -1; j--) {
    // Q_h * tau^1 + i nu_j
    if(mnl->type == NDCLOVERRATCOR || mnl->type == NDCLOVERRAT) {
      Qsw_tau1_sub_const_ndpsi(g_chi_up_spinor_field[mnl->rat.np], g_chi_dn_spinor_field[mnl->rat.np],
			     g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
			     -I*mnl->rat.nu[j], 1., mnl->EVMaxInv);
    }
    else {
      Q_tau1_sub_const_ndpsi(g_chi_up_spinor_field[mnl->rat.np], g_chi_dn_spinor_field[mnl->rat.np],
			     g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
			     -I*mnl->rat.nu[j], 1., mnl->EVMaxInv);
    }
    assign_add_mul(k_up, g_chi_up_spinor_field[mnl->rat.np], -I*mnl->rat.rnu[j], VOLUME/2);
    assign_add_mul(k_dn, g_chi_dn_spinor_field[mnl->rat.np], -I*mnl->rat.rnu[j], VOLUME/2);
  }
  diff(k_up, k_up, l_up, VOLUME/2);  
  diff(k_dn, k_dn, l_dn, VOLUME/2);  
  double resi = square_norm(k_up, VOLUME/2, 1);
  resi += square_norm(k_dn, VOLUME/2, 1);
  if(g_proc_id == 0) printf("|| (1 - C^dagger R C) * phi ||^2 = %e\n", resi);

  return;
}
Example #10
void eigcg(int n, int lde, spinor * const x, spinor * const b, double *normb, 
           const double eps_sq, double restart_eps_sq, const int rel_prec, int maxit, int *iter, 
           double *reshist, int *flag, spinor **work, matrix_mult f, 
           int nev, int v_max, spinor *V, int esize, _Complex double *ework)
{
  double tolb;        
  double alpha, beta; /* CG scalars */
  double rho, rhoprev;
  double pAp;
  int it;   /* current iteration number */
  int i, j; /* loop variables */
  int zs,ds,tmpsize;
  spinor *r, *p, *Ap;   /* ptrs in work for CG vectors */
  _Complex double tempz;        /* double precision complex temp var */
  double tempd;         /* double temp var */
  int tempi;            /* int temp var */
  int ONE = 1;          /* var for passing 1 into BLAS routines */
  /*----------------------------------------------------------------------
         Eigen variables and setup    
    ----------------------------------------------------------------------*/
  /* Some constants */
  char cR = 'R'; char cL = 'L'; char cN ='N'; 
  char cV = 'V'; char cU = 'U'; char cC ='C';
  double betaprev, alphaprev;     /* remember the previous iterations scalars */
  int v_size;                     /* tracks the size of V */
  int lwork = 3*v_max;            /* the size of zwork */
  spinor *Ap_prev;
  void *_h;     
  _Complex double *H;         /* the V'AV projection matrix */
  void *_hevecs;
  _Complex double *Hevecs;    /* the eigenvectors of H */
  void *_hevecsold;
  _Complex double *Hevecsold; /* the eigenvectors of H(v_max-1,v_max-1) */
  void *_hevals;
  double    *Hevals;    /* the eigenvalues of H */
  void *_hevalsold;
  double    *Hevalsold; /* the eigenvalues of H(m-1,m-1) */
  void *_tau;
  _Complex double *TAU;	         
  void *_zwork;
  _Complex double *zwork;        /* double complex work array needed by zheev */
  void *_rwork;
  double *rwork;        /* double work array needed by zheev */

  int parallel;
  
  double tmpd;
  _Complex double tmpz;

  zs = sizeof(_Complex double);  
  ds = sizeof(double);

  int info, allelems = v_max*v_max;
  
#ifdef MPI
  parallel=1;
#else
  parallel=0;
#endif

  if(nev > 0)   /*allocate memory only if eigenvalues will be used */
  {
    #if (defined SSE || defined SSE2 || defined SSE3)
    if ((_h = calloc(v_max*v_max+ALIGN_BASE,zs)) == NULL)
    {
      if( g_proc_id == g_stdio_proc) 
      {fprintf(stderr,"ERROR Could not allocate H\n"); exit(1);}  
    }
    else
      H = (_Complex double *)(((unsigned long int)(_h)+ALIGN_BASE)&~ALIGN_BASE);
  
  
    if ((_hevecs = calloc(v_max*v_max+ALIGN_BASE,zs)) == NULL)
    {
      if( g_proc_id == g_stdio_proc ) 
      {fprintf(stderr, "ERROR Could not allocate Hevecs\n"); exit(1);}
    }else
      Hevecs = (_Complex double *)(((unsigned long int)(_hevecs)+ALIGN_BASE)&~ALIGN_BASE);
  
    if ((_hevecsold = calloc(v_max*v_max+ALIGN_BASE,zs)) == NULL)
    {
      if( g_proc_id == g_stdio_proc ) 
        {fprintf(stderr, "ERROR Could not allocate Hevecsold\n"); exit(1);}  
    }else
      Hevecsold = (_Complex double *)(((unsigned long int)(_hevecsold)+ALIGN_BASE)&~ALIGN_BASE);
  
    if ((_hevals = calloc(v_max+ALIGN_BASE,ds)) == NULL)
    {
      if( g_proc_id == g_stdio_proc) 
        {fprintf(stderr, "ERROR Could not allocate Hevals\n"); exit(1);}
    
    }else
      Hevals = (double *)(((unsigned long int)(_hevals)+ALIGN_BASE)&~ALIGN_BASE);
  
    if ((_hevalsold = calloc(v_max+ALIGN_BASE,ds)) == NULL) 
    {
      if( g_proc_id == g_stdio_proc)
        {fprintf(stderr, "ERROR Could not allocate Hevalsold\n"); exit(1); }
    
    }else
      Hevalsold = (double *)(((unsigned long int)(_hevalsold)+ALIGN_BASE)&~ALIGN_BASE);
  
    if ((_tau = calloc(2*nev+ALIGN_BASE,zs)) == NULL)  
    {
      if( g_proc_id == g_stdio_proc ) 
        {fprintf(stderr, "ERROR Could not allocate TAU\n"); exit(1); }
    
    }else
      TAU = (_Complex double *)(((unsigned long int)(_tau)+ALIGN_BASE)&~ALIGN_BASE);
  
    if ((_zwork = calloc(lwork+ALIGN_BASE,zs)) == NULL)   
    {
      if( g_proc_id == g_stdio_proc)
      {fprintf(stderr, "ERROR Could not allocate zwork\n"); exit(1);}
    
    }else
      zwork = (_Complex double *)(((unsigned long int)(_zwork)+ALIGN_BASE)&~ALIGN_BASE);
  
    if ((_rwork = calloc(3*v_max+ALIGN_BASE,ds)) == NULL) 
    {
      if( g_proc_id == g_stdio_proc)
        {fprintf(stderr, "ERROR Could not allocate rwork\n"); exit(1);}
    
    }else
      rwork = (double *)(((unsigned long int)(_rwork)+ALIGN_BASE)&~ALIGN_BASE);
  
    #else
  
    if ((H = (_Complex double *) calloc(v_max*v_max, zs)) == NULL)
    {
      if( g_proc_id == g_stdio_proc) 
        {fprintf(stderr, "ERROR Could not allocate H\n"); exit(1);}
    }

    if ((Hevecs = (_Complex double *) calloc(v_max*v_max, zs)) == NULL)
    {
      if( g_proc_id == g_stdio_proc ) 
        {fprintf(stderr, "ERROR Could not allocate Hevecs\n"); exit(1);}
    }

    if ((Hevecsold = (_Complex double *) calloc(v_max*v_max, zs)) == NULL)
    {
      if( g_proc_id == g_stdio_proc ) 
      {fprintf(stderr, "ERROR Could not allocate Hevecsold\n"); exit(1);}
    }

    if ((Hevals = (double *) calloc(v_max, ds)) == NULL)
    {
      if( g_proc_id == g_stdio_proc) 
        {fprintf(stderr, "ERROR Could not allocate Hevals\n"); exit(1);}
    }
     

    if ((Hevalsold = (double *) calloc(v_max, ds)) == NULL) 
    {
      if( g_proc_id == g_stdio_proc)
        {fprintf(stderr, "ERROR Could not allocate Hevalsold\n"); exit(1); }
    }


    if ((TAU = (_Complex double *) calloc(2*nev, zs)) == NULL)
    {
      if( g_proc_id == g_stdio_proc ) 
       {fprintf(stderr, "ERROR Could not allocate TAU\n"); exit(1); }
    
    }
  
  
    if ((zwork = (_Complex double *) calloc(lwork, zs)) == NULL) 
    {
      if( g_proc_id == g_stdio_proc)
      {fprintf(stderr, "ERROR Could not allocate zwork\n"); exit(1);}
    
    }
  
    if ((rwork = (double *) calloc(3*v_max, ds)) == NULL) 
    {
      if( g_proc_id == g_stdio_proc)
      {fprintf(stderr, "ERROR Could not allocate rwork\n"); exit(1);}
    
    }

    #endif 
  } /* end if (nev > 0) */  

  /*----------------------------------------------------------------------*/

  /* setup pointers into work */
  r = work[0];
  p = work[1];
  Ap = work[2];
  Ap_prev = work[3];
  


  /*--------------------------------------------------------------------
     Initialization phase 
    --------------------------------------------------------------------*/
  
  if (*flag != 3) 
  {
    
    /* If flag == 3, the eigCG is called after restart with the same b 
     * whose norm is already known in normb, so no need for these    */
    
    tempd = square_norm(b,n,parallel); /* Norm of rhs, b */
    *normb = sqrt(tempd);

    /* If right hand side is zero return zero solution. ITER stays the same */
    if (*normb == 0.0) 
    {
      for (i=0; i<n; i++) 
      {
	_vector_null(x[i].s0);
        _vector_null(x[i].s1);
        _vector_null(x[i].s2);
        _vector_null(x[i].s3);
      }       
    
      *flag = 0;		
      *reshist = 0.0;
      if( g_debug_level > 0 && g_proc_id == g_stdio_proc)
        displayInfo(eps_sq,maxit,*flag,*iter,*reshist);
      return;
     }
     
  }
  
  /* Set up for the method */
  *flag = 1;
  tolb = eps_sq * (*normb)*(*normb);	/* Relative to b tolerance */

  /* Zero-th residual: r = b - A*x  */
  f(r,x);
  diff(r,b,r,n);
  
  rho = 0.0;
  alpha = 1.0;
  beta = 0.0;
  v_size = 0;

  double reshist_init=square_norm(r,n,parallel);

  //if( g_proc_id == g_stdio_proc )
    //fprintf(stdout, "reshist init %f\n", reshist_init);
  
  /*--------------------------------------------------------------------
     main CG loop
    --------------------------------------------------------------------*/
  for (it = 0; it < maxit; it++) {
   
    rhoprev = rho;
    rho=square_norm(r,n,parallel);
    *reshist = rho;
    if ( (g_debug_level > 2) && (g_proc_id == g_stdio_proc) )
    { fprintf(stdout, " Linsys res( %d ): %g\n",*iter+it,*reshist); fflush(stdout); }

    /* Convergence test */
    if ( ( (*reshist < eps_sq) && (rel_prec==0) ) || ( (*reshist < eps_sq*(*normb)*(*normb)) && (rel_prec ==1 ) )   ) 
    { 
       *flag = 0;
       break;  /* break do not return */
    }
    
    /* Restart test */
    if(nev==0)
    {
       if (*reshist < (restart_eps_sq*reshist_init) ) 
       {  
           *flag = 3;
            break;  /* break do not return */
       }
    }

    if (it == 0)
      assign(p,r,n);
    else {
      betaprev = beta;
      beta = rho / rhoprev;
      if (beta == 0.0) {
	       *flag = 2;
	       break;
      }
      assign_mul_add_r(p,beta,r,n); /* p = beta*p + r */
    }

    /*----- eigCG specific code -------------------------------------------*/
    /* Remember Ap from previous iteration to be used at restart */
    if (nev > 0 && v_size == v_max)
      assign(Ap_prev,Ap,n); 
    /*---------------------------------------------------------------------*/

    f(Ap,p);

    /*----- eigCG specific code -------------------------------------------*/
    if (nev > 0) {
      /* record the diagonal vAv for the previous vector */
      if (it > 0) {
	H[(v_size-1)*v_max+v_size-1]= 1.0/alpha + betaprev/alphaprev;
	//H[(v_size-1)*v_max+v_size-1].im = 0.0;
      }
      
      /* Restarting V */
      if (v_size == v_max) {
	/* Solve (v_max) and (v_max-1) eigenproblems */
	tempi = v_max;
	allelems=v_max*v_max;
	_FT(zcopy)(&allelems, H, &ONE, Hevecs, &ONE);
	_FT(zheev)(&cV,&cU,&tempi,Hevecs,&v_max,Hevals,zwork,&lwork,rwork,&info,1,1);
	if( (info != 0 ) && (g_proc_id==g_stdio_proc))
	{fprintf(stderr, "Error: ZHEEV in eigcg at v_max step, info %d\n",info); exit(1);}
	
	tempi = v_max-1;
	_FT(zcopy)(&allelems, H, &ONE, Hevecsold, &ONE);
	_FT(zheev)(&cV,&cU,&tempi,Hevecsold,&v_max,Hevalsold,zwork,&lwork,rwork,&info,1,1);
	       
	if( (info != 0 ) && (g_proc_id==g_stdio_proc))
	{fprintf(stderr, "Error: ZHEEV in eigcg at (v_max-1) step, info %d\n",info); exit(1);}
	       
	
	/* fill 0s in vmax-th elem of oldevecs to match Hevecs */
	for(i=1; i <= v_max ; i++)
	{Hevecsold[i*v_max-1] = 0.0 ;}

	/* Attach the first nev oldevecs at the end of the nev latest ones */
	tempi = nev*v_max;
	_FT(zcopy)(&tempi,Hevecsold,&ONE,&Hevecs[tempi],&ONE);

        /* Orthogonalize the 2*nev (new+old) vectors Hevecs=QR */
	v_size = 2*nev; 
	_FT(zgeqrf)(&v_max,&v_size,Hevecs,&v_max,TAU,zwork,&lwork,&info) ;
 
	if( (info != 0 ) && (g_proc_id==g_stdio_proc))
	{fprintf(stderr, "Error: ZGEQRF in eigcg info %d\n",info); exit(1);}
	
	/* use as a temp space Hevecsold = Q^THQ */
	_FT(zcopy)(&allelems,H,&ONE,Hevecsold,&ONE); 
	_FT(zunmqr)(&cR,&cN,&v_max,&v_max,&v_size,Hevecs,&v_max,
		               TAU,Hevecsold,&v_max,zwork,&lwork,&info);
	
	if( (info != 0 ) && (g_proc_id==g_stdio_proc))
	{fprintf(stderr, "Error: ZGEQRF call 1 in eigcg info %d\n",info); exit(1);}
	
	_FT(zunmqr)(&cL,&cC,&v_max,&v_size,&v_size,Hevecs,&v_max,
		               TAU,Hevecsold,&v_max,zwork,&lwork,&info);
	
	if( (info != 0 ) && (g_proc_id==g_stdio_proc))
	{fprintf(stderr, "Error: ZGEQRF call 2 in eigcg info %d\n",info); exit(1);}

        /* solve the small Hevecsold v_size x v_size eigenproblem */
	_FT(zheev)(&cV,&cU,&v_size,Hevecsold,&v_max,Hevals, zwork,&lwork,rwork,&info,1,1);
	if( (info != 0 ) && (g_proc_id==g_stdio_proc))
	{fprintf(stderr, "Error: ZHEEV in eigcg info %d\n",info); exit(1);}



	/* zero out unused part of the eigenvectors in Hevecsold */
	tempi = 0;
	for(i = 0; i < v_size; i++ ) 
	{
	  for(j = v_size; j < v_max; j++)
	  {Hevecsold[tempi + j]=0.0;}
	  tempi += v_max;
	  
	}


	/* Compute the Hevecsold = Hevecs*Hevecsold */
	_FT(zunmqr)(&cL,&cN,&v_max,&v_size,&v_size,Hevecs,&v_max,
		               TAU,Hevecsold,&v_max,zwork,&lwork,&info);

	           
	if( (info != 0 ) && (g_proc_id==g_stdio_proc))
	{fprintf(stderr, "Error: ZUNMQR, info %d\n",info); exit(1);}   
	      
	  
	/* Restart V = V(n,v_max)*Hevecsold(v_max,v_size) */
	Zrestart_X((_Complex double *) V, 12*lde, Hevecsold, 12*n, v_max, v_size, ework, esize); 
	
	/* Restart H = diag(Hevals) plus a column and a row */
	for (i = 0; i < allelems; i++ )  {H[i] = 0.0; }
    	for (i = 0; i < v_size; i++) H[i*(v_max+1)]= Hevals[i];

	 
	  
        /* The next residual to be added (v = r/sqrt(rho)) 
     	 * needs the (nev+1)-th column and row, through V(:,1:vs)'*A*v. 
	 * Instead of a matvec, we use the Ap and Ap_prev to obtain this:
	 * V(:,1:vs)'*A*V(:,vs+1) = V(:,1:vs)'*A*r/sqrt(rho) = 
	 * V'(A(p-beta*p_prev))/sqrt(rho) = V'(Ap - beta*Ap_prev)/sqrt(rho)*/
	  
	tmpd=-beta;
	assign_mul_add_r(Ap_prev,tmpd,Ap,n);   /* Ap_prev=Ap-beta*Ap_prev */
	  
	tempi=v_size*v_max;
	for (i=0; i<v_size; i++){
	  tmpz=scalar_prod(&V[i*lde],Ap_prev,n,parallel);
	  H[v_size+i*v_max]=tmpz/sqrt(rho);
	  H[i+tempi]=conj(tmpz)/sqrt(rho);
	}
	
      } /* end of if v_size == v_max */
      else 
      {
	/* update the (vs+1,vs),(vs,vs+1) elements of the tridiagonal matrix, which are real */
        if ( it > 0) 
	{
	  H[(v_size-1)*v_max + v_size]= -sqrt(beta)/alpha;
	  H[v_size*v_max + v_size-1] = creal(H[(v_size-1)*v_max + v_size]);
	}
	
      } /* of else */
      /* Augment V with the current CG residual r normalized by sqrt(rho) */

      tmpd=1.0/sqrt(rho);
      mul_r(&V[v_size*lde],tmpd,r,n);
      v_size++;
    } /* end of if nev >0 , ie., the eigCG specific code */
    /*---------------------------------------------------------------------*/

    /* pAp = p' * Ap */
    tempz=scalar_prod(p,Ap,n,parallel);
    pAp = creal(tempz);
    if (pAp == 0.0) {
      *flag = 2;
      break;
    } 

    alphaprev = alpha;
    alpha = rho / pAp;
    
    assign_add_mul_r(x,p,alpha,n);  /*update x*/
    tmpd=-alpha;
    assign_add_mul_r(r,Ap,tmpd,n);   /*update r*/
    
    //next line useful for debugging
    //printf("%d beta, alpha, rho, pAp %le %le %le %le\n",it,beta,alpha,rho,pAp);
  } /* for it = 0 : maxit-1 */
  
  *iter = *iter + it+1; /* record the number of CG iterations plus any older */
  if( g_proc_id == g_stdio_proc && g_debug_level > 0)
    displayInfo(eps_sq,maxit,*flag,*iter-1,*reshist);

  
  if(nev > 0 )
  {
    #if (defined SSE || defined SSE2 || defined SSE3)
    H= NULL;
    free(_h);
    Hevecs=NULL;
    free(_hevecs);
    Hevecsold=NULL;
    free(_hevecsold);
    Hevals=NULL;
    free(_hevals);
    Hevalsold=NULL;
    free(_hevalsold);
    TAU=NULL;
    free(_tau);
    zwork=NULL;
    free(_zwork);
    rwork=NULL;
    free(_rwork);
    #else
    free(H);
    free(Hevecs);
    free(Hevecsold);
    free(Hevals);
    free(Hevalsold);
    free(TAU);
    free(zwork);
    free(rwork);
    #endif
  }

 return;
} 
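The SSE branch above uses a manual alignment idiom: over-allocate each buffer by ALIGN_BASE elements, keep the raw pointer for free(), and round the working pointer up with a bit mask, which only works when ALIGN_BASE is of the form 2^n - 1. A minimal standalone sketch of the same trick; the value 15 (16-byte alignment) is an illustrative assumption, tmLQCD's actual ALIGN_BASE comes from its build headers:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define ALIGN_BASE 15   /* assumed 2^n - 1; here: 16-byte alignment */

int main(void) {
  /* Over-allocate by ALIGN_BASE elements, keep the raw pointer for free(),
     and round the working pointer up to the next multiple of ALIGN_BASE+1,
     mirroring what the SSE branch of eigcg does for H, Hevecs, ... */
  size_t n = 100;
  void *_h = calloc(n + ALIGN_BASE, sizeof(_Complex double));
  if (_h == NULL) { fprintf(stderr, "calloc failed\n"); return 1; }

  _Complex double *H =
    (_Complex double *)(((uintptr_t)_h + ALIGN_BASE) & ~(uintptr_t)ALIGN_BASE);

  printf("raw pointer %p, aligned pointer %p\n", _h, (void *)H);

  free(_h);   /* free the original pointer, never the aligned one */
  return 0;
}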
Example #11
int invert_doublet_eo(spinor * const Even_new_s, spinor * const Odd_new_s, 
                      spinor * const Even_new_c, spinor * const Odd_new_c,
                      spinor * const Even_s, spinor * const Odd_s,
                      spinor * const Even_c, spinor * const Odd_c,
                      const double precision, const int max_iter,
                      const int solver_flag, const int rel_prec, 
                      solver_params_t solver_params, const ExternalInverter external_inverter, 
                      const SloppyPrecision sloppy, const CompressionType compression) {

  int iter = 0;

#ifdef TM_USE_QUDA
  if( external_inverter==QUDA_INVERTER ) {
    return invert_doublet_eo_quda( Even_new_s, Odd_new_s, Even_new_c, Odd_new_c,
                                   Even_s, Odd_s, Even_c, Odd_c,
                                   precision, max_iter,
                                   solver_flag, rel_prec, 1,
                                   sloppy, compression );
  }
#endif
  
#ifdef HAVE_GPU
#  ifdef TEMPORALGAUGE
  if (usegpu_flag) {
    gtrafo_eo_nd(Even_s, Odd_s, Even_c, Odd_c, 
                 (spinor*const)NULL, (spinor*const)NULL, (spinor*const)NULL, (spinor*const)NULL, 
                 GTRAFO_APPLY);    
  } 
#  endif  
#endif /* HAVE_GPU*/


  /* here comes the inversion using even/odd preconditioning */
  if(g_proc_id == 0) {printf("# Using even/odd preconditioning!\n"); fflush(stdout);}
  M_ee_inv_ndpsi(Even_new_s, Even_new_c, 
                 Even_s, Even_c,
                 g_mubar, g_epsbar);
  Hopping_Matrix(OE, g_spinor_field[DUM_DERI], Even_new_s);
  Hopping_Matrix(OE, g_spinor_field[DUM_DERI+1], Even_new_c);
  
  /* The sign is plus, since in Hopping_Matrix */
  /* the minus is missing                      */
  assign_mul_add_r(g_spinor_field[DUM_DERI], +1., Odd_s, VOLUME/2);
  assign_mul_add_r(g_spinor_field[DUM_DERI+1], +1., Odd_c, VOLUME/2);
  
  /* Do the inversion with the preconditioned  */
  /* matrix to get the odd sites               */
  
  /* Here we invert the hermitean operator squared */
  
  if(g_proc_id == 0) {
    printf("# Using CG for TMWILSON flavour doublet!\n"); 
    fflush(stdout);
  }
  if ( external_inverter == NO_EXT_INV ){
    gamma5(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI], VOLUME/2);
    gamma5(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+1], VOLUME/2);
  
#ifdef HAVE_GPU
    if (usegpu_flag) {    // GPU, mixed precision solver
#    if ( defined TM_USE_MPI  && defined PARALLELT )
      iter = mixedsolve_eo_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
                              max_iter, precision, rel_prec);
#    elif ( !defined TM_USE_MPI  && !defined PARALLELT )
      iter = mixedsolve_eo_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
                              max_iter, precision, rel_prec);
#    else
      printf("MPI and/or PARALLELT are not appropriately set for the GPU implementation. Aborting...\n");
      exit(-1);
#    endif
    }
    else {                // CPU, conjugate gradient
      iter = cg_her_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
                       max_iter, precision, rel_prec, 
                       VOLUME/2, &Qtm_pm_ndpsi);
    }
#else                   // CPU, conjugate gradient
    if(solver_flag == RGMIXEDCG){
      iter = rg_mixed_cg_her_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
                                solver_params, max_iter, precision, rel_prec, VOLUME/2,
                                &Qtm_pm_ndpsi, &Qtm_pm_ndpsi_32);
    } 
    else {
      iter = cg_her_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
                       max_iter, precision, rel_prec, VOLUME/2, &Qtm_pm_ndpsi);
    }
#endif
    Qtm_dagger_ndpsi(Odd_new_s, Odd_new_c,
                     Odd_new_s, Odd_new_c);
  } // if(NO_EXT_INV)
#ifdef TM_USE_QPHIX
  else if (external_inverter == QPHIX_INVERTER ) {
    // using QPhiX, we invert M M^dagger y = b, so we don't need gamma_5 multiplications
    iter = invert_eo_qphix_twoflavour(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
                                      max_iter, precision, solver_flag, rel_prec,
                                      solver_params, sloppy, compression);
    // and it multiplies y internally by M^dagger, returning M^{-1} b as required
  }
#endif // TM_USE_QPHIX

  /* Reconstruct the even sites                */
  Hopping_Matrix(EO, g_spinor_field[DUM_DERI], Odd_new_s);
  Hopping_Matrix(EO, g_spinor_field[DUM_DERI+1], Odd_new_c);
  M_ee_inv_ndpsi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3],
                 g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
                 g_mubar, g_epsbar);
  
  /* The sign is plus, since in Hopping_Matrix */
  /* the minus is missing                      */
  assign_add_mul_r(Even_new_s, g_spinor_field[DUM_DERI+2], +1., VOLUME/2);
  assign_add_mul_r(Even_new_c, g_spinor_field[DUM_DERI+3], +1., VOLUME/2);
  
  
#ifdef HAVE_GPU  
  /* return from temporal gauge again */
#  ifdef TEMPORALGAUGE
  if (usegpu_flag) { 
    gtrafo_eo_nd(Even_s, Odd_s, Even_c, Odd_c, Even_new_s, Odd_new_s, Even_new_c, Odd_new_c,
                 GTRAFO_REVERT);
  }
#  endif
#endif
  return(iter);
}
Example #12
int invert_cloverdoublet_eo(spinor * const Even_new_s, spinor * const Odd_new_s, 
                      spinor * const Even_new_c, spinor * const Odd_new_c,
                      spinor * const Even_s, spinor * const Odd_s,
                      spinor * const Even_c, spinor * const Odd_c,
                      const double precision, const int max_iter,
                      const int solver_flag, const int rel_prec, solver_params_t solver_params,
                      const ExternalInverter external_inverter, const SloppyPrecision sloppy, const CompressionType compression) {
  
  int iter = 0;

#ifdef TM_USE_QUDA
  if( external_inverter==QUDA_INVERTER ) {
    return invert_doublet_eo_quda( Even_new_s, Odd_new_s, Even_new_c, Odd_new_c,
                                   Even_s, Odd_s, Even_c, Odd_c,
                                   precision, max_iter,
                                   solver_flag, rel_prec, 1,
                                   sloppy, compression );
  }
#endif
  
  /* here comes the inversion using even/odd preconditioning */
  if(g_proc_id == 0) {printf("# Using even/odd preconditioning!\n"); fflush(stdout);}
  Msw_ee_inv_ndpsi(Even_new_s, Even_new_c, 
                  Even_s, Even_c);
  Hopping_Matrix(OE, g_spinor_field[DUM_DERI], Even_new_s);
  Hopping_Matrix(OE, g_spinor_field[DUM_DERI+1], Even_new_c);
  
  /* The sign is plus, since in Hopping_Matrix */
  /* the minus is missing                      */
  assign_mul_add_r(g_spinor_field[DUM_DERI], +1., Odd_s, VOLUME/2);
  assign_mul_add_r(g_spinor_field[DUM_DERI+1], +1., Odd_c, VOLUME/2);  
  if( external_inverter == NO_EXT_INV ){    
    /* Do the inversion with the preconditioned  */
    /* matrix to get the odd sites               */
    
    /* Here we invert the hermitean operator squared */
    
    if(g_proc_id == 0) {
      printf("# Using CG for TMWILSON flavour doublet!\n"); 
      fflush(stdout);
    }
    gamma5(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI], VOLUME/2);
    gamma5(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+1], VOLUME/2);
    
    if(solver_flag == RGMIXEDCG){
      iter = rg_mixed_cg_her_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
                                solver_params, max_iter, precision, rel_prec, VOLUME/2,
                                &Qsw_pm_ndpsi, &Qsw_pm_ndpsi_32);
    } else {
      iter = cg_her_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
                                  max_iter, precision, rel_prec, 
                                  VOLUME/2, &Qsw_pm_ndpsi);
    }
    
    Qsw_dagger_ndpsi(Odd_new_s, Odd_new_c,
                    Odd_new_s, Odd_new_c);
  } // if(NO_EXT_INV)
#ifdef TM_USE_QPHIX
  else if (external_inverter == QPHIX_INVERTER ) {
    // using QPhiX, we invert M M^dagger y = b, so we don't need gamma_5 multiplications
    iter = invert_eo_qphix_twoflavour(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
                                      max_iter, precision, solver_flag, rel_prec,
                                      solver_params, sloppy, compression);
    // and it multiplies y internally by M^dagger, returning M^{-1} b as required
  }
#endif // TM_USE_QPHIX
  
  /* Reconstruct the even sites                */
  Hopping_Matrix(EO, g_spinor_field[DUM_DERI], Odd_new_s);
  Hopping_Matrix(EO, g_spinor_field[DUM_DERI+1], Odd_new_c);
  Msw_ee_inv_ndpsi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3],
                   g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]);
  
  /* The sign is plus, since in Hopping_Matrix */
  /* the minus is missing                      */
  assign_add_mul_r(Even_new_s, g_spinor_field[DUM_DERI+2], +1., VOLUME/2);
  assign_add_mul_r(Even_new_c, g_spinor_field[DUM_DERI+3], +1., VOLUME/2);
  
  return(iter);
}
Example #13
int invert_clover_eo(spinor * const Even_new, spinor * const Odd_new,
                     spinor * const Even, spinor * const Odd,
                     const double precision, const int max_iter,
                     const int solver_flag, const int rel_prec,solver_params_t solver_params,
                     su3 *** gf, matrix_mult Qsq, matrix_mult Qm) {
    int iter;

    if(g_proc_id == 0 && g_debug_level > 0) {
        printf("# Using even/odd preconditioning!\n");
        fflush(stdout);
    }

    assign_mul_one_sw_pm_imu_inv(EE, Even_new, Even, +g_mu);

    Hopping_Matrix(OE, g_spinor_field[DUM_DERI], Even_new);
    /* The sign is plus, since in Hopping_Matrix */
    /* the minus is missing                      */
    assign_mul_add_r(g_spinor_field[DUM_DERI], +1., Odd, VOLUME/2);
    /* Do the inversion with the preconditioned  */
    /* matrix to get the odd sites               */

    /* Here we invert the hermitean operator squared */
    gamma5(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI], VOLUME/2);
    if(g_proc_id == 0) {
        //printf("# Using CG!\n");
        printf("# mu = %f, kappa = %f, csw = %f\n",
               g_mu/2./g_kappa, g_kappa, g_c_sw);
        fflush(stdout);
    }

    if(solver_flag == CG) {
        if(g_proc_id == 0) {
            printf("# Using CG!\n");
            fflush(stdout);
        }
        iter = cg_her(Odd_new, g_spinor_field[DUM_DERI], max_iter,
                      precision, rel_prec,
                      VOLUME/2, Qsq);
        Qm(Odd_new, Odd_new);
    } else if(solver_flag == INCREIGCG) {

        if(g_proc_id == 0) {
            printf("# Using Incremental Eig-CG!\n");
            fflush(stdout);
        }
        iter = incr_eigcg(VOLUME/2,solver_params.eigcg_nrhs, solver_params.eigcg_nrhs1, Odd_new, g_spinor_field[DUM_DERI], solver_params.eigcg_ldh, Qsq,
                          solver_params.eigcg_tolsq1, solver_params.eigcg_tolsq, solver_params.eigcg_restolsq , solver_params.eigcg_rand_guess_opt,
                          rel_prec, max_iter, solver_params.eigcg_nev, solver_params.eigcg_vmax);
        Qm(Odd_new, Odd_new);

    } else {
        if(g_proc_id == 0) {
            printf("# This solver is not available for this operator. Exiting!\n");
            fflush(stdout);
        }
        return 0;
    }


    /* Reconstruct the even sites                */
    Hopping_Matrix(EO, g_spinor_field[DUM_DERI], Odd_new);
    clover_inv(g_spinor_field[DUM_DERI], +1, g_mu);
    /* The sign is plus, since in Hopping_Matrix */
    /* the minus is missing                      */
    assign_add_mul_r(Even_new, g_spinor_field[DUM_DERI], +1., VOLUME/2);

    return(iter);
}
Example #14
/*lambda: largest eigenvalue, k eigenvector */
int evamax(double *rz, int k, double q_off, double eps_sq) {
  static double ritz,norm0,normg,normg0,beta_cg;
  static double costh,sinth,cosd,sind,aaa,normp,xxx;
  static double xs1,xs2,xs3;
  int iteration;
  /* Initialize k to be gaussian */
  random_spinor_field(g_spinor_field[k], VOLUME/2);
  norm0=square_norm(g_spinor_field[k], VOLUME/2, 1); 
  /*normalize k */
  assign_mul_bra_add_mul_r( g_spinor_field[k], 1./sqrt(norm0),0., g_spinor_field[k], VOLUME/2);
  Q_psi(DUM_SOLVER,k,q_off);
  Q_psi(DUM_SOLVER,DUM_SOLVER,q_off);
  /*compute the ritz functional */
  /*put g on DUM_SOLVER+2 and p on DUM_SOLVER+1*/
  ritz=scalar_prod_r(g_spinor_field[DUM_SOLVER], g_spinor_field[k], VOLUME/2, 1); 
  zero_spinor_field(g_spinor_field[DUM_SOLVER+2],VOLUME/2);
  assign_add_mul_r_add_mul(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER], g_spinor_field[k],
			   1., -ritz, VOLUME/2);
  assign(g_spinor_field[DUM_SOLVER+1], g_spinor_field[DUM_SOLVER+2], VOLUME/2);
  normg0=square_norm(g_spinor_field[DUM_SOLVER+2], VOLUME/2, 1);
  
  /* main loop */
  for(iteration=1;iteration<=ITER_MAX_BCG;iteration++) {
    if(normg0 <= eps_sq) break;
    Q_psi(DUM_SOLVER+2,DUM_SOLVER+1,q_off);
    Q_psi(DUM_SOLVER+2,DUM_SOLVER+2,q_off);
    /*   compute costh and sinth */
    normp=square_norm(g_spinor_field[DUM_SOLVER+1], VOLUME/2, 1);
    xxx=scalar_prod_r(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER+1], VOLUME/2, 1);
    
    xs1=0.5*(ritz+xxx/normp);
    xs2=0.5*(ritz-xxx/normp);
    normp=sqrt(normp);
    xs3=normg0/normp;
    aaa=sqrt(xs2*xs2+xs3*xs3);
    cosd=xs2/aaa;
    sind=xs3/aaa;
    
    if(cosd>=0.) { 
      costh=sqrt(0.5*(1.+cosd));
      sinth=0.5*sind/costh;
    }
    else {
      sinth=sqrt(0.5*(1.-cosd));
      costh=0.5*sind/sinth;
    } 
    ritz=xs1+aaa;
    
    assign_add_mul_r_add_mul(g_spinor_field[k], g_spinor_field[k], g_spinor_field[DUM_SOLVER+1], 
			     costh-1., sinth/normp, VOLUME/2);
    assign_add_mul_r_add_mul(g_spinor_field[DUM_SOLVER], g_spinor_field[DUM_SOLVER], g_spinor_field[DUM_SOLVER+2],
			     costh-1., sinth/normp, VOLUME/2);
    
    /*   compute g */
    zero_spinor_field(g_spinor_field[DUM_SOLVER+2],VOLUME/2);
    assign_add_mul_r_add_mul(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER], g_spinor_field[k], 
			     1., -ritz, VOLUME/2);
    
    /*   calculate the norm of g' and beta_cg=costh g'^2/g^2 */
    normg=square_norm(g_spinor_field[DUM_SOLVER+2], VOLUME/2, 1);
    beta_cg=costh*normg/normg0;
    if(beta_cg*costh*normp>20.*sqrt(normg))  beta_cg=0.;
    normg0=normg;    
    /*   compute the new value of p */
    assign_add_mul_r(g_spinor_field[DUM_SOLVER+1], g_spinor_field[k], -scalar_prod_r(g_spinor_field[k], g_spinor_field[DUM_SOLVER+1], VOLUME/2, 1), VOLUME/2);
    assign_mul_add_r(g_spinor_field[DUM_SOLVER+1],beta_cg, g_spinor_field[DUM_SOLVER+2], VOLUME/2);
    /*   restore the state of the iteration */
    if(iteration%20==0) {
      /* readjust x */
      xxx=sqrt(square_norm(g_spinor_field[k], VOLUME/2, 1));
      assign_mul_bra_add_mul_r( g_spinor_field[k], 1./xxx,0., g_spinor_field[k], VOLUME/2);
      Q_psi(DUM_SOLVER,k,q_off);
      Q_psi(DUM_SOLVER,DUM_SOLVER,q_off);
      /*compute the ritz functional */
      ritz=scalar_prod_r(g_spinor_field[DUM_SOLVER], g_spinor_field[k], VOLUME/2, 1);
      /*put g on DUM_SOLVER+2 and p on DUM_SOLVER+1*/
      zero_spinor_field(g_spinor_field[DUM_SOLVER+2],VOLUME/2);
      assign_add_mul_r_add_mul(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER], g_spinor_field[k],
			       1., -ritz, VOLUME/2);
      normg0=square_norm(g_spinor_field[DUM_SOLVER+2], VOLUME/2, 1);
      /* subtract a linear combination of x and g from p to
	 ensure (x,p)=0 and (p,g)=(g,g) */
      cosd=scalar_prod_r(g_spinor_field[k], g_spinor_field[DUM_SOLVER+1], VOLUME/2, 1);
      assign_add_mul_r(g_spinor_field[DUM_SOLVER+1], g_spinor_field[k], -cosd, VOLUME/2);
      cosd=scalar_prod_r(g_spinor_field[DUM_SOLVER+1], g_spinor_field[DUM_SOLVER+2], VOLUME/2, 1)-normg0;
      assign_add_mul_r(g_spinor_field[DUM_SOLVER+1], g_spinor_field[DUM_SOLVER+2], -cosd/sqrt(normg0), VOLUME/2);
    }
  }
  *rz=ritz;
  return iteration;
}
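What the "ritz functional" in the comments refers to, read off the calls (the two nested Q_psi applications give $Q^2 x$, and g is assembled as $Q^2 x - \lambda x$): for the normalised field $x$ kept in g_spinor_field[k],

\[
\lambda(x) \;=\; \frac{\langle x,\,Q^2 x\rangle}{\langle x,\,x\rangle},
\qquad
g \;=\; Q^2 x - \lambda(x)\,x \;\propto\; \nabla\lambda(x)\Big|_{\|x\|=1},
\]

and each iteration rotates $x$ (and $Q^2 x$ along with it) in the plane spanned by $x$ and the search direction $p$, $x \leftarrow \cos\theta\,x + \sin\theta\,p/\|p\|$, with $\theta$ (costh, sinth) chosen to maximise $\lambda$ in that plane. The beta_cg term turns this into a conjugate-gradient-like ascent rather than plain steepest ascent, and the block executed every 20 iterations renormalises and re-orthogonalises to control rounding.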
Example #15
int bicgstab2(spinor * const x0, spinor * const b, const int max_iter, 
		double eps_sq, const int rel_prec, const int N, matrix_mult f) {

  const int l = 2;
  double err;
  int i, j, k;
  int update_app = 0, update_res = 0;
  double rho0, rho1, beta, alpha, omega, gamma_hat,
    sigma, kappa0, kappal, rho, zeta0;
  double squarenorm, Mx=0., Mr=0.;
  spinor * r[5], * u[5], * r0_tilde, * u0, * x, * xp, * bp;
  double Z[3][3], y0[3], yl[3], yp[3], ypp[3];
  spinor ** solver_field = NULL;
  const int nr_sf = 10;

  k = -l;
  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }
  r0_tilde = solver_field[0];
  u0 = solver_field[1];
  r[0] = solver_field[2];
  u[0] = solver_field[3];
  r[1] = solver_field[4];
  u[1] = solver_field[5];
  r[2] = solver_field[6];
  u[2] = solver_field[7];
  bp = solver_field[8];
  xp = x0;
  x = solver_field[9];

  zero_spinor_field(x, N);
  assign(u[0], b, N);
  f(r0_tilde, xp);
  diff(r[0], u[0], r0_tilde, N);
  zero_spinor_field(u0, N);
  assign(r0_tilde, r[0], N); 
/*   random_spinor_field(r0_tilde, N); */
  assign(bp, r[0], N);
  squarenorm = square_norm(b, N, 1);

  rho0 = 1.;
  alpha = rho0;
  omega = rho0;
  err = square_norm(r[0], N, 1);
  Mr = err;
  Mx = err;
  zeta0 = err;
  while( k < max_iter && (((err > eps_sq) && (rel_prec == 0)) 
			  || ((err > eps_sq*squarenorm) && (rel_prec == 1)) 
			  )) {
    k+=l;

    /* The BiCG part */
    rho0 *= -omega; 
    for(j = 0; j < l; j++) {
      rho1 = scalar_prod_r(r[j], r0_tilde, N, 1);
      beta = alpha*(rho1/rho0); 
      rho0 = rho1;
/*       if(g_proc_id == 0) {printf("beta = %e, alpha = %e, rho0 = %e\n", beta, alpha, rho0);fflush(stdout);} */
      for(i = 0; i <= j; i++) {
	/* u_i = r_i - \beta u_i */
	assign_mul_add_r(u[i], -beta, r[i], N);
      }
      f(u[j+1], u[j]);
      sigma = scalar_prod_r(u[j+1], r0_tilde, N, 1);
      alpha = rho1/sigma;
/*       if(g_proc_id == 0) {printf("sigma = %e, alpha = %e\n", sigma, alpha);fflush(stdout);} */
      /* x = x + \alpha u_0 */
      assign_add_mul_r(x, u[0], alpha, N);
      /* r_i = r_i - \alpha u_{i+1} */
      for(i = 0; i <= j; i++) {
	assign_add_mul_r(r[i], u[i+1], -alpha, N);
      }
      f(r[j+1], r[j]);
      err = square_norm(r[j+1], N, 1);
      if(g_proc_id == 0 && g_debug_level > 1) {printf("%d %d err = %e\n", k, j, err);fflush(stdout);}
      if(err > Mr) Mr = err;
      if(err > Mx) Mx = err;
    }

    /* The polynomial part */

    /* Z = R* R */
    for(i = 0; i <= l; i++){
      for(j = 0; j <= i; j++){
	Z[i][j] = scalar_prod_r(r[j], r[i], N, 1);
	Z[j][i] = Z[i][j];
      }
    }

    /* r0tilde and rl_tilde */
    y0[0] = -1;
    y0[2] = 0.;
    y0[1] = Z[1][0]/Z[1][1]; 

    yl[0] = 0.;
    yl[2] = -1.;
    yl[1] = Z[1][2]/Z[1][1]; 

    /* Convex combination */
    for(i = 0; i < l+1; i++){
      yp[i] = 0.;
      ypp[i] = 0.;
      for(j = 0; j < l+1; j++) {
	yp[i] +=Z[i][j]*y0[j];
	ypp[i] +=Z[i][j]*yl[j];
      }
    }
    kappa0 = sqrt( y0[0]*yp[0] + y0[1]*yp[1] + y0[2]*yp[2] );
    kappal = sqrt( yl[0]*ypp[0] + yl[1]*ypp[1] + yl[2]*ypp[2] );
    rho = (yl[0]*yp[0] + yl[1]*yp[1] + yl[2]*yp[2])/kappa0/kappal;
    if(fabs(rho) > 0.7) {
      gamma_hat = rho;
    }
    else {
      gamma_hat = rho*0.7/fabs(rho);
    }
    for(i = 0; i <= l; i++) {
      y0[i] -= gamma_hat*kappa0*yl[i]/kappal;
    }

    /* Update */
    omega = y0[l];
    for(i = 1; i < l+1; i++) {
      assign_add_mul_r(u[0], u[i], -y0[i], N);
      assign_add_mul_r(x, r[i-1], y0[i], N);
      assign_add_mul_r(r[0], r[i], -y0[i], N);
    }
    err = kappa0*kappa0;
    /* Reliable update part */
    if(err > Mr) Mr = err;
    if(err > Mx) Mx = err;    
    update_app = (err < 1.e-4*zeta0 && zeta0 <= Mx);
    update_res = ((err < 1.e-4*Mr && zeta0 <= Mr) || update_app);
    if(update_res) {
      if(g_proc_id == 0 && g_debug_level > 1) printf("Update res\n");
      f(r[0], x);
      diff(r[0], bp, r[0], N);
      Mr = err;
      if(update_app) {
	if(g_proc_id == 0  && g_debug_level > 1) printf("Update app\n");
	Mx = err;
	assign_add_mul_r(xp, x, 1., N);
	zero_spinor_field(x, N);
	assign(bp, r[0], N);
      }
    }
    update_app = 0;
    update_res = 0;
    if(g_proc_id == 0 && g_debug_level > 0){
      printf(" BiCGstab(2) convex iterated %d %d, %e rho0 = %e, alpha = %e, gamma_hat = %e\n", 
	     l, k, err, rho0, alpha, gamma_hat);
      fflush( stdout );
    }
  }
  assign_add_mul_r(x, xp, 1., N);
  assign(x0, x, N);
  if(k == max_iter) return(-1);
  return(k);
}
Example #16
void Msap_eo(spinor * const P, spinor * const Q, const int Ncy) {
    int blk, ncy = 0, eo, vol;
    spinor * r, * a, * b;
    double nrm;
    spinor * b_even, * b_odd, * a_even, * a_odd;
    spinor ** solver_field = NULL;
    const int nr_sf = 3;

    /*
     * here it would probably be better to get the working fields as a parameter
     * from the calling function
     */
    init_solver_field(&solver_field, VOLUME, nr_sf);
    r = solver_field[0];
    a = solver_field[1];
    b = solver_field[2];

    vol = block_list[0].volume/2;
    b_even = b;
    b_odd = b + vol + 1;
    a_even = a;
    a_odd = a + vol + 1;

    for(ncy = 0; ncy < Ncy; ncy++) {
        /* compute the global residue        */
        /* this can be done more efficiently */
        /* here only a naive implementation  */
        for(eo = 0; eo < 2; eo++) {
            D_psi(r, P);
            diff(r, Q, r, VOLUME);
            nrm = square_norm(r, VOLUME, 1);
            if(g_proc_id == 0 && g_debug_level > 1 && eo == 1) {
                printf("Msap: %d %1.3e\n", ncy, nrm);
            }
            /* choose the even (odd) block */

            for (blk = 0; blk < nb_blocks; blk++) {
                if(block_list[blk].evenodd == eo) {
                    /* get part of r corresponding to block blk into b_even and b_odd */
                    copy_global_to_block_eo(b_even, b_odd, r, blk);

                    assign_mul_one_pm_imu_inv(a_even, b_even, +1., vol);
                    Block_H_psi(&block_list[blk], a_odd, a_even, OE);
                    /* a_odd = a_odd - b_odd */
                    assign_mul_add_r(a_odd, -1., b_odd, vol);

                    mrblk(b_odd, a_odd, 3, 1.e-31, 1, vol, &Mtm_plus_block_psi, blk);

                    Block_H_psi(&block_list[blk], b_even, b_odd, EO);
                    mul_one_pm_imu_inv(b_even, +1., vol);
                    /* a_even = a_even - b_even */
                    assign_add_mul_r(a_even, b_even, -1., vol);

                    /* add even and odd part up to full spinor P */
                    add_eo_block_to_global(P, a_even, b_odd, blk);
                }
            }
        }
    }
    finalize_solver(solver_field, nr_sf);
    return;
}
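
For orientation (our notation, not taken from the sources): each block update above is one sweep of the Schwarz alternating procedure, and inside a block the code performs the standard even/odd elimination of the block residual r = (r_e, r_o),

\[
\psi_o\;\approx\;\bigl(M_{oo}-M_{oe}M_{ee}^{-1}M_{eo}\bigr)^{-1}\bigl(r_o-M_{oe}M_{ee}^{-1}r_e\bigr),
\qquad
\psi_e\;=\;M_{ee}^{-1}\bigl(r_e-M_{eo}\,\psi_o\bigr),
\]

where M_{ee}, M_{eo}, M_{oe}, M_{oo} denote the blocks of the local Dirac operator (the code expresses the off-diagonal parts through Block_H_psi with its own sign convention), and the odd-odd Schur system is solved only approximately by a few mrblk iterations; an exact inner solve is not needed for a smoother.
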
Example #17
0
int mixed_cg_mms_tm_nd(spinor ** const Pup, spinor ** const Pdn, 
		 spinor * const Qup, spinor * const Qdn, 
		 solver_pm_t * solver_pm) {

  double eps_sq = solver_pm->squared_solver_prec;
  int noshifts = solver_pm->no_shifts;
  int rel_prec = solver_pm->rel_prec;
  int max_iter = solver_pm->max_iter;
  int check_abs, check_rel;
  double * shifts = solver_pm->shifts;
  int Nshift = noshifts;
 
  // algorithm
  double rr_up, rr_dn, rr, rr_old, r0r0, dAd_up, dAd_dn, dAd;  
  
  if(rel_prec){
    check_rel = 1;
    check_abs = 0;
   }
   else{
    check_rel = 0;
    check_abs = 1;     
  }
  
  int use_eo=1, eofactor=2;
  //not even-odd?
  if(solver_pm->sdim == VOLUME) {
    eofactor = 1;
    use_eo = 0;
  }
  
  int N = VOLUME/eofactor;
  int Vol = VOLUMEPLUSRAND/eofactor;
 
  
  // norm of source
  rr_up = square_norm(Qup, N, 1);
  rr_dn = square_norm(Qdn, N, 1);
  rr    = rr_up + rr_dn;  
 
  if( (g_cart_id == 0 && g_debug_level > 2)) printf("# CGMMSND_mixed: Initial mms residue: %.6e\n", rr);
  if(rr < 1.0e-4){
    if( (g_cart_id == 0 && g_debug_level > 2)) printf("# CGMMSND_mixed: norm of source too low: falling back to double mms solver %.6e\n", rr);
    return(cg_mms_tm_nd(Pup, Pdn, Qup, Qdn, solver_pm));
  }
  
  r0r0   = rr;	// for relative precision 
  rr_old = rr;	// for the first iteration
  
  
  
  //allocate auxiliary solver fields
  spinor ** sf = NULL;
  const int nr_sf = 6;
  init_solver_field(&sf, Vol, nr_sf);  
   
  spinor32 ** sf32 = NULL;
  const int nr_sf32 = 8;
  init_solver_field_32(&sf32, Vol, nr_sf32);  
  
  
  //spinor fields
  //we need one field less than the number of shifts, since one is taken care of by the usual cg fields
  init_mms_tm_nd_32(noshifts-1, Vol);
   
  // Pup/dn can be used as auxiliary work fields, since they are not needed until the very end (they could also serve as an initial guess at the very start)
  // Qup/dn can be used for feedback or, failing that, as further auxiliary fields
  

  
  //allocate cg constants
  double * sigma;
  double * zitam1, * zita;
  double * alphas, * betas;
  double gamma;
  double alpham1;
  sigma = (double*)calloc((noshifts), sizeof(double));
  zitam1 = (double*)calloc((noshifts), sizeof(double));
  zita = (double*)calloc((noshifts), sizeof(double));
  alphas = (double*)calloc((noshifts), sizeof(double));
  betas = (double*)calloc((noshifts), sizeof(double));



  spinor32 *  r_up, *  r_dn, * Ad_up, * Ad_dn, *  x_up, *  x_dn, *  d_up, *  d_dn;		
  spinor * r_up_d, * r_dn_d, * x_up_d, * x_dn_d, * Ax_up_d, * Ax_dn_d;
  
 // iteration counter
 int j; 
 
 //reliable update flag
 int rel_update = 0;
 //no of reliable updates done
 int no_rel_update = 0;
 //use reliable update flag
 int use_reliable = 1;
 
 double rel_delta = 1.0e-10;
 int trigger_shift = -1;
 double * res;
 double * res0;
 double * maxres;
 res = (double*)calloc((noshifts), sizeof(double));
 res0 = (double*)calloc((noshifts), sizeof(double));
 maxres = (double*)calloc((noshifts), sizeof(double)); 
    
  /////////////////
  // ASSIGNMENTS //
  /////////////////
  
  x_up  = sf32[0];	
  x_dn  = sf32[1];	
  r_up  = sf32[2];	
  r_dn  = sf32[3];
  d_up  = sf32[4];
  d_dn  = sf32[5];
  Ad_up = sf32[6];
  Ad_dn = sf32[7];


  x_up_d = sf[0];
  x_dn_d = sf[1];
  r_up_d = sf[2];
  r_dn_d = sf[3];
  Ax_up_d = sf[4];
  Ax_dn_d = sf[5];  
  
  /*
  //matrix test
   spinor32 * help_low_up = sf32[0];
   spinor32 * help_low_dn = sf32[1];   
   spinor * help_high_up = sf[0];
   spinor * help_high_dn = sf[1];   
   assign_to_32(help_low_up, Qup, N);
   assign_to_32(help_low_dn, Qdn, N);   
   assign(help_high_up, Qup, N);
   assign(help_high_dn, Qdn, N);   
   double sqn_high = square_norm(help_high_up,N,1) +
                     square_norm(help_high_dn,N,1);
   printf("square_norm(Q_high) = %e\n", sqn_high);
   float sqn_low  = square_norm_32(help_low_up,N,1) +
                    square_norm_32(help_low_dn,N,1);   
   printf("square_norm(Q_low) = %e\n", sqn_low);  
   
   solver_pm->M_ndpsi32(sf32[2], sf32[3], help_low_up, help_low_dn);
   solver_pm->M_ndpsi(sf[2], sf[3], help_high_up, help_high_dn);
   
   assign_to_64(sf[4], sf32[2], N);
   assign_to_64(sf[5], sf32[3], N);   
   diff(sf[0], sf[4], sf[2], N);
   diff(sf[1], sf[5], sf[3], N);   
   double sqnrm = square_norm(sf[0], N, 1) +
                  square_norm(sf[1], N, 1);
   printf("Operator 32 test: (square_norm) / (spinor component) = %.8e\n", sqnrm/24.0/N);
   exit(1);  
  */
  
  // r(0) = b
  assign_to_32(r_up, Qup, N);
  assign_to_32(r_dn, Qdn, N); 
  
  // d(0) = b
  assign_to_32(d_up, Qup, N);
  assign_to_32(d_dn, Qdn, N); 
  

  
  maxres[0] = rr;
  res[0] = rr;
  res0[0] = rr;
  alphas[0] = 1.0;
  betas[0] = 0.0;
  sigma[0] = shifts[0]*shifts[0];
  if(g_cart_id == 0 && g_debug_level > 2) printf("# CGMMSND_mixed: shift %d is %e\n", 0, sigma[0]);

  // currently only implemented for P=0 
  for(int im = 1; im < noshifts; im++) {
    maxres[im] = rr;
    res[im] = rr;
    res0[im] = rr;    
    sigma[im] = shifts[im]*shifts[im] - sigma[0];
    if(g_cart_id == 0 && g_debug_level > 2) printf("# CGMMSND_mixed: shift %d is %e\n", im, sigma[im]);
    // these will be the result spinor fields
    zero_spinor_field_32(mms_x_up[im-1], N);
    zero_spinor_field_32(mms_x_dn[im-1], N);    

    assign_to_32(mms_d_up[im-1], Qup, N);
    assign_to_32(mms_d_dn[im-1], Qdn, N);
    zitam1[im] = 1.0;
    zita[im] = 1.0;
    alphas[im] = 1.0;
    betas[im] = 0.0;
  }

  //zero fields for solution Pup, Pdn
  for(int im = 0; im < noshifts; im++){
    zero_spinor_field(Pup[im], N);
    zero_spinor_field(Pdn[im], N);    
  }
  
  
  //////////
  // LOOP //
  //////////
    
  for (j = 0; j < max_iter; j++) {   
      // A*d(k)
    solver_pm->M_ndpsi32(Ad_up, Ad_dn, d_up,  d_dn);     
    //add the zeroth shift
    assign_add_mul_r_32(Ad_up, d_up, (float) sigma[0], N);
    assign_add_mul_r_32(Ad_dn, d_dn, (float) sigma[0], N);
	     
    
    // alpha = r(k)*r(k) / d(k)*A*d(k)
    dAd_up = scalar_prod_r_32(d_up, Ad_up, N, 1);
    dAd_dn = scalar_prod_r_32(d_dn, Ad_dn, N, 1);

    dAd    = dAd_up + dAd_dn; 
    alpham1 = alphas[0];
    alphas[0]  = rr_old / dAd;	// rr_old is carried over from the previous iteration
    
   
    // r(k+1)
    assign_add_mul_r_32(r_up, Ad_up, (float) -alphas[0],N);
    assign_add_mul_r_32(r_dn, Ad_dn, (float) -alphas[0],N);

    // r(k+1)*r(k+1)
    rr_up  = square_norm_32(r_up, N, 1);
    rr_dn  = square_norm_32(r_dn, N, 1);
    rr     = rr_up + rr_dn;
    
      

    if((g_cart_id == 0) && (g_debug_level > 2)) printf("# CGMMSND_mixed: mms iteration j = %i: rr = %.6e\n", j, rr);

		 

    // check whether the requested precision has been reached and abort if so
    if ( ((check_abs)&&(rr <= eps_sq)) || ((check_rel)&&(rr <= eps_sq*r0r0)) ) 
    {
	if ((check_rel)&&(rr <= eps_sq*r0r0)) {
	  if((g_cart_id == 0) && (g_debug_level > 3)) printf("# CGMMSND_mixed: Reached relative solver precision of eps_rel = %.2e\n", eps_sq);
	}
      break;
   }
    
    // update alphas and zitas  
    // used later
    for(int im = 1; im < noshifts; im++) {
      gamma = zita[im]*alpham1/(alphas[0]*betas[0]*(1.-zita[im]/zitam1[im]) 
				+ alpham1*(1.+sigma[im]*alphas[0]));
      zitam1[im] = zita[im];
      zita[im] = gamma;
      alphas[im] = alphas[0]*zita[im]/zitam1[im];
    }  
    
    //check for reliable update
    res[0] = rr;
    for(int im=1; im<noshifts; im++) res[im] = rr * zita[im]; 
      
    rel_update = 0;
    for(int im = (noshifts-1); im >= 0; im--) {
      if( res[im] > maxres[im] ) maxres[im] = res[im];
      if( (res[im] < rel_delta*res0[im]) && (res0[im]<=maxres[im]) && (use_reliable) ) rel_update=1; 
      if( rel_update && ( trigger_shift == -1) ) trigger_shift = im;
    }     
    
    if(!rel_update)
    {
      // x_j(k+1) = x_j(k) + alpha_j*d_j(k) 
      // alphas are set above
      assign_add_mul_r_32(x_up, d_up, (float) alphas[0], N);   
      assign_add_mul_r_32(x_dn, d_dn, (float) alphas[0], N);
      
      
      for(int im = 1; im < noshifts; im++) {
	assign_add_mul_r_32(mms_x_up[im-1], mms_d_up[im-1], (float) alphas[im],  N);   
	assign_add_mul_r_32(mms_x_dn[im-1], mms_d_dn[im-1], (float) alphas[im],  N);  
      }  
   
      // beta = r(k+1)*r(k+1) / r(k)*r(k)
      betas[0] = rr / rr_old;
      rr_old = rr;  // for next iteration
      
      // d_0(k+1) = r(k+1) + beta*d_0(k) 
      assign_mul_add_r_32(d_up, (float) betas[0], r_up, N);  
      assign_mul_add_r_32(d_dn, (float) betas[0], r_dn, N); 
       
      // d_j(k+1) = zita*r(k+1) + beta*d_j(k)
      for(int im = 1; im < noshifts; im++) {
	betas[im] = betas[0]*zita[im]*alphas[im]/(zitam1[im]*alphas[0]);
	assign_mul_add_mul_r_32(mms_d_up[im-1], r_up, (float) betas[im], (float) zita[im], N);
	assign_mul_add_mul_r_32(mms_d_dn[im-1], r_dn, (float) betas[im], (float) zita[im], N);
      }   
    }
    else{
      //reliable update
      if( (g_cart_id == 0) && (g_debug_level > 3) ){
	printf("# CGMMSND_mixed: Shift %d with offset squared %e triggered a reliable update\n", trigger_shift, sigma[trigger_shift]);
      }
      //add low prec solutions  
      assign_add_mul_r_32(x_up, d_up, (float) alphas[0], N); 
      assign_add_mul_r_32(x_dn, d_dn, (float) alphas[0], N); 
      
      addto_32(Pup[0], x_up, N);
      addto_32(Pdn[0], x_dn, N);	    
      for(int im = 1; im < noshifts; im++) {  
	assign_add_mul_r_32(mms_x_up[im-1], mms_d_up[im-1], alphas[im], N);
	assign_add_mul_r_32(mms_x_dn[im-1], mms_d_dn[im-1], alphas[im], N);	
	addto_32(Pup[im], mms_x_up[im-1], N);
        addto_32(Pdn[im], mms_x_dn[im-1], N);	
      }
      
      //add low precision for shift 0 only
      addto_32(x_up_d, x_up, N); 
      addto_32(x_dn_d, x_dn, N);      
 
      
      solver_pm->M_ndpsi(Ax_up_d, Ax_dn_d, x_up_d,  x_dn_d);
      //add the zeroth shift
      assign_add_mul_r(Ax_up_d, x_up_d, sigma[0], N);
      assign_add_mul_r(Ax_dn_d, x_dn_d, sigma[0], N);
      
      diff(r_up_d, Qup, Ax_up_d, N);         
      diff(r_dn_d, Qdn, Ax_dn_d, N); 
 
      rr_up = square_norm(r_up_d, N, 1);
      rr_dn = square_norm(r_dn_d, N, 1);
      rr    = rr_up + rr_dn;
      if ((g_cart_id == 0) && (g_debug_level > 3) ) printf("# CGMMSND_mixed: New residue after reliable update: %.6e\n", rr);
       
      //update res[im]
      res[0] = rr;

       
      if(res[trigger_shift] > res0[trigger_shift]){
	if(g_cart_id == 0) printf("# CGMMSND_mixed: Warning: residue of shift no %d got larger after rel. update\n", trigger_shift);
	//if the zeroth shift is not getting better -> no further convergence, break
	if(trigger_shift == 0) break;
      }    
      
      //zero float fields
      zero_spinor_field_32(x_up, N);
      zero_spinor_field_32(x_dn, N);        
      for(int im = 1; im < noshifts; im++) {
	zero_spinor_field_32(mms_x_up[im-1], N);
	zero_spinor_field_32(mms_x_dn[im-1], N);  
      }
      
      //refresh the low-precision residual from the double-precision one
      assign_to_32(r_up, r_up_d, N);
      assign_to_32(r_dn, r_dn_d, N); 
      

      
      betas[0] = res[0]/rr_old;
      rr_old = rr;
      // d_0(k+1) = r(k+1) + beta*d_0(k)
      assign_mul_add_r_32(d_up, betas[0], r_up, N);
      assign_mul_add_r_32(d_dn, betas[0], r_dn, N);      
      // d_j(k+1) = zita*r(k+1) + beta*d_j(k)
      for(int im = 1; im < noshifts; im++) {
	betas[im] = betas[0]*zita[im]*alphas[im]/(zitam1[im]*alphas[0]);
        assign_mul_add_mul_r_32(mms_d_up[im-1], r_up, (float) betas[im], (float) zita[im], N);
	assign_mul_add_mul_r_32(mms_d_dn[im-1], r_dn, (float) betas[im], (float) zita[im], N);
      } 
      
      //new maxres for the shift that initiated the reliable update
      res[trigger_shift] = res[0]*zita[trigger_shift]*zita[trigger_shift];
      res0[trigger_shift] = res[trigger_shift];  
      maxres[trigger_shift] = res[trigger_shift];
      trigger_shift = -1;
      no_rel_update ++;
    }	//reliable update	
    
    //check if some shift is converged
    for(int im = 1; im < noshifts; im++) {    
      if(j > 0 && (j % 10 == 0) && (im == noshifts-1)) {
	double sn = square_norm_32(mms_d_up[im-1], N, 1);
	sn +=       square_norm_32(mms_d_dn[im-1], N, 1);
	if(alphas[noshifts-1]*alphas[noshifts-1]*sn <= eps_sq) {
	  noshifts--;
	  if( (g_debug_level > 1) && (g_cart_id == 0) ) {
	    printf("# CGMMSND_mixed: at iteration %d removed one shift, %d remaining\n", j, noshifts);
	  }
	  //if removed we add the latest solution vector for this shift 	  
	  addto_32(Pup[im], mms_x_up[im-1], N);
          addto_32(Pdn[im], mms_x_dn[im-1], N);
	}
      }
    }
       
  }//LOOP
  
  if( (g_cart_id == 0) && (g_debug_level > 1) ) printf("# CGMMSND_mixed: Final mms residue: %.6e\n", rr);

  //add the latest solutions 
  for(int im = 0; im < noshifts; im++) {  
    if(im == 0){   
      addto_32(Pup[0], x_up, N);
      addto_32(Pdn[0], x_dn, N);        
    }
    else{     
      addto_32(Pup[im], mms_x_up[im-1], N);
      addto_32(Pdn[im], mms_x_dn[im-1], N);      
    }
  } 
  
  if(g_debug_level > 4){
    if(g_cart_id == 0) printf("# CGMMSND_mixed: Checking mms result:\n");
    //loop over all shifts (-> Nshift) 
    for(int im = 0; im < Nshift; im++){
      solver_pm->M_ndpsi(sf[0], sf[1], Pup[im], Pdn[im]);
      assign_add_mul_r(sf[0], Pup[im] , shifts[im]*shifts[im], N);
      assign_add_mul_r(sf[1], Pdn[im] , shifts[im]*shifts[im], N);
      diff(sf[2], sf[0], Qup, N);
      diff(sf[3], sf[1], Qdn, N);
      rr_up = square_norm(sf[2], N, 1);
      rr_dn = square_norm(sf[3], N, 1);      
      rr = rr_up + rr_dn;
      if(g_cart_id == 0) printf("# CGMMSND_mixed: Shift[%d] squared residue: %e\n", im, rr);
    }
  }
  
 
  finalize_solver(sf, nr_sf);  
  finalize_solver_32(sf32, nr_sf32); 
 
  //free cg constants
  free(sigma); free(zitam1); free(zita); free(alphas); free(betas);    
  
  //free reliable update stuff
  free(res); free(res0); free(maxres);


  //if not converged -> return(-1)
  if(j<max_iter){
    return(j);
  }
  else{
    return(-1);
  }
} // mixed_cg_mms_tm_nd
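
The zita/alphas/betas updates in the loop above are the standard multi-shift CG recurrence. Written out in our notation (alpha_k, beta_k are the coefficients of the unshifted system, sigma the shift; none of these symbols appear in the sources),

\[
\zeta^{\sigma}_{k+1}=\frac{\zeta^{\sigma}_{k}\,\alpha_{k-1}}
{\alpha_{k}\,\beta_{k}\bigl(1-\zeta^{\sigma}_{k}/\zeta^{\sigma}_{k-1}\bigr)
+\alpha_{k-1}\bigl(1+\sigma\,\alpha_{k}\bigr)},\qquad
\alpha^{\sigma}_{k}=\alpha_{k}\,\frac{\zeta^{\sigma}_{k+1}}{\zeta^{\sigma}_{k}},\qquad
\beta^{\sigma}_{k+1}=\beta_{k+1}\,\frac{\zeta^{\sigma}_{k+1}\,\alpha^{\sigma}_{k}}{\zeta^{\sigma}_{k}\,\alpha_{k}} ,
\]

so the shifted residuals stay proportional to the unshifted one, r^sigma_k = zeta^sigma_k r_k, which is why res[im] can be estimated from rr and zita[im] without any extra operator applications.
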
Example #18
0
/* P output = solution , Q input = source */
int cg_her_nd(spinor * const P_up,spinor * P_dn, spinor * const Q_up, spinor * const Q_dn, 
	      const int max_iter, double eps_sq, const int rel_prec, 
	      const int N, matrix_mult_nd f) {
  double normsp, normsq, pro, err, alpha_cg, beta_cg, squarenorm;
  int iteration;
  double err1, err2;
  spinor ** up_field = NULL;
  spinor ** dn_field = NULL;  
  const int nr_sf = 5;
  /* do we really need so many fields??? */
  init_solver_field(&up_field, VOLUMEPLUSRAND, nr_sf);
  init_solver_field(&dn_field, VOLUMEPLUSRAND, nr_sf);

  squarenorm = square_norm(Q_up, N, 1);
  squarenorm+= square_norm(Q_dn, N, 1);
  /*        !!!!   INITIALIZATION    !!!! */
  assign(up_field[0], P_up, N);
  assign(dn_field[0], P_dn, N);
  
  /*        (r_0,r_0)  =  normsq         */
  normsp =square_norm(P_up, N, 1);
  normsp+=square_norm(P_dn, N, 1);

/*   assign(up_field[5], Q_up, N); */
/*   assign(dn_field[5], Q_dn, N); */
  
  /* initialize residue r and search vector p */
  if(normsp==0){
    /* if a starting solution vector equal to zero is chosen */
    assign(up_field[1], Q_up, N);
    assign(dn_field[1], Q_dn, N);
    assign(up_field[2], Q_up, N);
    assign(dn_field[2], Q_dn, N);
    normsq =square_norm(Q_up, N, 1);
    normsq+=square_norm(Q_dn, N, 1);
  }
  else {
    /* if a starting solution vector different from zero is chosen */
    f(up_field[3],dn_field[3],
      up_field[0],dn_field[0]);
   
    diff(up_field[1], Q_up, up_field[3], N);
    diff(dn_field[1], Q_dn, dn_field[3], N);
    assign(up_field[2], up_field[1], N);
    assign(dn_field[2], dn_field[1], N);
    normsq =square_norm(up_field[2], N, 1);
    normsq+=square_norm(dn_field[2], N, 1);
  }

  /* main loop */
  for(iteration=0;iteration<max_iter;iteration++){
    f(up_field[4],dn_field[4],
      up_field[2],dn_field[2]);

    pro =scalar_prod_r(up_field[2], up_field[4], N, 1);
    pro+=scalar_prod_r(dn_field[2], dn_field[4], N, 1);
     
    /*  Compute alpha_cg(i+1)   */
    alpha_cg=normsq/pro;
     
    /*  Compute x_(i+1) = x_i + alpha_cg(i+1) p_i    */
    assign_add_mul_r(up_field[0], up_field[2],  alpha_cg, N);
    assign_add_mul_r(dn_field[0], dn_field[2],  alpha_cg, N);
    /*  Compute r_(i+1) = r_i - alpha_cg(i+1) Qp_i   */
    assign_add_mul_r(up_field[1], up_field[4], -alpha_cg, N);
    assign_add_mul_r(dn_field[1], dn_field[4], -alpha_cg, N);

    /* Check whether the precision is reached ... */
    err1 =square_norm(up_field[1], N, 1);
    err2 =square_norm(dn_field[1], N, 1);
    err = err1 + err2;
    if(g_debug_level > 1 && g_proc_id == g_stdio_proc) {
      printf("cg_her_nd : i = %d  esqr  %e = %e + %e \n",iteration,err, err1, err2); fflush( stdout);
    }

    if(((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*squarenorm) && (rel_prec == 1))) {
      assign(P_up, up_field[0], N);
      assign(P_dn, dn_field[0], N);
      g_sloppy_precision = 0;
      finalize_solver(up_field, nr_sf);
      finalize_solver(dn_field, nr_sf);
      return(iteration+1);
    }
#ifdef _USE_HALFSPINOR
    if(((err*err <= eps_sq) && (rel_prec == 0)) || ((err*err <= eps_sq*squarenorm) && (rel_prec == 1))) {
      g_sloppy_precision = 1;
      if(g_debug_level > 2 && g_proc_id == g_stdio_proc) {
	printf("sloppy precision on\n"); fflush( stdout);
      }
    }
#endif
    /* Compute beta_cg(i+1)
       Compute p_(i+1) = r_i+1 + beta_(i+1) p_i     */
    beta_cg=err/normsq;
    assign_mul_add_r(up_field[2], beta_cg, up_field[1], N);
    assign_mul_add_r(dn_field[2], beta_cg, dn_field[1], N);
    normsq=err;
  }

  assign(P_up, up_field[0], N);
  assign(P_dn, dn_field[0], N);
  g_sloppy_precision = 0;  
  
  finalize_solver(up_field, nr_sf);
  finalize_solver(dn_field, nr_sf);
  return(-1);
}
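
For reference, the loop above is the textbook CG recurrence applied to the flavour doublet, with every inner product summed over the up and dn components (notation ours):

\[
\alpha_k=\frac{(r_k,r_k)}{(p_k,Ap_k)},\qquad
x_{k+1}=x_k+\alpha_k p_k,\qquad
r_{k+1}=r_k-\alpha_k A p_k,\qquad
\beta_{k+1}=\frac{(r_{k+1},r_{k+1})}{(r_k,r_k)},\qquad
p_{k+1}=r_{k+1}+\beta_{k+1}p_k ,
\]

with A the hermitian, positive operator passed in as the matrix_mult_nd argument f.
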
Example #19
0
void op_invert(const int op_id, const int index_start, const int write_prop) {
  operator * optr = &operator_list[op_id];
  double atime = 0., etime = 0., nrm1 = 0., nrm2 = 0.;
  int i;
  optr->iterations = 0;
  optr->reached_prec = -1.;
  g_kappa = optr->kappa;
  boundary(g_kappa);

  atime = gettime();
  if(optr->type == TMWILSON || optr->type == WILSON || optr->type == CLOVER) {
    g_mu = optr->mu;
    g_c_sw = optr->c_sw;
    if(optr->type == CLOVER) {
      if (g_cart_id == 0 && g_debug_level > 1) {
	printf("#\n# csw = %e, computing clover leafs\n", g_c_sw);
      }
      init_sw_fields(VOLUME);
      sw_term( (const su3**) g_gauge_field, optr->kappa, optr->c_sw); 
      /* this must be EE here!   */
      /* to match clover_inv in Qsw_psi */
      sw_invert(EE, optr->mu);
    }

    for(i = 0; i < 2; i++) {
      if (g_cart_id == 0) {
        printf("#\n# 2 kappa mu = %e, kappa = %e, c_sw = %e\n", g_mu, g_kappa, g_c_sw);
      }
      if(optr->type != CLOVER) {
	if(use_preconditioning){
	  g_precWS=(void*)optr->precWS;
	}
	else {
	  g_precWS=NULL;
	}
	
	optr->iterations = invert_eo( optr->prop0, optr->prop1, optr->sr0, optr->sr1,
				      optr->eps_sq, optr->maxiter,
				      optr->solver, optr->rel_prec,
				      0, optr->even_odd_flag,optr->no_extra_masses, optr->extra_masses, optr->id );
	
	/* check result */
	M_full(g_spinor_field[4], g_spinor_field[5], optr->prop0, optr->prop1);
      }
      else {
	optr->iterations = invert_clover_eo(optr->prop0, optr->prop1, optr->sr0, optr->sr1,
					    optr->eps_sq, optr->maxiter,
					    optr->solver, optr->rel_prec,
					    &g_gauge_field, &Qsw_pm_psi, &Qsw_minus_psi);
	/* check result */
 	Msw_full(g_spinor_field[4], g_spinor_field[5], optr->prop0, optr->prop1);
      }

      diff(g_spinor_field[4], g_spinor_field[4], optr->sr0, VOLUME / 2);
      diff(g_spinor_field[5], g_spinor_field[5], optr->sr1, VOLUME / 2);

      nrm1 = square_norm(g_spinor_field[4], VOLUME / 2, 1);
      nrm2 = square_norm(g_spinor_field[5], VOLUME / 2, 1);
      optr->reached_prec = nrm1 + nrm2;

      /* convert to standard normalisation  */
      /* we have to mult. by 2*kappa        */
      if (optr->kappa != 0.) {
        mul_r(optr->prop0, (2*optr->kappa), optr->prop0, VOLUME / 2);
        mul_r(optr->prop1, (2*optr->kappa), optr->prop1, VOLUME / 2);
      }
      if (optr->solver != CGMMS && write_prop) /* CGMMS handles its own I/O */
        optr->write_prop(op_id, index_start, i);
      if(optr->DownProp) {
        optr->mu = -optr->mu;
      } else 
        break;
    }
  }
  else if(optr->type == DBTMWILSON || optr->type == DBCLOVER) {
    g_mubar = optr->mubar;
    g_epsbar = optr->epsbar;
    g_c_sw = 0.;
    if(optr->type == DBCLOVER) {
      g_c_sw = optr->c_sw;
      if (g_cart_id == 0 && g_debug_level > 1) {
	printf("#\n# csw = %e, computing clover leafs\n", g_c_sw);
      }
      init_sw_fields(VOLUME);
      sw_term( (const su3**) g_gauge_field, optr->kappa, optr->c_sw); 
      sw_invert_nd(optr->mubar*optr->mubar-optr->epsbar*optr->epsbar);
    }

    for(i = 0; i < SourceInfo.no_flavours; i++) {
      if(optr->type != DBCLOVER) {
	optr->iterations = invert_doublet_eo( optr->prop0, optr->prop1, optr->prop2, optr->prop3, 
					      optr->sr0, optr->sr1, optr->sr2, optr->sr3,
					      optr->eps_sq, optr->maxiter,
					      optr->solver, optr->rel_prec);
      }
      else {
	optr->iterations = invert_cloverdoublet_eo( optr->prop0, optr->prop1, optr->prop2, optr->prop3, 
						    optr->sr0, optr->sr1, optr->sr2, optr->sr3,
						    optr->eps_sq, optr->maxiter,
						    optr->solver, optr->rel_prec);
      }
      g_mu = optr->mubar;
      if(optr->type != DBCLOVER) {
	M_full(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+2], optr->prop0, optr->prop1); 
      }
      else {
	Msw_full(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+2], optr->prop0, optr->prop1); 
      }
      assign_add_mul_r(g_spinor_field[DUM_DERI+1], optr->prop2, -optr->epsbar, VOLUME/2);
      assign_add_mul_r(g_spinor_field[DUM_DERI+2], optr->prop3, -optr->epsbar, VOLUME/2);

      g_mu = -g_mu;
      if(optr->type != DBCLOVER) {
	M_full(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+4], optr->prop2, optr->prop3); 
      }
      else {
	Msw_full(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+4], optr->prop2, optr->prop3);
      }
      assign_add_mul_r(g_spinor_field[DUM_DERI+3], optr->prop0, -optr->epsbar, VOLUME/2);
      assign_add_mul_r(g_spinor_field[DUM_DERI+4], optr->prop1, -optr->epsbar, VOLUME/2);

      diff(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+1], optr->sr0, VOLUME/2); 
      diff(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+2], optr->sr1, VOLUME/2); 
      diff(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+3], optr->sr2, VOLUME/2); 
      diff(g_spinor_field[DUM_DERI+4], g_spinor_field[DUM_DERI+4], optr->sr3, VOLUME/2); 

      nrm1  = square_norm(g_spinor_field[DUM_DERI+1], VOLUME/2, 1); 
      nrm1 += square_norm(g_spinor_field[DUM_DERI+2], VOLUME/2, 1); 
      nrm1 += square_norm(g_spinor_field[DUM_DERI+3], VOLUME/2, 1); 
      nrm1 += square_norm(g_spinor_field[DUM_DERI+4], VOLUME/2, 1); 
      optr->reached_prec = nrm1;
      g_mu = g_mu1;
      /* For standard normalisation */
      /* we have to mult. by 2*kappa */
      mul_r(g_spinor_field[DUM_DERI], (2*optr->kappa), optr->prop0, VOLUME/2);
      mul_r(g_spinor_field[DUM_DERI+1], (2*optr->kappa), optr->prop1, VOLUME/2);
      mul_r(g_spinor_field[DUM_DERI+2], (2*optr->kappa), optr->prop2, VOLUME/2);
      mul_r(g_spinor_field[DUM_DERI+3], (2*optr->kappa), optr->prop3, VOLUME/2);
      /* the final result should be stored in the convention used in */
      /* hep-lat/0606011                                             */
      /* this requires multiplication of source with                 */
      /* (1+itau_2)/sqrt(2) and the result with (1-itau_2)/sqrt(2)   */

      mul_one_pm_itau2(optr->prop0, optr->prop2, g_spinor_field[DUM_DERI], 
                       g_spinor_field[DUM_DERI+2], -1., VOLUME/2);
      mul_one_pm_itau2(optr->prop1, optr->prop3, g_spinor_field[DUM_DERI+1], 
                       g_spinor_field[DUM_DERI+3], -1., VOLUME/2);
      /* write propagator */
      if(write_prop) optr->write_prop(op_id, index_start, i);

      mul_r(optr->prop0, 1./(2*optr->kappa), g_spinor_field[DUM_DERI], VOLUME/2);
      mul_r(optr->prop1, 1./(2*optr->kappa), g_spinor_field[DUM_DERI+1], VOLUME/2);
      mul_r(optr->prop2, 1./(2*optr->kappa), g_spinor_field[DUM_DERI+2], VOLUME/2);
      mul_r(optr->prop3, 1./(2*optr->kappa), g_spinor_field[DUM_DERI+3], VOLUME/2);

      /* mirror source, but not for volume sources */
      if(i == 0 && SourceInfo.no_flavours == 2 && SourceInfo.type != 1) {
        if (g_cart_id == 0) {
          fprintf(stdout, "# Inversion done in %d iterations, squared residue = %e!\n",
                  optr->iterations, optr->reached_prec);
        }
        mul_one_pm_itau2(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+2], optr->sr0, optr->sr2, -1., VOLUME/2);
        mul_one_pm_itau2(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+3], optr->sr1, optr->sr3, -1., VOLUME/2);

        mul_one_pm_itau2(optr->sr0, optr->sr2, g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI], +1., VOLUME/2);
        mul_one_pm_itau2(optr->sr1, optr->sr3, g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+1], +1., VOLUME/2);

      }
      /* volume sources need only one inversion */
      else if(SourceInfo.type == 1) i++;
    }
  }
  else if(optr->type == OVERLAP) {
    g_mu = 0.;
    m_ov=optr->m;
    eigenvalues(&optr->no_ev, 5000, optr->ev_prec, 0, optr->ev_readwrite, nstore, optr->even_odd_flag);
/*     ov_check_locality(); */
/*      index_jd(&optr->no_ev_index, 5000, 1.e-12, optr->conf_input, nstore, 4); */
    ov_n_cheby=optr->deg_poly;

    if(use_preconditioning==1)
      g_precWS=(void*)optr->precWS;
    else
      g_precWS=NULL;


    if(g_debug_level > 3) ov_check_ginsparg_wilson_relation_strong(); 

    invert_overlap(op_id, index_start); 

    if(write_prop) optr->write_prop(op_id, index_start, 0);
  }
  etime = gettime();
  if (g_cart_id == 0 && g_debug_level > 0) {
    fprintf(stdout, "# Inversion done in %d iterations, squared residue = %e!\n",
            optr->iterations, optr->reached_prec);
    fprintf(stdout, "# Inversion done in %1.2e sec. \n", etime - atime);
  }
  return;
}
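
The mul_one_pm_itau2 calls above rotate between the twisted and the physical flavour basis. The rotation acts only in flavour space and is simply a property of the Pauli matrix tau_2 (nothing specific to this code):

\[
\frac{1\pm i\tau_2}{\sqrt{2}}=\frac{1}{\sqrt{2}}
\begin{pmatrix} 1 & \pm 1\\ \mp 1 & 1\end{pmatrix},
\]

so the source is rotated with one sign and the solution rotated back with the opposite sign, which is what brings the heavy-doublet propagator into the convention of hep-lat/0606011 referred to in the comments.
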
Example #20
0
void Q_over_sqrt_Q_sqr(spinor * const R, double * const c, 
		       const int n, spinor * const S,
		       const double rnorm, const double minev) {
  
  int j;
  double fact1, fact2, temp1, temp2, temp3, temp4, maxev, tnorm;
  spinor  *sv, *d,  *dd,  *aux,  *aux3;
  double ap_eps_sq = 0.;

  sv=lock_Dov_WS_spinor(2);
  d=lock_Dov_WS_spinor(3);
  dd=lock_Dov_WS_spinor(4);
  aux=lock_Dov_WS_spinor(5);
  aux3=lock_Dov_WS_spinor(6);


  eigenvalues_for_cg_computed = no_eigenvalues - 1;
  if(eigenvalues_for_cg_computed < 0) eigenvalues_for_cg_computed = 0;
  maxev=1.0;
  
  fact1=4/(maxev-minev);
  fact2=-2*(maxev+minev)/(maxev-minev);
  
  zero_spinor_field(d, VOLUME);
  zero_spinor_field(dd, VOLUME); 
  
  if(1) assign_sub_lowest_eigenvalues(aux3, S, no_eigenvalues-1, VOLUME);
  else assign(aux3, S, VOLUME);
  
  /* Check whether switch for adaptive precision is on */
  /* this might be implemented again in the future */
  /* Use the 'old' version using Clenshaw's recursion for the 
     Chebysheff polynomial 
  */
  if(1) {
    for (j = n-1; j >= 1; j--) {
      assign(sv, d, VOLUME); 
      
      if ( (j%10) == 0 ) {
	assign_sub_lowest_eigenvalues(aux, d, no_eigenvalues-1, VOLUME);
      }
      else {
	assign(aux, d, VOLUME);
      }
      
      norm_Q_sqr_psi(R, aux, rnorm);
/*       printf("%d %e %e\n", j, R[0].s0.c0.re, R[0].s0.c0.im); */
/*       printf("%e %e\n", R[0].s1.c0.re, R[0].s1.c0.im); */
      temp1=-1.0;
      temp2=c[j];
      assign_mul_add_mul_add_mul_add_mul_r(d, R, dd, aux3, fact2, fact1, temp1, temp2, VOLUME);
      assign(dd, sv, VOLUME);
    } 
    
    if(1) assign_sub_lowest_eigenvalues(R, d, no_eigenvalues-1, VOLUME);
    else assign(R, d, VOLUME);
    
    norm_Q_sqr_psi(aux, R, rnorm);
    temp1=-1.0;
    temp2=c[0]/2.;
    temp3=fact1/2.;
    temp4=fact2/2.;
    assign_mul_add_mul_add_mul_add_mul_r(aux, d, dd, aux3, temp3, temp4, temp1, temp2, VOLUME);
    norm_Q_n_psi(R, aux, 1, rnorm);
  }
  else {
    /* Use the adaptive precision version using the forward recursion 
       for the Chebysheff polynomial 
    */
    
    /* d = T_0(Q^2) */
    assign(d, aux3, VOLUME);
    /* dd = T_1(Q^2) */
    norm_Q_sqr_psi(dd, d, rnorm);
    temp3 = fact1/2.;
    temp4 = fact2/2.;  
    assign_mul_add_mul_r(dd, d, temp3, temp4, VOLUME);
    /* r = c_1 T_1(Q^2) + 1./2 c_0 */
    temp1 = c[1];
    temp2 = c[0]/2.;
    mul_add_mul_r(R, dd, d, temp1, temp2, VOLUME);
    
    temp1=-1.0;
    for (j = 2; j <= n-1; j++) {
      /* aux = T_j(Q^2) = 2 Q^2 T_{j-1}(Q^2) - T_{j-2}(Q^2) */
      norm_Q_sqr_psi(aux, dd, rnorm);
      assign_mul_add_mul_add_mul_r(aux, dd, d, fact1, fact2, temp1, VOLUME);
      /* r = r + c_j T_j(Q^2) */
      temp2 = c[j];
      assign_add_mul_r(R, aux, temp2, VOLUME);
      /* The stopping criterion: tnorm = |c_j T_j(Q^2)|^2 */
      tnorm=square_norm(aux, VOLUME, 1);
      tnorm*=(temp2*temp2);
      
      /*
	auxnorm=square_norm(R);
	if(g_proc_id == g_stdio_proc){printf("j= %d\t|c T|^2= %g\t c_j= %g\t|r|^2= %g\n",j,tnorm,temp2,auxnorm); fflush( stdout);};
      */
      
      if(tnorm < ap_eps_sq) break; 
       /* d = T_{j-1}(Q^2) */
      assign(d, dd, VOLUME);
      /* dd = T_{j}(Q^2) */
      assign(dd, aux, VOLUME);
    }
    if(g_proc_id == g_stdio_proc && g_debug_level > 0) {
      printf("Order of Chebysheff approximation = %d\n",j); 
      fflush( stdout);
    }
     
    /* r = Q r */
    assign(aux, R, VOLUME); 
    norm_Q_n_psi(R, aux, 1, rnorm);

  }
  /* add in piece from projected subspace */
  addproj_q_invsqrt(R, S, no_eigenvalues-1, VOLUME);
  
  unlock_Dov_WS_spinor(2);
  unlock_Dov_WS_spinor(3);
  unlock_Dov_WS_spinor(4);
  unlock_Dov_WS_spinor(5);
  unlock_Dov_WS_spinor(6);
  return;
}
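
Both branches above evaluate the same Chebyshev series approximation of 1/sqrt(Q^2); the default branch uses Clenshaw's recursion. In our notation (not from the sources), with the spectrum of Q^2 mapped to [-1,1] by

\[
z=\frac{2Q^{2}-(\lambda_{\max}+\lambda_{\min})}{\lambda_{\max}-\lambda_{\min}}
\]

(this is exactly fact1*Q^2 + fact2 in the loop, and half of it in the final step), Clenshaw's recursion applied to the source phi reads

\[
d_j = c_j\,\phi + 2z\,d_{j+1} - d_{j+2}\quad (j=n-1,\dots,1),\qquad
p(Q^{2})\,\phi = z\,d_1 - d_2 + \tfrac{c_0}{2}\,\phi ,
\]

followed by one application of Q, so that on the subspace orthogonal to the projected low modes R approximates Q (Q^2)^{-1/2} S; the low-mode contribution is added back exactly by addproj_q_invsqrt.
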
Example #21
0
/* P output = solution , Q input = source */
int pcg_her(spinor * const P, spinor * const Q, const int max_iter, 
	    double eps_sq, const int rel_prec, const int N, matrix_mult f) {
  double normsp, pro, pro2, err, alpha_cg, beta_cg, squarenorm;
  int iteration;
  spinor ** solver_field = NULL;
  const int nr_sf = 5;

  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }
  squarenorm = square_norm(Q, N, 1);
  /*        !!!!   INITIALIZATION    !!!! */
  assign(solver_field[0], P, N);
  /*        (r_0,r_0)  =  normsq         */
  normsp = square_norm(P, N, 1);

  assign(solver_field[3], Q, N);
  /* initialize residue r and search vector p */
  if(normsp==0){
    /* if a starting solution vector equal to zero is chosen */
    /* r0 */
    assign(solver_field[1], solver_field[3], N);
    /* p0 */
  }
  else{
    /* if a starting solution vector different from zero is chosen */
    /* r0 = b - A x0 */
    f(solver_field[2], solver_field[0]);
    diff(solver_field[1], solver_field[3], solver_field[2], N);
  }
  /* z0 = M^-1 r0 */
  invert_eigenvalue_part(solver_field[3], solver_field[1], 10, N);
  /* p0 = z0 */
  assign(solver_field[2], solver_field[3], N);

  /* Is this really real? */
  pro2 = scalar_prod_r(solver_field[1], solver_field[3], N, 1);  
  /* main loop */
  for(iteration = 0; iteration < max_iter; iteration++) {
    /* A p */
    f(solver_field[4], solver_field[2]);

    pro = scalar_prod_r(solver_field[2], solver_field[4], N, 1);
    /*  Compute alpha_cg(i+1)   */
    alpha_cg=pro2/pro;
     
    /*  Compute x_(i+1) = x_i + alpha_cg(i+1) p_i    */
    assign_add_mul_r(solver_field[0], solver_field[2],  alpha_cg, N);
    /*  Compute r_(i+1) = r_i - alpha_cg(i+1) Qp_i   */
    assign_add_mul_r(solver_field[1], solver_field[4], -alpha_cg, N);

    /* Check whether the precision is reached ... */
    err=square_norm(solver_field[1], N, 1);
    if(g_debug_level > 1 && g_proc_id == g_stdio_proc) {
      printf("%d\t%g\n",iteration,err); fflush( stdout);
    }

    if(((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*squarenorm) && (rel_prec == 1))) {
      assign(P, solver_field[0], N);
      g_sloppy_precision = 0;
      finalize_solver(solver_field, nr_sf);
      return(iteration+1);
    }
#ifdef _USE_HALFSPINOR
    if(((err*err <= eps_sq) && (rel_prec == 0)) || ((err*err <= eps_sq*squarenorm) && (rel_prec == 1)) || iteration > 1400) {
      g_sloppy_precision = 1;
      if(g_debug_level > 2 && g_proc_id == g_stdio_proc) {
	printf("sloppy precision on\n"); fflush( stdout);
      }
    }
#endif
    /* z_j */
    beta_cg = 1/pro2;
/*     invert_eigenvalue_part(solver_field[3], solver_field[1], 10, N); */
    /* Compute beta_cg(i+1)
       Compute p_(i+1) = r_i+1 + beta_(i+1) p_i     */
    pro2 = scalar_prod_r(solver_field[1], solver_field[3], N, 1);
    beta_cg *= pro2;
    assign_mul_add_r(solver_field[2], beta_cg, solver_field[3], N);
  }
  assign(P, solver_field[0], N);
  g_sloppy_precision = 0;
/*   return(-1); */
  finalize_solver(solver_field, nr_sf);
  return(1);
}
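
The pro2 bookkeeping above is the preconditioned-CG form of the beta update: with z_k = M^{-1} r_k (invert_eigenvalue_part playing the role of M^{-1}; notation ours),

\[
\alpha_k=\frac{(r_k,z_k)}{(p_k,Ap_k)},\qquad
\beta_{k+1}=\frac{(r_{k+1},z_{k+1})}{(r_k,z_k)},\qquad
p_{k+1}=z_{k+1}+\beta_{k+1}\,p_k .
\]

Note that, as written, the per-iteration preconditioner application inside the loop is commented out, so only the initial residual is actually preconditioned.
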
Example #22
0
int invert_doublet_eo(spinor * const Even_new_s, spinor * const Odd_new_s, 
		      spinor * const Even_new_c, spinor * const Odd_new_c, 
		      spinor * const Even_s, spinor * const Odd_s,
		      spinor * const Even_c, spinor * const Odd_c,
		      const double precision, const int max_iter,
		      const int solver_flag, const int rel_prec) {

  int iter = 0;
  
  
#ifdef HAVE_GPU
#  ifdef TEMPORALGAUGE
  
  /* initialize temporal gauge here */
  int retval;
  double dret1, dret2;
  double plaquette1 = 0.0;
  double plaquette2 = 0.0;
  
  if (usegpu_flag) {
    
    /* need VOLUME here (not N=VOLUME/2)*/
    /* initializes the transformation matrices and keeps g_tempgauge_field as a copy of g_gauge_field */
    if ((retval = init_temporalgauge_trafo(VOLUME, g_gauge_field)) != 0 ) {
      if (g_proc_id == 0) printf("Error while gauge fixing to temporal gauge. Aborting...\n");
      exit(200);
    }
    
    /* do trafo */
    plaquette1 = measure_plaquette(g_gauge_field);
    apply_gtrafo(g_gauge_field, g_trafo);								// transformation of the gauge field
    plaquette2 = measure_plaquette(g_gauge_field);
    if (g_proc_id == 0) printf("\tPlaquette before gauge fixing: %.16e\n", plaquette1/6./VOLUME);
    if (g_proc_id == 0) printf("\tPlaquette after gauge fixing:  %.16e\n", plaquette2/6./VOLUME);
    
    /* do trafo to odd_s part of source */
    dret1 = square_norm(Odd_s, VOLUME/2 , 1);
    apply_gtrafo_spinor_odd(Odd_s, g_trafo);								// odd spinor transformation, strange
    dret2 = square_norm(Odd_s, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
    
    /* do trafo to odd_c part of source */
    dret1 = square_norm(Odd_c, VOLUME/2 , 1);
    apply_gtrafo_spinor_odd(Odd_c, g_trafo);								// odd spinor transformation, charm
    dret2 = square_norm(Odd_c, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);       
    
    /* do trafo to even_s part of source */
    dret1 = square_norm(Even_s, VOLUME/2 , 1);
    apply_gtrafo_spinor_even(Even_s, g_trafo);							// even spinor transformation, strange
    dret2 = square_norm(Even_s, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
    
    /* do trafo to even_c part of source */
    dret1 = square_norm(Even_c, VOLUME/2 , 1);
    apply_gtrafo_spinor_even(Even_c, g_trafo);							// even spinor transformation, charm
    dret2 = square_norm(Even_c, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
    
#    ifdef MPI
    xchange_gauge(g_gauge_field);
#    endif
    
  } 
#  endif  
#endif /* HAVE_GPU*/


  /* here comes the inversion using even/odd preconditioning */
  if(g_proc_id == 0) {printf("# Using even/odd preconditioning!\n"); fflush(stdout);}
  M_ee_inv_ndpsi(Even_new_s, Even_new_c, 
		 Even_s, Even_c,
		 g_mubar, g_epsbar);
  Hopping_Matrix(OE, g_spinor_field[DUM_DERI], Even_new_s);
  Hopping_Matrix(OE, g_spinor_field[DUM_DERI+1], Even_new_c);
  
  /* The sign is plus, since in Hopping_Matrix */
  /* the minus is missing                      */
  assign_mul_add_r(g_spinor_field[DUM_DERI], +1., Odd_s, VOLUME/2);
  assign_mul_add_r(g_spinor_field[DUM_DERI+1], +1., Odd_c, VOLUME/2);
  
  /* Do the inversion with the preconditioned  */
  /* matrix to get the odd sites               */
  
  /* Here we invert the hermitean operator squared */
  
  if(g_proc_id == 0) {
    printf("# Using CG for TMWILSON flavour doublet!\n"); 
    fflush(stdout);
  }
  gamma5(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI], VOLUME/2);
  gamma5(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+1], VOLUME/2);
  
  
#ifdef HAVE_GPU
  if (usegpu_flag) {	// GPU, mixed precision solver
#  if defined(MPI) && defined(PARALLELT)
    iter = mixedsolve_eo_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
			    max_iter, precision, rel_prec);
#  elif !defined(MPI) && !defined(PARALLELT)
    iter = mixedsolve_eo_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
			    max_iter, precision, rel_prec);
#  else
    printf("MPI and/or PARALLELT are not appropriately set for the GPU implementation. Aborting...\n");
    exit(-1);
#  endif
  }
  else {		// CPU, conjugate gradient
    iter = cg_her_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
		     max_iter, precision, rel_prec, 
		     VOLUME/2, &Qtm_pm_ndpsi);
  }
#else			// CPU, conjugate gradient
  iter = cg_her_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
		   max_iter, precision, rel_prec, 
		   VOLUME/2, &Qtm_pm_ndpsi);
#endif
  
  
  Qtm_dagger_ndpsi(Odd_new_s, Odd_new_c,
		   Odd_new_s, Odd_new_c);

  /* Reconstruct the even sites                */
  Hopping_Matrix(EO, g_spinor_field[DUM_DERI], Odd_new_s);
  Hopping_Matrix(EO, g_spinor_field[DUM_DERI+1], Odd_new_c);
  M_ee_inv_ndpsi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3],
		 g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
		 g_mubar, g_epsbar);
  
  /* The sign is plus, since in Hopping_Matrix */
  /* the minus is missing                      */
  assign_add_mul_r(Even_new_s, g_spinor_field[DUM_DERI+2], +1., VOLUME/2);
  assign_add_mul_r(Even_new_c, g_spinor_field[DUM_DERI+3], +1., VOLUME/2);
  
  
#ifdef HAVE_GPU  
  /* return from temporal gauge again */
#  ifdef TEMPORALGAUGE
  
  if (usegpu_flag) { 
    
    /* undo trafo */
    /* apply_inv_gtrafo(g_gauge_field, g_trafo);*/
    /* copy back the saved original field located in g_tempgauge_field -> update necessary*/
    plaquette1 = measure_plaquette(g_gauge_field);
    copy_gauge_field(g_gauge_field, g_tempgauge_field);
    g_update_gauge_copy = 1;
    plaquette2 = measure_plaquette(g_gauge_field);
    if (g_proc_id == 0) printf("\tPlaquette before inverse gauge fixing: %.16e\n", plaquette1/6./VOLUME);
    if (g_proc_id == 0) printf("\tPlaquette after inverse gauge fixing:  %.16e\n", plaquette2/6./VOLUME);
    
    /* undo trafo to source Even_s */
    dret1 = square_norm(Even_s, VOLUME/2 , 1);
    apply_inv_gtrafo_spinor_even(Even_s, g_trafo);
    dret2 = square_norm(Even_s, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
    
    
    /* undo trafo to source Even_c */
    dret1 = square_norm(Even_c, VOLUME/2 , 1);
    apply_inv_gtrafo_spinor_even(Even_c, g_trafo);
    dret2 = square_norm(Even_c, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1);
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2); 
    
    /* undo trafo to source Odd_s */
    dret1 = square_norm(Odd_s, VOLUME/2 , 1);
    apply_inv_gtrafo_spinor_odd(Odd_s, g_trafo);
    dret2 = square_norm(Odd_s, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
    
    /* undo trafo to source Odd_c */
    dret1 = square_norm(Odd_c, VOLUME/2 , 1);
    apply_inv_gtrafo_spinor_odd(Odd_c, g_trafo);
    dret2 = square_norm(Odd_c, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2); 
    
    
    // Even_new_s
    dret1 = square_norm(Even_new_s, VOLUME/2 , 1);
    apply_inv_gtrafo_spinor_even(Even_new_s, g_trafo);
    dret2 = square_norm(Even_new_s, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
    
    // Even_new_c
    dret1 = square_norm(Even_new_c, VOLUME/2 , 1);
    apply_inv_gtrafo_spinor_even(Even_new_c, g_trafo);
    dret2 = square_norm(Even_new_c, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
    
    // Odd_new_s
    dret1 = square_norm(Odd_new_s, VOLUME/2 , 1);
    apply_inv_gtrafo_spinor_odd(Odd_new_s, g_trafo);
    dret2 = square_norm(Odd_new_s, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
    
    // Odd_new_c
    dret1 = square_norm(Odd_new_c, VOLUME/2 , 1);
    apply_inv_gtrafo_spinor_odd(Odd_new_c, g_trafo);
    dret2 = square_norm(Odd_new_c, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2); 
    
    finalize_temporalgauge();
    
#    ifdef MPI
    xchange_gauge(g_gauge_field);
#    endif
    
  }
#  endif
#endif
  return(iter);
}
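
Leaving the optional temporal-gauge machinery aside, the even/odd preconditioning above follows the usual pattern (our notation; the plus signs reflect the convention, stated in the comments, that Hopping_Matrix carries no explicit minus):

\[
\psi_e^{(0)}=M_{ee}^{-1}b_e,\qquad
\tilde b_o=\gamma_5\bigl(b_o+H_{oe}\,\psi_e^{(0)}\bigr),\qquad
\psi_e=\psi_e^{(0)}+M_{ee}^{-1}H_{eo}\,\psi_o ,
\]

where the odd-site solution psi_o is obtained by running CG on the squared hermitian operator (Qtm_pm_ndpsi) with source tilde b_o and then applying Qtm_dagger_ndpsi once.
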
Example #23
0
/* k output , l input */
int solve_cg(spinor * const k, spinor * const l, double eps_sq, const int rel_prec) {

  static double normsq, pro, err, alpha_cg, beta_cg, squarenorm, sqnrm, sqnrm2;
  int iteration = 0, i, j;
  int save_sloppy = g_sloppy_precision;
  double atime, etime, flops;
  spinor *x, *delta, *y;
  
  /* initialize residue r and search vector p */
#ifdef MPI
  atime = MPI_Wtime();
#else
  atime = ((double)clock())/((double)(CLOCKS_PER_SEC));
#endif
  squarenorm = square_norm(l, VOLUME/2, 1);

  if(g_sloppy_precision_flag == 1) { 
    delta = g_spinor_field[DUM_SOLVER+3];
    x = g_spinor_field[DUM_SOLVER+4];
    y = g_spinor_field[DUM_SOLVER+5];
    assign(delta, l, VOLUME/2);
    Qtm_pm_psi(y, k);
    diff(delta, l, y, VOLUME/2);
    sqnrm = square_norm(delta, VOLUME/2, 1);
    if(((sqnrm <= eps_sq) && (rel_prec == 0)) || ((sqnrm <= eps_sq*squarenorm) && (rel_prec == 1))) {
      return(0);
    }
    
    for(i = 0; i < 20; i++) {
      g_sloppy_precision = 1;
      /* main CG loop in lower precision */
      zero_spinor_field(x, VOLUME/2);
      assign(g_spinor_field[DUM_SOLVER+1], delta, VOLUME/2);
      assign(g_spinor_field[DUM_SOLVER+2], delta, VOLUME/2);
      sqnrm2 = sqnrm;
      for(j = 0; j <= ITER_MAX_CG; j++) {
	Qtm_pm_psi(g_spinor_field[DUM_SOLVER], g_spinor_field[DUM_SOLVER+2]);
	pro = scalar_prod_r(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER], VOLUME/2, 1);
	alpha_cg = sqnrm2 / pro;
	assign_add_mul_r(x, g_spinor_field[DUM_SOLVER+2], alpha_cg, VOLUME/2);
	
	assign_mul_add_r(g_spinor_field[DUM_SOLVER], -alpha_cg, g_spinor_field[DUM_SOLVER+1], VOLUME/2);
	err = square_norm(g_spinor_field[DUM_SOLVER], VOLUME/2, 1);
	
	if(g_proc_id == g_stdio_proc && g_debug_level > 1) {
	  printf("inner CG: %d res^2 %g\n", iteration+j+1, err);
	  fflush(stdout);
	}
	
	if (((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*squarenorm) && (rel_prec == 1))){
	  break;
	}
	beta_cg = err / sqnrm2;
	assign_mul_add_r(g_spinor_field[DUM_SOLVER+2], beta_cg, g_spinor_field[DUM_SOLVER], VOLUME/2);
	assign(g_spinor_field[DUM_SOLVER+1], g_spinor_field[DUM_SOLVER], VOLUME/2);
	sqnrm2 = err;
      }
      /* end main CG loop */
      iteration += j;
      g_sloppy_precision = 0;
      add(k, k, x, VOLUME/2);
      
      Qtm_pm_psi(y, x);
      diff(delta, delta, y, VOLUME/2);
      sqnrm = square_norm(delta, VOLUME/2, 1);
      if(g_debug_level > 0 && g_proc_id == g_stdio_proc) {
	printf("mixed CG(linsolve): true residue %d\t%g\t\n",iteration, sqnrm); fflush( stdout);
      }
      
      if(((sqnrm <= eps_sq) && (rel_prec == 0)) || ((sqnrm <= eps_sq*squarenorm) && (rel_prec == 1))) {
	break;
      }
      iteration++;
    }
  }
  else {
    Qtm_pm_psi(g_spinor_field[DUM_SOLVER], k); 
    
    diff(g_spinor_field[DUM_SOLVER+1], l, g_spinor_field[DUM_SOLVER], VOLUME/2);
    assign(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER+1], VOLUME/2);
    normsq=square_norm(g_spinor_field[DUM_SOLVER+1], VOLUME/2, 1);
    
    /* main loop */
    for(iteration = 1; iteration <= ITER_MAX_CG; iteration++) {
      Qtm_pm_psi(g_spinor_field[DUM_SOLVER], g_spinor_field[DUM_SOLVER+2]);
      pro=scalar_prod_r(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER], VOLUME/2, 1);
      alpha_cg=normsq/pro;
      assign_add_mul_r(k, g_spinor_field[DUM_SOLVER+2], alpha_cg, VOLUME/2);
      
      assign_mul_add_r(g_spinor_field[DUM_SOLVER], -alpha_cg, g_spinor_field[DUM_SOLVER+1], VOLUME/2);
      err=square_norm(g_spinor_field[DUM_SOLVER], VOLUME/2, 1);
      
      if(g_proc_id == g_stdio_proc && g_debug_level > 1) {
	printf("CG (linsolve): iterations: %d res^2 %e\n", iteration, err);
	fflush(stdout);
      }
      
      if (((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*squarenorm) && (rel_prec == 1))){
	break;
      }
      beta_cg = err/normsq;
      assign_mul_add_r(g_spinor_field[DUM_SOLVER+2], beta_cg, g_spinor_field[DUM_SOLVER], VOLUME/2);
      assign(g_spinor_field[DUM_SOLVER+1], g_spinor_field[DUM_SOLVER], VOLUME/2);
      normsq=err;
    }
  }
#ifdef MPI
  etime = MPI_Wtime();
#else
  etime = ((double)clock())/((double)(CLOCKS_PER_SEC));
#endif
  /* 2 A + 2 Nc Ns + N_Count ( 2 A + 10 Nc Ns ) */
  /* 2*1320.0 because the linalg is over VOLUME/2 */
  flops = (2*(2*1320.0+2*3*4) + 2*3*4 + iteration*(2.*(2*1320.0+2*3*4) + 10*3*4))*VOLUME/2/1.0e6f;
  if(g_proc_id==0 && g_debug_level > 0) {
    printf("CG: iter: %d eps_sq: %1.4e t/s: %1.4e\n", iteration, eps_sq, etime-atime); 
    printf("CG: flopcount: t/s: %1.4e mflops_local: %.1f mflops: %.1f\n", 
	   etime-atime, flops/(etime-atime), g_nproc*flops/(etime-atime));
  }
  g_sloppy_precision = save_sloppy;
  return(iteration);
}
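
The g_sloppy_precision branch above is a defect-correction (iterative refinement) scheme: the outer loop maintains the defect delta = l - Q^2 k in full precision, the inner CG only solves for a correction in sloppy precision, and the correction is added to k. Below is a minimal, self-contained sketch of that pattern on a toy 2x2 system; it is purely illustrative, uses none of the tmLQCD data types or functions, and replaces the inner CG by a few single-precision relaxation sweeps.

/* toy defect-correction: double-precision outer loop, float inner solve */
#include <stdio.h>

static const double A[2][2] = { {4.0, 1.0}, {1.0, 3.0} };

static void apply_A_d(double *y, const double *x) {
  y[0] = A[0][0]*x[0] + A[0][1]*x[1];
  y[1] = A[1][0]*x[0] + A[1][1]*x[1];
}

/* "sloppy" inner solve: a few single-precision Gauss-Seidel sweeps */
static void sloppy_solve(float *e, const float *d) {
  e[0] = 0.f; e[1] = 0.f;
  for (int s = 0; s < 5; s++) {
    e[0] = (d[0] - (float)A[0][1]*e[1]) / (float)A[0][0];
    e[1] = (d[1] - (float)A[1][0]*e[0]) / (float)A[1][1];
  }
}

int main(void) {
  const double b[2] = {1.0, 2.0};
  double x[2] = {0.0, 0.0}, r[2], Ax[2];
  for (int i = 0; i < 20; i++) {
    apply_A_d(Ax, x);                       /* true residual in double     */
    r[0] = b[0] - Ax[0];  r[1] = b[1] - Ax[1];
    double rr = r[0]*r[0] + r[1]*r[1];
    printf("outer %d: |r|^2 = %e\n", i, rr);
    if (rr < 1e-24) break;                  /* converged to double prec.   */
    float d[2] = {(float)r[0], (float)r[1]}, e[2];
    sloppy_solve(e, d);                     /* correction in low precision */
    x[0] += e[0];  x[1] += e[1];            /* defect-correction update    */
  }
  printf("x = (%g, %g)\n", x[0], x[1]);
  return 0;
}

The point of the pattern is that the accuracy of the final solution is set by the precision in which the defect is computed, not by the precision of the inner solver.
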
Example #24
0
/* P output = solution , Q input = source */
int cg_mms_tm(spinor ** const P, spinor * const Q,
		 solver_params_t * solver_params, double * cgmms_reached_prec) {

  static double normsq, pro, err, squarenorm;
  int iteration, N = solver_params->sdim, no_shifts = solver_params->no_shifts;
  static double gamma, alpham1;
  spinor ** solver_field = NULL;
  double atime, etime;
  const int nr_sf = 3;

  atime = gettime();
  if(solver_params->sdim == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
    init_mms_tm(no_shifts, VOLUMEPLUSRAND);
  } 
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf); 
    init_mms_tm(no_shifts, VOLUMEPLUSRAND/2);
  } 

  zero_spinor_field(P[0], N);
  alphas[0] = 1.0;
  betas[0] = 0.0;
  sigma[0] = solver_params->shifts[0]*solver_params->shifts[0];
  if(g_proc_id == 0 && g_debug_level > 1) printf("# CGMMS: shift %d is %e\n", 0, sigma[0]);

  for(int im = 1; im < no_shifts; im++) {
    sigma[im] = solver_params->shifts[im]*solver_params->shifts[im] - sigma[0];
    if(g_proc_id == 0 && g_debug_level > 1) printf("# CGMMS: shift %d is %e\n", im, sigma[im]);
    // these will be the result spinor fields
    zero_spinor_field(P[im], N);
    // these are intermediate fields
    assign(ps_mms_solver[im-1], Q, N);
    zitam1[im] = 1.0;
    zita[im] = 1.0;
    alphas[im] = 1.0;
    betas[im] = 0.0;
  }

  /* currently only implemented for P=0 */
  squarenorm = square_norm(Q, N, 1);
  /* if a starting solution vector equal to zero is chosen */
  assign(solver_field[0], Q, N);
  assign(solver_field[1], Q, N);
  normsq = squarenorm;

  /* main loop */
  for(iteration = 0; iteration < solver_params->max_iter; iteration++) {

    /*   Q^2*p and then (p,Q^2*p)  */
    solver_params->M_psi(solver_field[2], solver_field[1]);
    // add the zeroth shift
    assign_add_mul_r(solver_field[2], solver_field[1], sigma[0], N);
    pro = scalar_prod_r(solver_field[1], solver_field[2], N, 1);

    /* For the update of the coefficients of the shifted polynomials we need alphas[0](i-1) and alphas[0](i).
       This is the reason for the double definition of alpha */
    alpham1 = alphas[0];

    /* Compute alphas[0](i+1) */
    alphas[0] = normsq/pro;
    for(int im = 1; im < no_shifts; im++) {

      /* Now gamma is a temp variable that corresponds to zita(i+1) */ 
      gamma = zita[im]*alpham1/(alphas[0]*betas[0]*(1.-zita[im]/zitam1[im]) 
				+ alpham1*(1.+sigma[im]*alphas[0]));

      // Now zita(i-1) is put equal to the old zita(i)
      zitam1[im] = zita[im];
      // Now zita(i+1) is updated 
      zita[im] = gamma;
      // Update of alphas(i) = alphas[0](i)*zita(i+1)/zita(i) 
      alphas[im] = alphas[0]*zita[im]/zitam1[im];

      // Compute xs(i+1) = xs(i) + alphas(i)*ps(i) 
      assign_add_mul_r(P[im], ps_mms_solver[im-1], alphas[im], N); 
      // in CG the corrections decrease as the iteration number increases,
      // so we can remove a shift once the norm of its correction vector
      // falls below a threshold
      // this saves computing time and is also needed, because otherwise
      // zita might get smaller than DOUBLE_EPS and, hence, become zero
      if(iteration > 0 && (iteration % 20 == 0) && (im == no_shifts-1)) {
	double sn = square_norm(ps_mms_solver[im-1], N, 1);
	if(alphas[no_shifts-1]*alphas[no_shifts-1]*sn <= solver_params->squared_solver_prec) {
	  no_shifts--;
	  if(g_debug_level > 2 && g_proc_id == 0) {
	    printf("# CGMMS: at iteration %d removed one shift, %d remaining\n", iteration, no_shifts);
      	  }
	}
      }
    }
    
    /*  Compute x_(i+1) = x_i + alphas[0](i+1) p_i    */
    assign_add_mul_r(P[0], solver_field[1],  alphas[0], N);
    /*  Compute r_(i+1) = r_i - alphas[0](i+1) Qp_i   */
    assign_add_mul_r(solver_field[0], solver_field[2], -alphas[0], N);

    /* Check whether the precision eps_sq is reached */

    err = square_norm(solver_field[0], N, 1);

    if(g_debug_level > 2 && g_proc_id == g_stdio_proc) {
      printf("# CGMMS iteration: %d residue: %g\n", iteration, err); fflush( stdout );
    }

    if( ((err <= solver_params->squared_solver_prec) && (solver_params->rel_prec == 0)) ||
        ((err <= solver_params->squared_solver_prec*squarenorm) && (solver_params->rel_prec > 0)) ||
        (iteration == solver_params->max_iter -1) ) {
      /* FIXME temporary output of precision until a better solution can be found */
      *cgmms_reached_prec = err;
      break;
    }

    /* Compute betas[0](i+1) = (r(i+1),r(i+1))/(r(i),r(i))
       Compute p(i+1) = r(i+1) + beta(i+1)*p(i)  */
    betas[0] = err/normsq;
    assign_mul_add_r(solver_field[1], betas[0], solver_field[0], N);
    normsq = err;

    /* Compute betas(i+1) = betas[0](i+1)*(zita(i+1)*alphas(i))/(zita(i)*alphas[0](i))
       Compute ps(i+1) = zita(i+1)*r(i+1) + betas(i+1)*ps(i)  */
    for(int im = 1; im < no_shifts; im++) {
      betas[im] = betas[0]*zita[im]*alphas[im]/(zitam1[im]*alphas[0]);
      assign_mul_add_mul_r(ps_mms_solver[im-1], solver_field[0], betas[im], zita[im], N);
    }
  }
  etime = gettime();
  g_sloppy_precision = 0;
  if(iteration == solver_params->max_iter -1) iteration = -1;
  else iteration++;
  if(g_debug_level > 0 && g_proc_id == 0) {
    printf("# CGMMS (%d shifts): iter: %d eps_sq: %1.4e %1.4e t/s\n", solver_params->no_shifts, iteration, solver_params->squared_solver_prec, etime - atime); 
  }
  
  finalize_solver(solver_field, nr_sf);
  return(iteration);
}
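
The shift-removal test in the loop above relies on the fact that, for shift s, all remaining corrections to P[s] are proportional to the current shifted search direction; once (in our notation)

\[
\bigl(\alpha^{s}_{k}\bigr)^{2}\,\lVert p^{s}_{k}\rVert^{2}\;\le\;\varepsilon^{2}_{\rm sq} ,
\]

that shifted system is converged to the requested squared precision and can be dropped from the loop. Only the last remaining shift is tested, on the assumption that the shifts are ordered such that the largest one converges first.
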
Example #25
0
/* P output = solution , Q input = source */
int cg_mms_tm(spinor * const P, spinor * const Q, const int max_iter, 
	      double eps_sq, const int rel_prec, const int N, matrix_mult f) {

  static double normsq, pro, err, alpha_cg = 1., beta_cg = 0., squarenorm;
  int iteration, im, append = 0;
  char filename[100];
  static double gamma, alpham1;
  int const cg_mms_default_precision = 32;
  double tmp_mu = g_mu;
  WRITER * writer = NULL;
  paramsInverterInfo *inverterInfo = NULL;
  paramsPropagatorFormat *propagatorFormat = NULL;
  spinor * temp_save; //used to save all the masses
  spinor ** solver_field = NULL;
  const int nr_sf = 5;

  init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  init_mms_tm(g_no_extra_masses);

  /* currently only implemented for P=0 */
  zero_spinor_field(P, N);
  /*  Value of the bare MMS-masses (\mu^2 - \mu_0^2) */
  for(im = 0; im < g_no_extra_masses; im++) {
    sigma[im] = g_extra_masses[im]*g_extra_masses[im] - g_mu*g_mu;
    assign(xs_mms_solver[im], P, N);
    assign(ps_mms_solver[im], Q, N);
    zitam1[im] = 1.0;
    zita[im] = 1.0;
    alphas[im] = 1.0;
    betas[im] = 0.0;
  }

  squarenorm = square_norm(Q, N, 1);
  assign(solver_field[0], P, N);
/*   normsp = square_norm(P, N, 1); */

  /* initialize residue r and search vector p */
/*   if(normsp == 0){ */
  /* currently only implemented for P=0 */
  if(1) {
    /* if a starting solution vector equal to zero is chosen */
    assign(solver_field[1], Q, N);
    assign(solver_field[2], Q, N);
    normsq = square_norm(Q, N, 1);
  }
  else{
    /* if a starting solution vector different from zero is chosen */
    f(solver_field[3], solver_field[0]);

    diff(solver_field[1], Q, solver_field[3], N);
    assign(solver_field[2], solver_field[1], N);
    normsq = square_norm(solver_field[2], N, 1);
  }

  /* main loop */
  for(iteration = 0; iteration < max_iter; iteration++) {

    /*   Q^2*p and then (p,Q^2*p)  */
    f(solver_field[4], solver_field[2]);
    pro = scalar_prod_r(solver_field[2], solver_field[4], N, 1);

    /* For the update of the coeff. of the shifted pol. we need alpha_cg(i-1) and alpha_cg(i).
       This is the reason why we need this double definition of alpha */
    alpham1 = alpha_cg;

    /* Compute alpha_cg(i+1) */
    alpha_cg = normsq/pro;
    for(im = 0; im < g_no_extra_masses; im++) {

      /* Now gamma is a temp variable that corresponds to zita(i+1) */ 
      gamma = zita[im]*alpham1/(alpha_cg*beta_cg*(1.-zita[im]/zitam1[im]) 
				+ alpham1*(1.+sigma[im]*alpha_cg));

      /* Now zita(i-1) is put equal to the old zita(i) */
      zitam1[im] = zita[im];
      /* Now zita(i+1) is updated */
      zita[im] = gamma;
      /* Update of alphas(i) = alpha_cg(i)*zita(i+1)/zita(i) */ 
      alphas[im] = alpha_cg*zita[im]/zitam1[im];
      /* Compute xs(i+1) = xs(i) + alphas(i)*ps(i) */
      assign_add_mul_r(xs_mms_solver[im], ps_mms_solver[im], alphas[im], N); 
    }

    /*  Compute x_(i+1) = x_i + alpha_cg(i+1) p_i    */
    assign_add_mul_r(solver_field[0], solver_field[2],  alpha_cg, N);
    /*  Compute r_(i+1) = r_i - alpha_cg(i+1) Qp_i   */
    assign_add_mul_r(solver_field[1], solver_field[4], -alpha_cg, N);

    /* Check whether the precision eps_sq is reached */

    err = square_norm(solver_field[1], N, 1);
    if(g_debug_level > 2 && g_proc_id == g_stdio_proc) {
      printf("CGMMS iteration: %d residue: %g\n", iteration, err); fflush( stdout );
    }

    if( ((err <= eps_sq) && (rel_prec == 0)) ||
      ((err <= eps_sq*squarenorm) && (rel_prec == 1)) ) {

      assign(P, solver_field[0], N);
      f(solver_field[2], P);
      diff(solver_field[3], solver_field[2], Q, N);
      err = square_norm(solver_field[3], N, 1);
      if(g_debug_level > 0 && g_proc_id == g_stdio_proc) {
        printf("# CG MMS true residue at final iteration (%d) was %g.\n", iteration, err); 
        fflush( stdout);
      }
      g_sloppy_precision = 0;
      g_mu = tmp_mu;

      /* save all the results of (Q^dagger Q)^(-1) \gamma_5 \phi */
      /* here ... */
      /* when im == -1 save the base mass*/
      for(im = -1; im < g_no_extra_masses; im++) {
        if(im==-1) {
          temp_save=solver_field[0];
        } else {
          temp_save=xs_mms_solver[im];
        }

        if(SourceInfo.type != 1) {
          if (PropInfo.splitted) {
            sprintf(filename, "%s.%.4d.%.2d.%.2d.cgmms.%.2d.inverted", SourceInfo.basename, SourceInfo.nstore, SourceInfo.t, SourceInfo.ix, im+1);
          } else {
            sprintf(filename, "%s.%.4d.%.2d.cgmms.%.2d.inverted", SourceInfo.basename, SourceInfo.nstore, SourceInfo.t, im+1);
          }
        }
        else {
          sprintf(filename, "%s.%.4d.%.5d.cgmms.%.2d.0", SourceInfo.basename, SourceInfo.nstore, SourceInfo.sample, im+1);
        }
        if(g_kappa != 0) {
          mul_r(temp_save, (2*g_kappa)*(2*g_kappa), temp_save, N);
        }

        append = !PropInfo.splitted;

        construct_writer(&writer, filename, append);

        if (PropInfo.splitted || SourceInfo.ix == index_start) {
          //Create the inverter info NOTE: always set to TWILSON=12 and 1 flavour (to be adjusted)
          inverterInfo = construct_paramsInverterInfo(err, iteration+1, 12, 1);
          if (im == -1) {
            inverterInfo->cgmms_mass = inverterInfo->mu;
          } else {
            inverterInfo->cgmms_mass = g_extra_masses[im]/(2 * inverterInfo->kappa);
          }
          write_spinor_info(writer, PropInfo.format, inverterInfo, append);
          //Create the propagatorFormat NOTE: always set to 1 flavour (to be adjusted)
          propagatorFormat = construct_paramsPropagatorFormat(cg_mms_default_precision, 1);
          write_propagator_format(writer, propagatorFormat);
          free(inverterInfo);
          free(propagatorFormat);
        }
        convert_lexic_to_eo(solver_field[2], solver_field[1], temp_save);
        write_spinor(writer, &solver_field[2], &solver_field[1], 1, 32);
        destruct_writer(writer);
      }
      finalize_solver(solver_field, nr_sf);
      return(iteration+1);
    }

    /* Compute beta_cg(i+1) = (r(i+1),r(i+1))/(r(i),r(i))
       Compute p(i+1) = r(i+1) + beta(i+1)*p(i)  */
    beta_cg = err/normsq;
    assign_mul_add_r(solver_field[2], beta_cg, solver_field[1], N);
    normsq = err;

    /* Compute betas(i+1) = beta_cg(i)*(zita(i+1)*alphas(i))/(zita(i)*alpha_cg(i))
       Compute ps(i+1) = zita(i+1)*r(i+1) + betas(i+1)*ps(i)  */
    for(im = 0; im < g_no_extra_masses; im++) {
      betas[im] = beta_cg*zita[im]*alphas[im]/(zitam1[im]*alpha_cg);
      assign_mul_add_mul_r(ps_mms_solver[im], solver_field[1], betas[im], zita[im], N);
    }
  }
  assign(P, solver_field[0], N);
  g_sloppy_precision = 0;
  finalize_solver(solver_field, nr_sf);
  return(-1);
}
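The shifted-coefficient recurrences are easier to follow stripped of the spinor fields and the I/O. The following standalone sketch is hypothetical illustration code, not part of tmLQCD: it replaces the squared Dirac operator by a small diagonal SPD matrix and the spinor fields by plain double arrays, but applies the same zita/alphas/betas updates to solve (A + sigma_s) x_s = b for several shifts from a single Krylov space.

#include <stdio.h>

#define DIM    4   /* toy vector length      */
#define NSHIFT 2   /* number of extra shifts */

static const double A[DIM] = { 1.0, 2.0, 3.0, 4.0 };   /* diag(A), SPD stand-in for Q^2 */

static void matvec(double *w, const double *v) {        /* w = A v */
  for (int i = 0; i < DIM; i++) w[i] = A[i] * v[i];
}

static double dot(const double *a, const double *b) {
  double s = 0.0;
  for (int i = 0; i < DIM; i++) s += a[i] * b[i];
  return s;
}

int main(void) {
  const double b[DIM] = { 1.0, 1.0, 1.0, 1.0 };
  const double sigma[NSHIFT] = { 0.5, 1.5 };            /* plays the role of mu_i^2 - mu_0^2 */

  double x[DIM] = { 0 }, r[DIM], p[DIM], Ap[DIM];
  double xs[NSHIFT][DIM] = {{ 0 }}, ps[NSHIFT][DIM];
  double zita[NSHIFT], zitam1[NSHIFT], alphas[NSHIFT], betas[NSHIFT];

  for (int i = 0; i < DIM; i++) { r[i] = b[i]; p[i] = b[i]; }
  for (int s = 0; s < NSHIFT; s++) {
    for (int i = 0; i < DIM; i++) ps[s][i] = b[i];
    zita[s] = zitam1[s] = 1.0;  alphas[s] = 1.0;  betas[s] = 0.0;
  }

  double normsq = dot(r, r), alpha_cg = 1.0, alpham1, beta_cg = 0.0;

  for (int k = 0; k < 100 && normsq > 1.e-24; k++) {
    matvec(Ap, p);
    alpham1  = alpha_cg;
    alpha_cg = normsq / dot(p, Ap);

    for (int s = 0; s < NSHIFT; s++) {
      /* same zita(i+1) recurrence as in the solver above */
      double gamma = zita[s] * alpham1 /
        (alpha_cg * beta_cg * (1.0 - zita[s] / zitam1[s])
         + alpham1 * (1.0 + sigma[s] * alpha_cg));
      zitam1[s] = zita[s];
      zita[s]   = gamma;
      alphas[s] = alpha_cg * zita[s] / zitam1[s];
      for (int i = 0; i < DIM; i++) xs[s][i] += alphas[s] * ps[s][i];
    }

    /* standard CG updates for the unshifted system */
    for (int i = 0; i < DIM; i++) { x[i] += alpha_cg * p[i];  r[i] -= alpha_cg * Ap[i]; }

    double err = dot(r, r);
    beta_cg = err / normsq;
    for (int i = 0; i < DIM; i++) p[i] = r[i] + beta_cg * p[i];
    normsq = err;

    for (int s = 0; s < NSHIFT; s++) {
      betas[s] = beta_cg * zita[s] * alphas[s] / (zitam1[s] * alpha_cg);
      for (int i = 0; i < DIM; i++) ps[s][i] = zita[s] * r[i] + betas[s] * ps[s][i];
    }
  }

  /* check: (A + sigma_s) xs_s should reproduce b for every shift */
  for (int s = 0; s < NSHIFT; s++)
    for (int i = 0; i < DIM; i++)
      printf("shift %d, component %d: %g (expect %g)\n",
             s, i, (A[i] + sigma[s]) * xs[s][i], b[i]);
  return 0;
}

For non-negative shifts the shifted residuals zita_sigma(i)*r(i) are never larger than the unshifted one, which is why the solver above only monitors the residual of the base mass.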
Example #26
0
void poly_precon(spinor * const R, spinor * const S, const double prec, const int n) {
  int j;
  double fact1, fact2, temp1, temp2, temp3, temp4, invmaxev = 1./4., maxev=4., tnorm, minev=g_mu*g_mu, auxnorm;
  static spinor *sv_, *sv, *d_, *d, *dd_, *dd, *aux_, *aux, *aux3_, *aux3;
  static int initp = 0;
  static double * c;
  const int N = VOLUME;


  
  maxev = 4.0;
  invmaxev = 1./maxev;
  minev = 0.1;
/*   minev = 1.5*1.5*g_mu*g_mu; */

  if(initp == 0) {
    c = (double*)calloc(1000, sizeof(double));
#if (defined SSE || defined SSE2 || defined SSE3)
    sv_  = calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
    sv   = (spinor *)(((unsigned long int)(sv_)+ALIGN_BASE)&~ALIGN_BASE);
    d_   = calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
    d    = (spinor *)(((unsigned long int)(d_)+ALIGN_BASE)&~ALIGN_BASE);
    dd_  = calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
    dd   = (spinor *)(((unsigned long int)(dd_)+ALIGN_BASE)&~ALIGN_BASE);
    aux_ = calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
    aux  = (spinor *)(((unsigned long int)(aux_)+ALIGN_BASE)&~ALIGN_BASE);
    aux3_= calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
    aux3 = (spinor *)(((unsigned long int)(aux3_)+ALIGN_BASE)&~ALIGN_BASE);
#else 
    sv_  = calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
    sv   = sv_;
    d_   = calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
    d    = d_;
    dd_  = calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
    dd   = dd_;
    aux_ = calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
    aux  = aux_;
    aux3_= calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
    aux3 = aux3_;
#endif
    get_c(minev, maxev, c, 100);
    initp = 1;
  }


  fact1 = 4. / (maxev - minev);
  fact2 = -2 * (maxev + minev) / (maxev - minev);
   
  zero_spinor_field(&d[0], N);
  zero_spinor_field(&dd[0], N); 
  assign(&aux3[0], &S[0], N); 
/*   gamma5(&aux3[0], &S[0], N); */

  /* Use the adaptive-precision variant based on the forward recursion
     for the Chebyshev polynomials.
  */

  /* d = T_0(Q^2) */
  assign(&d[0], &aux3[0], N);
  /* dd = T_1(Q^2) */
  Q_pm_psi(&dd[0], &d[0]);
/*   mul_r(dd, invmaxev, dd, N); */
  /*    norm_Q_sqr_psi(&dd[0], &d[0], g_m_D_psi, rnorm); */
  temp3 = fact1/2;
  temp4 = fact2/2;  
  assign_mul_add_mul_r(&dd[0], &d[0], temp3, temp4, N);
  /* r = c_1 T_1(Q^2) + 1/2 c_0 */
  temp1 = c[1];
  temp2 = c[0]/2;
  mul_add_mul_r(&R[0], &dd[0], &d[0], temp1, temp2, N);
     
  temp1 = -1.0;
  for (j=2; j<=n-1; j++) {
    /* aux = T_j(Q^2) = 2 Q^2 T_{j-1}(Q^2) - T_{j-2}(Q^2) */
    Q_pm_psi(&aux[0], &dd[0]);
/*     mul_r(aux, invmaxev, aux, N); */
    /*        norm_Q_sqr_psi(&aux[0], &dd[0], g_m_D_psi, rnorm); */
    assign_mul_add_mul_add_mul_r(&aux[0],&dd[0],&d[0],fact1,fact2,temp1, N);
    /* r = r + c_j T_j(Q^2) */
    temp2=c[j];
    assign_add_mul_r(&R[0],&aux[0],temp2, N);
    /* Stopping criterion: tnorm = |c_j T_j(Q^2) S|^2 */
    tnorm = square_norm(aux, N, 1);
    tnorm *= (temp2*temp2);

    auxnorm = square_norm(R, N, 1);
    if(g_proc_id == g_stdio_proc) {
      printf("j= %d\t|c T|^2= %g\t%g\t c_j= %g\t|r|^2= %g\n", j, tnorm, prec, temp2, auxnorm);
      fflush(stdout);
    }
         
    if(tnorm < prec) break;
    /* d = T_{j-1}(Q^2) */
    assign(&d[0], &dd[0], N);
    /* dd = T_{j}(Q^2) */
    assign(&dd[0], &aux[0], N);
  }
  if(g_proc_id == g_stdio_proc) {
    printf("Order of the Chebyshev approximation = %d\n", j);
    fflush(stdout);
  }
   

  /* r = Q r */

/*   assign(aux, R, N); */
/*   Q_minus_psi(R, aux); */

  return;
}
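The map to [-1,1] and the forward recursion can be checked against a scalar toy. The sketch below is hypothetical illustration code, not part of tmLQCD: cheb_coeff is only a plausible stand-in for get_c, and f(x) = 1/x is assumed as the target function; it uses the same fact1/fact2 convention and the same T_j = 2 y T_{j-1} - T_{j-2} recursion as poly_precon, just with a scalar x in place of Q^2.

#include <stdio.h>
#include <math.h>

/* Chebyshev coefficients of f(x) = 1/x on [a, b] via Chebyshev-Gauss quadrature;
 * only a plausible stand-in for what get_c(minev, maxev, c, M) provides.        */
static void cheb_coeff(double a, double b, double *c, int M) {
  const double pi = acos(-1.0);
  for (int j = 0; j < M; j++) {
    double s = 0.0;
    for (int k = 0; k < M; k++) {
      double th = pi * (k + 0.5) / M;
      double x  = 0.5 * (b - a) * cos(th) + 0.5 * (b + a);
      s += (1.0 / x) * cos(j * th);
    }
    c[j] = 2.0 * s / M;
  }
}

int main(void) {
  const double minev = 0.1, maxev = 4.0;
  const double fact1 = 4.0 / (maxev - minev);
  const double fact2 = -2.0 * (maxev + minev) / (maxev - minev);
  enum { M = 100 };
  double c[M];
  cheb_coeff(minev, maxev, c, M);

  const double x = 0.7;                       /* test point in [minev, maxev]     */
  const double y = 0.5*fact1*x + 0.5*fact2;   /* the map realised by temp3, temp4 */
  double d  = 1.0;                            /* T_0(y)                           */
  double dd = y;                              /* T_1(y)                           */
  double r  = c[1]*dd + 0.5*c[0]*d;           /* c_1 T_1 + c_0/2                  */
  for (int j = 2; j < M; j++) {
    double aux = 2.0*y*dd - d;                /* T_j = 2 y T_{j-1} - T_{j-2}      */
    r += c[j]*aux;
    d = dd;  dd = aux;
  }
  printf("x = %g: Chebyshev approximation = %g, exact 1/x = %g\n", x, r, 1.0/x);
  return 0;
}

Replacing the scalar x by Q^2 applied to a spinor field (and the scalar updates by assign_mul_add_mul_r and friends) turns this into the loop in poly_precon.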