예제 #1
0
파일: Msap.c 프로젝트: Finkenrath/tmLQCD
void Msap(spinor * const P, spinor * const Q, const int Ncy, const int Niter) {
  int blk, ncy = 0, eo, vol;
  spinor * r, * a, * b;
  double nrm;
  spinor ** solver_field = NULL;
  const int nr_sf = 6;

  /* 
   * here it would be probably better to get the working fields as a parameter 
   * from the calling function
   */
  init_solver_field(&solver_field, VOLUME, nr_sf);
  r = solver_field[0];
  a = solver_field[1];
  b = solver_field[2];

  for(ncy = 0; ncy < Ncy; ncy++) {
    /* compute the global residue        */
    /* this can be done more efficiently */
    /* here only a naive implementation  */
    for(eo = 0; eo < 2; eo++) {
      D_psi(r, P);
      diff(r, Q, r, VOLUME);
      nrm = square_norm(r, VOLUME, 1);
      if(g_proc_id == 0 && g_debug_level > 2 && eo == 1) {  /*  GG, was 1 */
	printf("Msap: %d %1.3e\n", ncy, nrm);
	fflush(stdout);
      }
      /* choose the even (odd) block */

      /*blk = eolist[eo];*/

      for (blk = 0; blk < nb_blocks; blk++) {
	if(block_list[blk].evenodd == eo) {
	  vol = block_list[blk].volume;

	  /* get part of r corresponding to block blk into b */
	  copy_global_to_block(b, r, blk);
	  // does this work?? i.e. solver_field[3]
	  mrblk(a, b, solver_field[3], Niter, 1.e-31, 1, vol, &dummy_Di, blk);

	  /* add a up to full spinor P */
	  add_block_to_global(P, a, blk);
	}
      }
    }
  }
  finalize_solver(solver_field, nr_sf);
  return;
}
double cloverdetratio_rwacc(const int id, hamiltonian_field_t * const hf) {
  monomial * mnl = &monomial_list[id];
  int save_sloppy = g_sloppy_precision_flag;
  double atime, etime;
  atime = gettime();

  g_mu = mnl->mu2;
  boundary(mnl->kappa2);

  init_sw_fields();
  sw_term( (const su3**) hf->gaugefield, mnl->kappa2, mnl->c_sw); 
  sw_invert(EE, mnl->mu2);
  g_mu3 = 0.;
  mnl->Qp(mnl->w_fields[1], mnl->pf);

  g_mu3 = 0.;
  g_mu = mnl->mu;
  boundary(mnl->kappa);
  sw_term( (const su3**) hf->gaugefield, mnl->kappa, mnl->c_sw); 
  sw_invert(EE, mnl->mu);

  chrono_guess(mnl->w_fields[0], mnl->w_fields[1], mnl->csg_field, mnl->csg_index_array, 
	       mnl->csg_N, mnl->csg_n, VOLUME/2, &Qtm_plus_psi);
  g_sloppy_precision_flag = 0;    
  mnl->iter0 += solve_degenerate(mnl->w_fields[0], mnl->w_fields[1], mnl->solver_params, mnl->maxiter, mnl->accprec,  
		      g_relative_precision_flag, VOLUME/2, mnl->Qsq, mnl->solver);
  mnl->Qm(mnl->w_fields[0], mnl->w_fields[0]);

  g_sloppy_precision_flag = save_sloppy;

  /* Compute the energy contr. from second field */
  mnl->energy1 = square_norm(mnl->w_fields[0], VOLUME/2, 1);

  g_mu = g_mu1;
  g_mu3 = 0.;
  boundary(g_kappa);
  etime = gettime();
  if(g_proc_id == 0) {
    if(g_debug_level > 1) {
      printf("# Time for %s monomial rwacc step: %e s\n", mnl->name, etime-atime);
    }
    if(g_debug_level > 3) {
      printf("called cloverdetratio_rwacc for id %d dH = %1.10e\n", 
	     id, mnl->energy1 - mnl->energy0);
    }
  }
  return(mnl->energy1 - mnl->energy0);
}
void cloverdet_heatbath(const int id, hamiltonian_field_t * const hf) {

  monomial * mnl = &monomial_list[id];
  double atime, etime;
  atime = gettime();
  int N = VOLUME/2;

  g_mu = mnl->mu;
  g_mu3 = mnl->rho;
  g_c_sw = mnl->c_sw;
  boundary(mnl->kappa);
  mnl->csg_n = 0;
  mnl->csg_n2 = 0;
  mnl->iter0 = 0;
  mnl->iter1 = 0;

  init_sw_fields();
  sw_term( (const su3**) hf->gaugefield, mnl->kappa, mnl->c_sw); 

  if(!mnl->even_odd_flag) {
    N = VOLUME;
    random_spinor_field_lexic(mnl->w_fields[0], mnl->rngrepro, RN_GAUSS);
  }
  else {
    sw_invert(EE, mnl->mu);
    random_spinor_field_eo(mnl->w_fields[0], mnl->rngrepro, RN_GAUSS);
  }
  mnl->energy0 = square_norm(mnl->w_fields[0], N, 1);
  
  mnl->Qp(mnl->pf, mnl->w_fields[0]);
  chrono_add_solution(mnl->pf, mnl->csg_field, mnl->csg_index_array,
                      mnl->csg_N, &mnl->csg_n, N);

  g_mu = g_mu1;
  g_mu3 = 0.;
  boundary(g_kappa);
  etime = gettime();
  if(g_proc_id == 0) {
    if(g_debug_level > 1) {
      printf("# Time for %s monomial heatbath: %e s\n", mnl->name, etime-atime);
    }
    if(g_debug_level > 3) {
      printf("called cloverdet_heatbath for id %d energy %f\n", id, mnl->energy0);
    }
  }
  return;
}
예제 #4
0
double Scene::compute_radius() const
{
    double square_radius = 0.0;
    const GAABB3 bbox = compute_bbox();

    if (bbox.is_valid())
    {
        for (size_t i = 0; i < 8; ++i)
        {
            const double square_distance =
                static_cast<double>(square_norm(bbox.compute_corner(i)));

            square_radius = max(square_radius, square_distance);
        }
    }

    return sqrt(square_radius);
}
예제 #5
0
double cloverdet_acc(const int id, hamiltonian_field_t * const hf) {
  monomial * mnl = &monomial_list[id];
  int save_sloppy = g_sloppy_precision_flag;
  double atime, etime;
  atime = gettime();

  g_mu = mnl->mu;
  g_mu3 = mnl->rho;
  g_c_sw = mnl->c_sw;
  boundary(mnl->kappa);

  sw_term( (const su3**) hf->gaugefield, mnl->kappa, mnl->c_sw); 
  sw_invert(EE, mnl->mu);

  chrono_guess(mnl->w_fields[0], mnl->pf, mnl->csg_field, mnl->csg_index_array,
	       mnl->csg_N, mnl->csg_n, VOLUME/2, mnl->Qsq);
  g_sloppy_precision_flag = 0;
  mnl->iter0 = cg_her(mnl->w_fields[0], mnl->pf, mnl->maxiter, mnl->accprec,  
		      g_relative_precision_flag, VOLUME/2, mnl->Qsq); 
  mnl->Qm(mnl->w_fields[0], mnl->w_fields[0]);
  
  g_sloppy_precision_flag = save_sloppy;
  /* Compute the energy contr. from first field */
  mnl->energy1 = square_norm(mnl->w_fields[0], VOLUME/2, 1);

  g_mu = g_mu1;
  g_mu3 = 0.;
  boundary(g_kappa);
  etime = gettime();
  if(g_proc_id == 0) {
    if(g_debug_level > 1) {
      printf("# Time for %s monomial acc step: %e s\n", mnl->name, etime-atime);
    }
    if(g_debug_level > 3) {
      printf("called cloverdet_acc for id %d dH = %1.10e\n", 
	     id, mnl->energy1 - mnl->energy0);
    }
  }
  return(mnl->energy1 - mnl->energy0);
}
예제 #6
0
void cloverdetratio_heatbath(const int id, hamiltonian_field_t * const hf) {
  monomial * mnl = &monomial_list[id];

  g_mu = mnl->mu;
  g_c_sw = mnl->c_sw;
  boundary(mnl->kappa);
  mnl->csg_n = 0;
  mnl->csg_n2 = 0;
  mnl->iter0 = 0;
  mnl->iter1 = 0;
  
  init_sw_fields();
  sw_term( (const su3**) hf->gaugefield, mnl->kappa, mnl->c_sw); 
  sw_invert(EE, mnl->mu);

  random_spinor_field(g_spinor_field[4], VOLUME/2, mnl->rngrepro);
  mnl->energy0  = square_norm(g_spinor_field[4], VOLUME/2, 1);
  
  g_mu3 = mnl->rho;
  mnl->Qp(g_spinor_field[3], g_spinor_field[4]);
  g_mu3 = mnl->rho2;
  zero_spinor_field(mnl->pf,VOLUME/2);

  mnl->iter0 = cg_her(mnl->pf, g_spinor_field[3], mnl->maxiter, mnl->accprec,  
		      g_relative_precision_flag, VOLUME/2, mnl->Qsq); 

  chrono_add_solution(mnl->pf, mnl->csg_field, mnl->csg_index_array,
		      mnl->csg_N, &mnl->csg_n, VOLUME/2);
  mnl->Qm(mnl->pf, mnl->pf);

  if(g_proc_id == 0 && g_debug_level > 3) {
    printf("called cloverdetratio_heatbath for id %d \n", id);
  }
  g_mu3 = 0.;
  g_mu = g_mu1;
  boundary(g_kappa);
  return;
}
예제 #7
0
double Scene::compute_radius() const
{
    double square_radius = 0.0;

    for (const_each<AssemblyInstanceContainer> i = impl->m_assembly_instances; i; ++i)
    {
        const AssemblyInstance& inst = *i;
        const GAABB3 inst_bbox = inst.compute_parent_bbox();

        GVector3 corners[8];
        inst_bbox.compute_corners(corners);

        for (size_t j = 0; j < 8; ++j)
        {
            const double square_distance = square_norm(corners[j]);

            if (square_radius < square_distance)
                square_radius = square_distance;
        }
    }

    return sqrt(square_radius);
}
예제 #8
0
파일: cg_mms_tm.c 프로젝트: palao/tmLQCD
/* P output = solution , Q input = source */
int cg_mms_tm(spinor * const P, spinor * const Q, const int max_iter, 
	      double eps_sq, const int rel_prec, const int N, matrix_mult f) {

  static double normsq, pro, err, alpha_cg = 1., beta_cg = 0., squarenorm;
  int iteration, im, append = 0;
  char filename[100];
  static double gamma, alpham1;
  int const cg_mms_default_precision = 32;
  double tmp_mu = g_mu;
  WRITER * writer = NULL;
  paramsInverterInfo *inverterInfo = NULL;
  paramsPropagatorFormat *propagatorFormat = NULL;
  spinor * temp_save; //used to save all the masses
  spinor ** solver_field = NULL;
  const int nr_sf = 5;

  init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  init_mms_tm(g_no_extra_masses);

  /* currently only implemented for P=0 */
  zero_spinor_field(P, N);
  /*  Value of the bare MMS-masses (\mu^2 - \mu_0^2) */
  for(im = 0; im < g_no_extra_masses; im++) {
    sigma[im] = g_extra_masses[im]*g_extra_masses[im] - g_mu*g_mu;
    assign(xs_mms_solver[im], P, N);
    assign(ps_mms_solver[im], Q, N);
    zitam1[im] = 1.0;
    zita[im] = 1.0;
    alphas[im] = 1.0;
    betas[im] = 0.0;
  }

  squarenorm = square_norm(Q, N, 1);
  assign(solver_field[0], P, N);
/*   normsp = square_norm(P, N, 1); */

  /* initialize residue r and search vector p */
/*   if(normsp == 0){ */
  /* currently only implemented for P=0 */
  if(1) {
    /* if a starting solution vector equal to zero is chosen */
    assign(solver_field[1], Q, N);
    assign(solver_field[2], Q, N);
    normsq = square_norm(Q, N, 1);
  }
  else{
    /* if a starting solution vector different from zero is chosen */
    f(solver_field[3], solver_field[0]);

    diff(solver_field[1], Q, solver_field[3], N);
    assign(solver_field[2], solver_field[1], N);
    normsq = square_norm(solver_field[2], N, 1);
  }

  /* main loop */
  for(iteration = 0; iteration < max_iter; iteration++) {

    /*   Q^2*p and then (p,Q^2*p)  */
    f(solver_field[4], solver_field[2]);
    pro = scalar_prod_r(solver_field[2], solver_field[4], N, 1);

    /* For the update of the coeff. of the shifted pol. we need alpha_cg(i-1) and alpha_cg(i).
       This is the reason why we need this double definition of alpha */
    alpham1 = alpha_cg;

    /* Compute alpha_cg(i+1) */
    alpha_cg = normsq/pro;
    for(im = 0; im < g_no_extra_masses; im++) {

      /* Now gamma is a temp variable that corresponds to zita(i+1) */ 
      gamma = zita[im]*alpham1/(alpha_cg*beta_cg*(1.-zita[im]/zitam1[im]) 
				+ alpham1*(1.+sigma[im]*alpha_cg));

      /* Now zita(i-1) is put equal to the old zita(i) */
      zitam1[im] = zita[im];
      /* Now zita(i+1) is updated */
      zita[im] = gamma;
      /* Update of alphas(i) = alpha_cg(i)*zita(i+1)/zita(i) */ 
      alphas[im] = alpha_cg*zita[im]/zitam1[im];
      /* Compute xs(i+1) = xs(i) + alphas(i)*ps(i) */
      assign_add_mul_r(xs_mms_solver[im], ps_mms_solver[im], alphas[im], N); 
    }

    /*  Compute x_(i+1) = x_i + alpha_cg(i+1) p_i    */
    assign_add_mul_r(solver_field[0], solver_field[2],  alpha_cg, N);
    /*  Compute r_(i+1) = r_i - alpha_cg(i+1) Qp_i   */
    assign_add_mul_r(solver_field[1], solver_field[4], -alpha_cg, N);

    /* Check whether the precision eps_sq is reached */

    err = square_norm(solver_field[1], N, 1);
    if(g_debug_level > 2 && g_proc_id == g_stdio_proc) {
      printf("CGMMS iteration: %d residue: %g\n", iteration, err); fflush( stdout );
    }

    if( ((err <= eps_sq) && (rel_prec == 0)) ||
      ((err <= eps_sq*squarenorm) && (rel_prec == 1)) ) {

      assign(P, solver_field[0], N);
      f(solver_field[2], P);
      diff(solver_field[3], solver_field[2], Q, N);
      err = square_norm(solver_field[3], N, 1);
      if(g_debug_level > 0 && g_proc_id == g_stdio_proc) {
        printf("# CG MMS true residue at final iteration (%d) was %g.\n", iteration, err); 
        fflush( stdout);
      }
      g_sloppy_precision = 0;
      g_mu = tmp_mu;

      /* save all the results of (Q^dagger Q)^(-1) \gamma_5 \phi */
      /* here ... */
      /* when im == -1 save the base mass*/
      for(im = -1; im < g_no_extra_masses; im++) {
        if(im==-1) {
          temp_save=solver_field[0];
        } else {
          temp_save=xs_mms_solver[im];
        }

        if(SourceInfo.type != 1) {
          if (PropInfo.splitted) {
            sprintf(filename, "%s.%.4d.%.2d.%.2d.cgmms.%.2d.inverted", SourceInfo.basename, SourceInfo.nstore, SourceInfo.t, SourceInfo.ix, im+1);
          } else {
            sprintf(filename, "%s.%.4d.%.2d.cgmms.%.2d.inverted", SourceInfo.basename, SourceInfo.nstore, SourceInfo.t, im+1);
          }
        }
        else {
          sprintf(filename, "%s.%.4d.%.5d.cgmms.%.2d.0", SourceInfo.basename, SourceInfo.nstore, SourceInfo.sample, im+1);
        }
        if(g_kappa != 0) {
          mul_r(temp_save, (2*g_kappa)*(2*g_kappa), temp_save, N);
        }

        append = !PropInfo.splitted;

        construct_writer(&writer, filename, append);

        if (PropInfo.splitted || SourceInfo.ix == index_start) {
          //Create the inverter info NOTE: always set to TWILSON=12 and 1 flavour (to be adjusted)
          inverterInfo = construct_paramsInverterInfo(err, iteration+1, 12, 1);
          if (im == -1) {
            inverterInfo->cgmms_mass = inverterInfo->mu;
          } else {
            inverterInfo->cgmms_mass = g_extra_masses[im]/(2 * inverterInfo->kappa);
          }
          write_spinor_info(writer, PropInfo.format, inverterInfo, append);
          //Create the propagatorFormat NOTE: always set to 1 flavour (to be adjusted)
          propagatorFormat = construct_paramsPropagatorFormat(cg_mms_default_precision, 1);
          write_propagator_format(writer, propagatorFormat);
          free(inverterInfo);
          free(propagatorFormat);
        }
        convert_lexic_to_eo(solver_field[2], solver_field[1], temp_save);
        write_spinor(writer, &solver_field[2], &solver_field[1], 1, 32);
        destruct_writer(writer);
      }
      finalize_solver(solver_field, nr_sf);
      return(iteration+1);
    }

    /* Compute beta_cg(i+1) = (r(i+1),r(i+1))/(r(i),r(i))
       Compute p(i+1) = r(i+1) + beta(i+1)*p(i)  */
    beta_cg = err/normsq;
    assign_mul_add_r(solver_field[2], beta_cg, solver_field[1], N);
    normsq = err;

    /* Compute betas(i+1) = beta_cg(i)*(zita(i+1)*alphas(i))/(zita(i)*alpha_cg(i))
       Compute ps(i+1) = zita(i+1)*r(i+1) + betas(i+1)*ps(i)  */
    for(im = 0; im < g_no_extra_masses; im++) {
      betas[im] = beta_cg*zita[im]*alphas[im]/(zitam1[im]*alpha_cg);
      assign_mul_add_mul_r(ps_mms_solver[im], solver_field[1], betas[im], zita[im], N);
    }
  }
  assign(P, solver_field[0], N);
  g_sloppy_precision = 0;
  finalize_solver(solver_field, nr_sf);
  return(-1);
}
예제 #9
0
파일: cr.c 프로젝트: Finkenrath/tmLQCD
int cr(spinor * const P, spinor * const Q, 
        const int m, const int max_restarts,
        const double eps_sq, const int rel_prec,
        const int N, const int precon, matrix_mult f) {

    int k, l, restart, i, iter = 0;
    double norm_sq, err;
    spinor * xi, * Axi, * chi, * Achi, *tmp;
    _Complex double alpha, beta;
    static _Complex double one = 1.0;
    double norm, rAr, newrAr;
    double atime, etime;
    spinor ** solver_field = NULL;
    const int nr_sf = 5;
    int save_sloppy = g_sloppy_precision;

    if(N == VOLUME) {
        init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
    }
    else {
        init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
    }

    atime = gettime();

    xi = solver_field[0];
    Axi = solver_field[1];
    chi = solver_field[2];
    Achi = solver_field[3];
    tmp = solver_field[4];

    norm_sq = square_norm(Q, N, 1);
    if(norm_sq < 1.e-32) {
        norm_sq = 1.;
    }

    dfl_sloppy_prec = 0;
    f(tmp, P);
    diff(chi, Q, tmp, N);
    assign(xi, chi, N);
    f(Axi, xi);
    f(Achi, chi);
    rAr = scalar_prod(chi, Achi, N, 1);
    err = square_norm(chi, N, 1);
    if(((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*norm_sq) && (rel_prec == 1))) {
        finalize_solver(solver_field, nr_sf);
        return(iter);
    }
    

    for(k = 0; k < m; k++) {

        dfl_sloppy_prec = 1;

        norm = square_norm(Axi, N, 1);
        alpha = rAr/norm;
        assign_add_mul(P, xi, alpha, N);
        /* get the new residual */
        assign_diff_mul(chi, Axi, alpha, N);

        err = square_norm(chi, N, 1);
        iter ++;
        etime = gettime();
        if(g_proc_id == g_stdio_proc && g_debug_level > 3){
            printf("# CR: %d\t%g iterated residue, time spent %f s\n", iter, err, (etime - atime)); 
            fflush(stdout);
        }
        /* Precision reached? */
        if((k == m-1) || ((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*norm_sq) && (rel_prec == 1))) {
            break;
        }

#ifdef _USE_HALFSPINOR
        if(((err*err <= eps_sq) && (rel_prec == 0)) || ((err*err <= eps_sq*norm_sq) && (rel_prec == 1))) {
            if (g_sloppy_precision_flag == 1) {
                g_sloppy_precision = 1;
                if(g_debug_level > 2 && g_proc_id == g_stdio_proc) {
                    printf("sloppy precision on\n"); fflush( stdout);
                }
            }
        }
#endif

        f(Achi, chi); 

        newrAr = scalar_prod(chi, Achi, N, 1); 
        beta = newrAr/rAr;
        assign_mul_add_mul(xi, beta, chi, one, N);
        assign_mul_add_mul(Axi,beta, Achi, one, N);
        rAr = newrAr;

    }

    g_sloppy_precision = save_sloppy;
    finalize_solver(solver_field, nr_sf);
    return(-1);
}
예제 #10
0
파일: linsolve.c 프로젝트: annube/tmLQCD
/*lambda: largest eigenvalue, k eigenvector */
int evamax(double *rz, int k, double q_off, double eps_sq) {
  static double ritz,norm0,normg,normg0,beta_cg;
  static double costh,sinth,cosd,sind,aaa,normp,xxx;
  static double xs1,xs2,xs3;
  int iteration;
  /* Initialize k to be gaussian */
  random_spinor_field(g_spinor_field[k], VOLUME/2);
  norm0=square_norm(g_spinor_field[k], VOLUME/2, 1); 
  /*normalize k */
  assign_mul_bra_add_mul_r( g_spinor_field[k], 1./sqrt(norm0),0., g_spinor_field[k], VOLUME/2);
  Q_psi(DUM_SOLVER,k,q_off);
  Q_psi(DUM_SOLVER,DUM_SOLVER,q_off);
  /*compute the ritz functional */
  /*put g on DUM_SOLVER+2 and p on DUM_SOLVER+1*/
  ritz=scalar_prod_r(g_spinor_field[DUM_SOLVER], g_spinor_field[k], VOLUME/2, 1); 
  zero_spinor_field(g_spinor_field[DUM_SOLVER+2],VOLUME/2);
  assign_add_mul_r_add_mul(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER], g_spinor_field[k],
			   1., -ritz, VOLUME/2);
  assign(g_spinor_field[DUM_SOLVER+1], g_spinor_field[DUM_SOLVER+2], VOLUME/2);
  normg0=square_norm(g_spinor_field[DUM_SOLVER+2], VOLUME/2, 1);
  
  /* main loop */
  for(iteration=1;iteration<=ITER_MAX_BCG;iteration++) {
    if(normg0 <= eps_sq) break;
    Q_psi(DUM_SOLVER+2,DUM_SOLVER+1,q_off);
    Q_psi(DUM_SOLVER+2,DUM_SOLVER+2,q_off);
    /*   compute costh and sinth */
    normp=square_norm(g_spinor_field[DUM_SOLVER+1], VOLUME/2, 1);
    xxx=scalar_prod_r(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER+1], VOLUME/2, 1);
    
    xs1=0.5*(ritz+xxx/normp);
    xs2=0.5*(ritz-xxx/normp);
    normp=sqrt(normp);
    xs3=normg0/normp;
    aaa=sqrt(xs2*xs2+xs3*xs3);
    cosd=xs2/aaa;
    sind=xs3/aaa;
    
    if(cosd>=0.) { 
      costh=sqrt(0.5*(1.+cosd));
      sinth=0.5*sind/costh;
    }
    else {
      sinth=sqrt(0.5*(1.-cosd));
      costh=0.5*sind/sinth;
    } 
    ritz=xs1+aaa;
    
    assign_add_mul_r_add_mul(g_spinor_field[k], g_spinor_field[k], g_spinor_field[DUM_SOLVER+1], 
			     costh-1., sinth/normp, VOLUME/2);
    assign_add_mul_r_add_mul(g_spinor_field[DUM_SOLVER], g_spinor_field[DUM_SOLVER], g_spinor_field[DUM_SOLVER+2],
			     costh-1., sinth/normp, VOLUME/2);
    
    /*   compute g */
    zero_spinor_field(g_spinor_field[DUM_SOLVER+2],VOLUME/2);
    assign_add_mul_r_add_mul(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER], g_spinor_field[k], 
			     1., -ritz, VOLUME/2);
    
    /*   calculate the norm of g' and beta_cg=costh g'^2/g^2 */
    normg=square_norm(g_spinor_field[DUM_SOLVER+2], VOLUME/2, 1);
    beta_cg=costh*normg/normg0;
    if(beta_cg*costh*normp>20.*sqrt(normg))  beta_cg=0.;
    normg0=normg;    
    /*   compute the new value of p */
    assign_add_mul_r(g_spinor_field[DUM_SOLVER+1], g_spinor_field[k], -scalar_prod_r(g_spinor_field[k], g_spinor_field[DUM_SOLVER+1], VOLUME/2), VOLUME/2, 1);
    assign_mul_add_r(g_spinor_field[DUM_SOLVER+1],beta_cg, g_spinor_field[DUM_SOLVER+2], VOLUME/2);
    /*   restore the state of the iteration */
    if(iteration%20==0) {
      /* readjust x */
      xxx=sqrt(square_norm(g_spinor_field[k], VOLUME/2), 1);
      assign_mul_bra_add_mul_r( g_spinor_field[k], 1./xxx,0., g_spinor_field[k], VOLUME/2);
      Q_psi(DUM_SOLVER,k,q_off);
      Q_psi(DUM_SOLVER,DUM_SOLVER,q_off);
      /*compute the ritz functional */
      ritz=scalar_prod_r(g_spinor_field[DUM_SOLVER], g_spinor_field[k], VOLUME/2, 1);
      /*put g on DUM_SOLVER+2 and p on DUM_SOLVER+1*/
      zero_spinor_field(g_spinor_field[DUM_SOLVER+2],VOLUME/2);
      assign_add_mul_r_add_mul(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER], g_spinor_field[k],
			       1., -ritz, VOLUME/2);
      normg0=square_norm(g_spinor_field[DUM_SOLVER+2], VOLUME/2, 1);
      /*subtract a linear combination of x and g from p to 
	insure (x,p)=0 and (p,g)=(g,g) */
      cosd=scalar_prod_r(g_spinor_field[k], g_spinor_field[DUM_SOLVER+1], VOLUME/2, 1);
      assign_add_mul_r(g_spinor_field[DUM_SOLVER+1], g_spinor_field[k], -cosd, VOLUME/2);
      cosd=scalar_prod_r(g_spinor_field[DUM_SOLVER+1], g_spinor_field[DUM_SOLVER+2], VOLUME/2, 1)-normg0;
      assign_add_mul_r(g_spinor_field[DUM_SOLVER+1], g_spinor_field[DUM_SOLVER+2], -cosd/sqrt(normg0), VOLUME/2);
    }
  }
  *rz=ritz;
  return iteration;
}
예제 #11
0
void op_invert(const int op_id, const int index_start, const int write_prop) {
  operator * optr = &operator_list[op_id];
  double atime = 0., etime = 0., nrm1 = 0., nrm2 = 0.;
  int i;
  optr->iterations = 0;
  optr->reached_prec = -1.;
  g_kappa = optr->kappa;
  boundary(g_kappa);

  atime = gettime();
  if(optr->type == TMWILSON || optr->type == WILSON || optr->type == CLOVER) {
    g_mu = optr->mu;
    g_c_sw = optr->c_sw;
    if(optr->type == CLOVER) {
      if (g_cart_id == 0 && g_debug_level > 1) {
	printf("#\n# csw = %e, computing clover leafs\n", g_c_sw);
      }
      init_sw_fields(VOLUME);
      sw_term( (const su3**) g_gauge_field, optr->kappa, optr->c_sw); 
      /* this must be EE here!   */
      /* to match clover_inv in Qsw_psi */
      sw_invert(EE, optr->mu);
    }

    for(i = 0; i < 2; i++) {
      if (g_cart_id == 0) {
        printf("#\n# 2 kappa mu = %e, kappa = %e, c_sw = %e\n", g_mu, g_kappa, g_c_sw);
      }
      if(optr->type != CLOVER) {
	if(use_preconditioning){
	  g_precWS=(void*)optr->precWS;
	}
	else {
	  g_precWS=NULL;
	}
	
	optr->iterations = invert_eo( optr->prop0, optr->prop1, optr->sr0, optr->sr1,
				      optr->eps_sq, optr->maxiter,
				      optr->solver, optr->rel_prec,
				      0, optr->even_odd_flag,optr->no_extra_masses, optr->extra_masses, optr->id );
	
	/* check result */
	M_full(g_spinor_field[4], g_spinor_field[5], optr->prop0, optr->prop1);
      }
      else {
	optr->iterations = invert_clover_eo(optr->prop0, optr->prop1, optr->sr0, optr->sr1,
					    optr->eps_sq, optr->maxiter,
					    optr->solver, optr->rel_prec,
					    &g_gauge_field, &Qsw_pm_psi, &Qsw_minus_psi);
	/* check result */
 	Msw_full(g_spinor_field[4], g_spinor_field[5], optr->prop0, optr->prop1);
      }

      diff(g_spinor_field[4], g_spinor_field[4], optr->sr0, VOLUME / 2);
      diff(g_spinor_field[5], g_spinor_field[5], optr->sr1, VOLUME / 2);

      nrm1 = square_norm(g_spinor_field[4], VOLUME / 2, 1);
      nrm2 = square_norm(g_spinor_field[5], VOLUME / 2, 1);
      optr->reached_prec = nrm1 + nrm2;

      /* convert to standard normalisation  */
      /* we have to mult. by 2*kappa        */
      if (optr->kappa != 0.) {
        mul_r(optr->prop0, (2*optr->kappa), optr->prop0, VOLUME / 2);
        mul_r(optr->prop1, (2*optr->kappa), optr->prop1, VOLUME / 2);
      }
      if (optr->solver != CGMMS && write_prop) /* CGMMS handles its own I/O */
        optr->write_prop(op_id, index_start, i);
      if(optr->DownProp) {
        optr->mu = -optr->mu;
      } else 
        break;
    }
  }
  else if(optr->type == DBTMWILSON || optr->type == DBCLOVER) {
    g_mubar = optr->mubar;
    g_epsbar = optr->epsbar;
    g_c_sw = 0.;
    if(optr->type == DBCLOVER) {
      g_c_sw = optr->c_sw;
      if (g_cart_id == 0 && g_debug_level > 1) {
	printf("#\n# csw = %e, computing clover leafs\n", g_c_sw);
      }
      init_sw_fields(VOLUME);
      sw_term( (const su3**) g_gauge_field, optr->kappa, optr->c_sw); 
      sw_invert_nd(optr->mubar*optr->mubar-optr->epsbar*optr->epsbar);
    }

    for(i = 0; i < SourceInfo.no_flavours; i++) {
      if(optr->type != DBCLOVER) {
	optr->iterations = invert_doublet_eo( optr->prop0, optr->prop1, optr->prop2, optr->prop3, 
					      optr->sr0, optr->sr1, optr->sr2, optr->sr3,
					      optr->eps_sq, optr->maxiter,
					      optr->solver, optr->rel_prec);
      }
      else {
	optr->iterations = invert_cloverdoublet_eo( optr->prop0, optr->prop1, optr->prop2, optr->prop3, 
						    optr->sr0, optr->sr1, optr->sr2, optr->sr3,
						    optr->eps_sq, optr->maxiter,
						    optr->solver, optr->rel_prec);
      }
      g_mu = optr->mubar;
      if(optr->type != DBCLOVER) {
	M_full(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+2], optr->prop0, optr->prop1); 
      }
      else {
	Msw_full(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+2], optr->prop0, optr->prop1); 
      }
      assign_add_mul_r(g_spinor_field[DUM_DERI+1], optr->prop2, -optr->epsbar, VOLUME/2);
      assign_add_mul_r(g_spinor_field[DUM_DERI+2], optr->prop3, -optr->epsbar, VOLUME/2);

      g_mu = -g_mu;
      if(optr->type != DBCLOVER) {
	M_full(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+4], optr->prop2, optr->prop3); 
      }
      else {
	Msw_full(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+4], optr->prop2, optr->prop3);
      }
      assign_add_mul_r(g_spinor_field[DUM_DERI+3], optr->prop0, -optr->epsbar, VOLUME/2);
      assign_add_mul_r(g_spinor_field[DUM_DERI+4], optr->prop1, -optr->epsbar, VOLUME/2);

      diff(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+1], optr->sr0, VOLUME/2); 
      diff(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+2], optr->sr1, VOLUME/2); 
      diff(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+3], optr->sr2, VOLUME/2); 
      diff(g_spinor_field[DUM_DERI+4], g_spinor_field[DUM_DERI+4], optr->sr3, VOLUME/2); 

      nrm1  = square_norm(g_spinor_field[DUM_DERI+1], VOLUME/2, 1); 
      nrm1 += square_norm(g_spinor_field[DUM_DERI+2], VOLUME/2, 1); 
      nrm1 += square_norm(g_spinor_field[DUM_DERI+3], VOLUME/2, 1); 
      nrm1 += square_norm(g_spinor_field[DUM_DERI+4], VOLUME/2, 1); 
      optr->reached_prec = nrm1;
      g_mu = g_mu1;
      /* For standard normalisation */
      /* we have to mult. by 2*kappa */
      mul_r(g_spinor_field[DUM_DERI], (2*optr->kappa), optr->prop0, VOLUME/2);
      mul_r(g_spinor_field[DUM_DERI+1], (2*optr->kappa), optr->prop1, VOLUME/2);
      mul_r(g_spinor_field[DUM_DERI+2], (2*optr->kappa), optr->prop2, VOLUME/2);
      mul_r(g_spinor_field[DUM_DERI+3], (2*optr->kappa), optr->prop3, VOLUME/2);
      /* the final result should be stored in the convention used in */
      /* hep-lat/0606011                                             */
      /* this requires multiplication of source with                 */
      /* (1+itau_2)/sqrt(2) and the result with (1-itau_2)/sqrt(2)   */

      mul_one_pm_itau2(optr->prop0, optr->prop2, g_spinor_field[DUM_DERI], 
                       g_spinor_field[DUM_DERI+2], -1., VOLUME/2);
      mul_one_pm_itau2(optr->prop1, optr->prop3, g_spinor_field[DUM_DERI+1], 
                       g_spinor_field[DUM_DERI+3], -1., VOLUME/2);
      /* write propagator */
      if(write_prop) optr->write_prop(op_id, index_start, i);

      mul_r(optr->prop0, 1./(2*optr->kappa), g_spinor_field[DUM_DERI], VOLUME/2);
      mul_r(optr->prop1, 1./(2*optr->kappa), g_spinor_field[DUM_DERI+1], VOLUME/2);
      mul_r(optr->prop2, 1./(2*optr->kappa), g_spinor_field[DUM_DERI+2], VOLUME/2);
      mul_r(optr->prop3, 1./(2*optr->kappa), g_spinor_field[DUM_DERI+3], VOLUME/2);

      /* mirror source, but not for volume sources */
      if(i == 0 && SourceInfo.no_flavours == 2 && SourceInfo.type != 1) {
        if (g_cart_id == 0) {
          fprintf(stdout, "# Inversion done in %d iterations, squared residue = %e!\n",
                  optr->iterations, optr->reached_prec);
        }
        mul_one_pm_itau2(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+2], optr->sr0, optr->sr2, -1., VOLUME/2);
        mul_one_pm_itau2(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+3], optr->sr1, optr->sr3, -1., VOLUME/2);

        mul_one_pm_itau2(optr->sr0, optr->sr2, g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI], +1., VOLUME/2);
        mul_one_pm_itau2(optr->sr1, optr->sr3, g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+1], +1., VOLUME/2);

      }
      /* volume sources need only one inversion */
      else if(SourceInfo.type == 1) i++;
    }
  }
  else if(optr->type == OVERLAP) {
    g_mu = 0.;
    m_ov=optr->m;
    eigenvalues(&optr->no_ev, 5000, optr->ev_prec, 0, optr->ev_readwrite, nstore, optr->even_odd_flag);
/*     ov_check_locality(); */
/*      index_jd(&optr->no_ev_index, 5000, 1.e-12, optr->conf_input, nstore, 4); */
    ov_n_cheby=optr->deg_poly;

    if(use_preconditioning==1)
      g_precWS=(void*)optr->precWS;
    else
      g_precWS=NULL;


    if(g_debug_level > 3) ov_check_ginsparg_wilson_relation_strong(); 

    invert_overlap(op_id, index_start); 

    if(write_prop) optr->write_prop(op_id, index_start, 0);
  }
  etime = gettime();
  if (g_cart_id == 0 && g_debug_level > 0) {
    fprintf(stdout, "# Inversion done in %d iterations, squared residue = %e!\n",
            optr->iterations, optr->reached_prec);
    fprintf(stdout, "# Inversion done in %1.2e sec. \n", etime - atime);
  }
  return;
}
예제 #12
0
/* P output = solution , Q input = source */
int mixed_cg_her(spinor * const P, spinor * const Q, solver_params_t solver_params, 
                 const int max_iter, double eps_sq, const int rel_prec, const int N,
                 matrix_mult f, matrix_mult32 f32) {

  int i = 0, iter = 0, j = 0;
  float sqnrm = 0., sqnrm2, squarenorm;
  float pro, err, alpha_cg, beta_cg;
  double sourcesquarenorm, sqnrm_d, squarenorm_d;
  spinor *delta, *y, *xhigh;
  spinor32 *x, *stmp;
  spinor ** solver_field = NULL;
  spinor32 ** solver_field32 = NULL;  
  const int nr_sf = 3;
  const int nr_sf32 = 4;

  int max_inner_it = mixcg_maxinnersolverit;
  int N_outer = max_iter/max_inner_it;
  //to be on the save side we allow at least 10 outer iterations
  if(N_outer < 10) N_outer = 10;
  
  int save_sloppy = g_sloppy_precision_flag;
  double atime, etime, flops;
  
  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);    
    init_solver_field_32(&solver_field32, VOLUMEPLUSRAND, nr_sf32);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
    init_solver_field_32(&solver_field32, VOLUMEPLUSRAND/2, nr_sf32);    
  }

  squarenorm_d = square_norm(Q, N, 1);
  sourcesquarenorm = squarenorm_d;
  sqnrm_d = squarenorm_d;
 
  delta = solver_field[0];
  y = solver_field[1];
  xhigh = solver_field[2];
  x = solver_field32[3];   
  assign(delta, Q, N);
  
  //set solution to zero
  zero_spinor_field(P, N);
  
  atime = gettime();
  for(i = 0; i < N_outer; i++) {

    /* main CG loop in lower precision */
    zero_spinor_field_32(x, N);
    zero_spinor_field_32(solver_field32[0], N);   
    assign_to_32(solver_field32[1], delta, N);
    assign_to_32(solver_field32[2], delta, N);
    
    sqnrm = (float) sqnrm_d;
    sqnrm2 = sqnrm;
    
    /*inner CG loop */
    for(j = 0; j <= max_inner_it; j++) {
      
      f32(solver_field32[0], solver_field32[2]); 
      pro = scalar_prod_r_32(solver_field32[2], solver_field32[0], N, 1);
      alpha_cg = sqnrm2 / pro;
      
      assign_add_mul_r_32(x, solver_field32[2], alpha_cg, N);
      
      assign_mul_add_r_32(solver_field32[0], -alpha_cg, solver_field32[1], N);      
      
      err = square_norm_32(solver_field32[0], N, 1);

      if(g_proc_id == g_stdio_proc && g_debug_level > 2) {
	      printf("inner CG: %d res^2 %g\n", iter+j, err);
	      fflush(stdout);
      }
    
      //if (((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*squarenorm) && (rel_prec == 1))){
      if((err <= mixcg_innereps*sqnrm)|| (j==max_inner_it) ||  ((1.3*err <= eps_sq) && (rel_prec == 0)) || ((1.3*err <= eps_sq*sourcesquarenorm) && (rel_prec == 1))) {
	      break;
      }
      beta_cg = err / sqnrm2;
      assign_mul_add_r_32(solver_field32[2], beta_cg, solver_field32[0], N);
      stmp = solver_field32[0];
      solver_field32[0] = solver_field32[1];
      solver_field32[1] = stmp;
      sqnrm2 = err;
    }
    /* end inner CG loop */
    iter += j;

    /* we want to apply a true double matrix with f(y,P) -> set sloppy off here*/
    g_sloppy_precision_flag = 0;
    
    /* calculate defect in double precision */
    assign_to_64(xhigh, x, N);    
    add(P, P, xhigh, N);
    f(y, P);
    diff(delta, Q, y, N);
    sqnrm_d = square_norm(delta, N, 1);
    if(g_debug_level > 2 && g_proc_id == 0) {
      printf("mixed CG: last inner residue: %g\t\n", err);
      printf("mixed CG: true residue %d %g\t\n",iter, sqnrm_d); fflush(stdout);
    }
    
    /* here we can reset it to its initial value*/
    g_sloppy_precision_flag = save_sloppy;
    
    if(((sqnrm_d <= eps_sq) && (rel_prec == 0)) || ((sqnrm_d <= eps_sq*sourcesquarenorm) && (rel_prec == 1))) {
      etime = gettime();     

      if(g_debug_level > 0 && g_proc_id == 0) {
      	if(N != VOLUME){
      	  /* 2 A + 2 Nc Ns + N_Count ( 2 A + 10 Nc Ns ) */
      	  /* 2*1608.0 because the linalg is over VOLUME/2 */
      	  flops = (2*(2*1608.0+2*3*4) + 2*3*4 + iter*(2.*(2*1608.0+2*3*4) + 10*3*4))*N/1.0e6f;
      	  printf("# mixed CG: iter: %d eps_sq: %1.4e t/s: %1.4e\n", iter, eps_sq, etime-atime); 
      	  printf("# mixed CG: flopcount (for e/o tmWilson only): t/s: %1.4e mflops_local: %.1f mflops: %.1f\n", 
      	      etime-atime, flops/(etime-atime), g_nproc*flops/(etime-atime));
      	}
      	else{
      	  /* 2 A + 2 Nc Ns + N_Count ( 2 A + 10 Nc Ns ) */
      	  flops = (2*(1608.0+2*3*4) + 2*3*4 + iter*(2.*(1608.0+2*3*4) + 10*3*4))*N/1.0e6f;      
      	  printf("# mixed CG: iter: %d eps_sq: %1.4e t/s: %1.4e\n", iter, eps_sq, etime-atime); 
      	  printf("# mixed CG: flopcount (for non-e/o tmWilson only): t/s: %1.4e mflops_local: %.1f mflops: %.1f\n", 
      	      etime-atime, flops/(etime-atime), g_nproc*flops/(etime-atime));      
      	}
      }      
      
      finalize_solver(solver_field, nr_sf);
      finalize_solver_32(solver_field32, nr_sf32); 
      return(iter+i);
    }
    iter++;
  }
  finalize_solver(solver_field, nr_sf);
  finalize_solver_32(solver_field32, nr_sf32); 
  return(-1);
}
예제 #13
0
파일: gcr.c 프로젝트: annube/tmLQCD
int gcr(spinor * const P, spinor * const Q, 
	const int m, const int max_restarts,
	const double eps_sq, const int rel_prec,
	const int N, const int precon, matrix_mult f) {

  int k, l, restart, i, iter = 0;
  double norm_sq, err;
  spinor * rho, * tmp;
  complex ctmp;
  spinor ** solver_field = NULL;
  const int nr_sf = 2;

  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }

  rho = solver_field[0];
  tmp = solver_field[1];

  init_gcr(m, N+RAND);

  norm_sq = square_norm(Q, N, 1);
  if(norm_sq < 1.e-32) {
    norm_sq = 1.;
  }
  
  for(restart = 0; restart < max_restarts; restart++) {
    dfl_sloppy_prec = 0;
    f(tmp, P);
    diff(rho, Q, tmp, N);
    err = square_norm(rho, N, 1);
    if(g_proc_id == g_stdio_proc && g_debug_level > 2){
      printf("GCR: iteration number: %d, true residue: %g\n", iter, err); 
      fflush(stdout);
    }
    if(((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*norm_sq) && (rel_prec == 1))) {
      finalize_solver(solver_field, nr_sf);
      return(iter);
    }
    for(k = 0; k < m; k++) {
      
      if(precon == 0) {
	assign(xi[k], rho, N);
      }
      else {
        zero_spinor_field(xi[k], N);  
        Msap_eo(xi[k], rho, 6);   
 	/* Msap(xi[k], rho, 8); */
      }
	  
      dfl_sloppy_prec = 1;
      dfl_little_D_prec = 1.e-12;
      f(tmp, xi[k]); 
	  
      /* tmp will become chi[k] */
      for(l = 0; l < k; l++) {
        a[l][k] = scalar_prod(chi[l], tmp, N, 1);
        assign_diff_mul(tmp, chi[l], a[l][k], N);
      }
      b[k] = sqrt(square_norm(tmp, N, 1));
      mul_r(chi[k], 1./b[k], tmp, N);
      c[k] = scalar_prod(chi[k], rho, N, 1);
      assign_diff_mul(rho, chi[k], c[k], N);
      err = square_norm(rho, N, 1);
      iter ++;
      if(g_proc_id == g_stdio_proc && g_debug_level > 0){
        if(rel_prec == 1) printf("# GCR: %d\t%g >= %g iterated residue\n", iter, err, eps_sq*norm_sq); 
        else printf("# GCR: %d\t%g >= %giterated residue\n", iter, err, eps_sq);
        fflush(stdout);
      }
      /* Precision reached? */
      if((k == m-1) || ((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*norm_sq) && (rel_prec == 1))) {
        break;
      }
    }

    /* prepare for restart */
    _mult_real(c[k], c[k], 1./b[k]);
    assign_add_mul(P, xi[k], c[k], N);
    for(l = k-1; l >= 0; l--) {
      for(i = l+1; i <= k; i++) {
        _mult_assign_complex(ctmp, a[l][i], c[i]);
        /* c[l] -= ctmp */
        _diff_complex(c[l], ctmp);
      }
      _mult_real(c[l], c[l], 1./b[l]);
      assign_add_mul(P, xi[l], c[l], N);
    }
  }
  finalize_solver(solver_field, nr_sf);
  return(-1);
}
예제 #14
0
int incr_eigcg(const int N, const int nrhs,  const int nrhs1, spinor * const x, spinor * const b, 
               const int ldh, matrix_mult f, const double eps_sq1, const double eps_sq, double restart_eps_sq,  
               const int rand_guess_opt, const int rel_prec, const int maxit, int nev, const int v_max) 
{ 
  /*Static variables and arrays.*/
  static spinor **solver_field; /*4 spinor fields*/

  static int ncurEvals=0;       /* current number of stored eigenvectors */
  static int ncurRHS=0;         /* current number of the system being solved */                   

  static spinor **evecs;        /* accumulated eigenvectors for deflation. */

  static void *_evals;
  static double *evals;         /* Ritz values */

  static void *_v;
  static spinor  *V;            /* work array for eigenvector search basis in eigCG */

  static void *_h;
  static _Complex double  *H;            /* The ncurEvals^2 matrix: H=evecs'*A*evecs */ 

  static void *_hu;
  static _Complex double  *HU;           /* used for diagonalization of H if eigenvalues requested
                                   also used as a copy of H if needed*/
  static void *_initwork;                            
  static _Complex double  *initwork;     /* vector of size ldh using with init-CG */ 

  static void *_ework;
  static _Complex double  *ework;
  /* end of the thinking part */

  static void *_work;
  static _Complex double  *work;

  static void *_rwork;
  static double *rwork;
  
  static void *_IPIV;
  static int *IPIV;        /*integer array to store permutations when solving the small linear system*/

  /* some constants */
  char cU='U'; char cN='N';  char cV='V'; 
  _Complex double  tpone= 1.0e+00;
  _Complex double  tzero= 0.0e+00;
  //tpone.re=+1.0e+00; tpone.im=0.0e+00; 
  //tzero.re=+0.0e+00; tzero.im=0.0e+00;

  /* Timing vars */
  double wt1,wt2,wE,wI;

  double eps_sq_used;

 
  /* Variables */
  double machEps = 1e-15;  
  double normb, normsq, tmpd,tmpd2;
  _Complex double  tempz;
  int i,j, ONE = 1;
  int tmpsize,tmpi,info=0;
  int numIts, flag, nAdded, nev_used;
  int maxit_remain;
  int esize,nrsf;

  int parallel; /* for parallel processing of the scalar products */

  /* leading dimension for spinor vectors */
  int LDN;
  if(N==VOLUME)
     LDN = VOLUMEPLUSRAND;
  else
     LDN = VOLUMEPLUSRAND/2;
  
 
  #ifdef MPI
    parallel=1;
  #else
    parallel=0;
  #endif
 
  /*think more about this */
  esize=2*12*N+4*nev*nev;  /* fixed size for ework used for restarting in eigcg*/

  nrsf=4;  /*number of solver fields */
  
  int lwork=3*ldh;

  double cur_res; //current residual squared (initial value will be computed in eigcg)

  /*increment the RHS counter*/
  ncurRHS = ncurRHS +1; 

  //set the tolerance to be used for this right-hand side 
  if(ncurRHS > nrhs1){
    eps_sq_used = eps_sq;
  }
  else{
    eps_sq_used = eps_sq1;
  }

  if(ncurRHS==1)/* If this is the first system, allocate needed memory for the solver*/
  {
    init_solver_field(&solver_field, LDN, nrsf); 
  }

  if(nev==0){ /*incremental eigcg is used as a cg solver. No need to restart forcing no-restart*/
    if(g_proc_id == g_stdio_proc && g_debug_level > 0) {
       fprintf(stdout, "CG won't be restarted in this mode since no deflation will take place (nev=0)\n"); 
       fflush(stdout);
    } 
  
    restart_eps_sq=0.0;
  }




  if((ncurRHS==1) && (nev >0) )/* If this is the first right-hand side and eigenvectors are needed, allocate needed memory*/
  { 
    init_solver_field(&evecs, LDN, ldh); 
     
    #if (defined SSE || defined SSE2 || defined SSE3)

    /*Extra elements are needed for allignment */
    //_v = malloc(LDN*v_max*sizeof(spinor)+ALIGN_BASE);
    _v = calloc(LDN*v_max+ALIGN_BASE,sizeof(spinor));
    V  = (spinor *)(((unsigned long int)(_v)+ALIGN_BASE)&~ALIGN_BASE);

    //_h=malloc(ldh*ldh*sizeof(_Complex double )+ALIGN_BASE);
    _h=calloc(ldh*ldh+ALIGN_BASE,sizeof(_Complex double ));
    H = (_Complex double  *)(((unsigned long int)(_h)+ALIGN_BASE)&~ALIGN_BASE);
 
    //_hu=malloc(ldh*ldh*sizeof(_Complex double )+ALIGN_BASE);
    _hu=calloc(ldh*ldh+ALIGN_BASE,sizeof(_Complex double ));
    HU = (_Complex double  *)(((unsigned long int)(_hu)+ALIGN_BASE)&~ALIGN_BASE);
    
    //_ework = malloc(esize*sizeof(_Complex double )+ALIGN_BASE);
    _ework = calloc(esize+ALIGN_BASE,sizeof(_Complex double ));
    ework=(_Complex double  *)(((unsigned long int)(_ework)+ALIGN_BASE)&~ALIGN_BASE);

    //_initwork = malloc(ldh*sizeof(_Complex double )+ALIGN_BASE);
    _initwork = calloc(ldh+ALIGN_BASE,sizeof(_Complex double ));
    initwork = (_Complex double  *)(((unsigned long int)(_initwork)+ALIGN_BASE)&~ALIGN_BASE);

    //_work = malloc(lwork*sizeof(_Complex double )+ALIGN_BASE);
    _work = calloc(lwork+ALIGN_BASE,sizeof(_Complex double ));
    work = (_Complex double  *)(((unsigned long int)(_work)+ALIGN_BASE)&~ALIGN_BASE);

    //_rwork = malloc(3*ldh*sizeof(double)+ALIGN_BASE);
    _rwork = calloc(3*ldh+ALIGN_BASE,sizeof(double));
    rwork = (double *)(((unsigned long int)(_rwork)+ALIGN_BASE)&~ALIGN_BASE);

    
    //_IPIV = malloc(ldh*sizeof(int)+ALIGN_BASE);
    _IPIV = calloc(ldh+ALIGN_BASE,sizeof(int));
    IPIV = (int *)(((unsigned long int)(_IPIV)+ALIGN_BASE)&~ALIGN_BASE);

    //_evals = malloc(ldh*sizeof(double)+ALIGN_BASE);
    _evals = calloc(ldh+ALIGN_BASE,sizeof(double)); 
    evals = (double *)(((unsigned long int)(_evals)+ALIGN_BASE)&~ALIGN_BASE);


    #else

    V = (spinor *) calloc(LDN*v_max,sizeof(spinor));
    H = calloc(ldh*ldh, sizeof(_Complex double ));
    HU= calloc(ldh*ldh, sizeof(_Complex double ));
    initwork = calloc(ldh, sizeof(_Complex double ));
    ework = calloc(esize, sizeof(_Complex double ));
    work = calloc(lwork,sizeof(_Complex double ));
    rwork= calloc(3*ldh,sizeof(double));
    IPIV = calloc(ldh, sizeof(int));
    evals = (double *) calloc(ldh, sizeof(double));

    #endif
    
  } /*if(ncurRHS==1)*/

  
  if(g_proc_id == g_stdio_proc && g_debug_level > 0) {
    fprintf(stdout, "System %d, eps_sq %e\n",ncurRHS,eps_sq_used); 
    fflush(stdout);
  } 
  
     /*---------------------------------------------------------------*/
     /* Call eigCG until this right-hand side converges               */
     /*---------------------------------------------------------------*/
  wE = 0.0; wI = 0.0;     /* Start accumulator timers */
  flag = -1;    	   /* First time through. Run eigCG regularly */
  maxit_remain = maxit;   /* Initialize Max and current # of iters   */
  numIts = 0;  

  while( flag == -1 || flag == 3)
  {
    //if(g_proc_id==g_stdio_proc)
      //printf("flag= %d, ncurEvals= %d\n",flag,ncurEvals);
    
    if(ncurEvals > 0)
    {
      /* --------------------------------------------------------- */
      /* Perform init-CG with evecs vectors                        */
      /* xinit = xinit + evecs*Hinv*evec'*(b-Ax0) 		     */
      /* --------------------------------------------------------- */

      wt1 = gettime();

      /*r0=b-Ax0*/
      normsq = square_norm(x,N,parallel);
      if(normsq>0.0)
      {
	f(solver_field[0],x); /* solver_field[0]= A*x */
	diff(solver_field[1],b,solver_field[0],N);  /* solver_filed[1]=b-A*x */		
      }
      else
	assign(solver_field[1],b,N); /* solver_field[1]=b */
	
      /* apply the deflation using init-CG */
      /* evecs'*(b-Ax) */
      for(i=0; i<ncurEvals; i++)
      {
        initwork[i]= scalar_prod(evecs[i],solver_field[1],N,parallel);
      }
    
      /* solve the linear system H y = c */
      tmpsize=ldh*ncurEvals;
      _FT(zcopy) (&tmpsize,H,&ONE,HU,&ONE); /* copy H into HU */
      _FT(zgesv) (&ncurEvals,&ONE,HU,&ldh,IPIV,initwork,&ldh,&info);

      if(info != 0)
      {
         if(g_proc_id == g_stdio_proc) {
            fprintf(stderr, "Error in ZGESV:, info =  %d\n",info); 
            fflush(stderr);
         }
         exit(1);
      }
    
      /* x = x + evecs*inv(H)*evecs'*r */
      for(i=0; i<ncurEvals; i++)
      {
        assign_add_mul(x,evecs[i],initwork[i],N);
      }
      
      /* compute elapsed time and add to accumulator */

      wt2 = gettime();
      
      wI = wI + wt2-wt1;
      
    }/* if(ncurEvals > 0) */


    /* ------------------------------------------------------------ */
    /* Adjust nev for eigcg according to available ldh/restart      */
    /* ------------------------------------------------------------ */
	  
    if (flag == 3) { /* restart with the same rhs, set nev_used = 0 */
      nev_used = 0;
      /* if convergence seems before next restart do not restart again */
      if(rel_prec)
      {
	       if (cur_res*(restart_eps_sq) < eps_sq*normb*normb) 
	           restart_eps_sq=0.0;
      }
      else
      {
	       if (cur_res*(restart_eps_sq) < eps_sq) 
	          restart_eps_sq=0.0;
      } /* if(rel_prec) */
	  
    }
    else
    {    
      /* First time through this rhs. Find nev evecs */
      /* limited by the ldh evecs we can store in total */
      if (ldh-ncurEvals < nev)
	       nev = ldh - ncurEvals;
      nev_used = nev;
      
    }

    /* ------------------------------------------------------------ */
    /* Solve Ax = b with x initial guess                            */
    /* ------------------------------------------------------------ */

    wt1 = gettime();

    eigcg( N, LDN, x, b, &normb, eps_sq_used, restart_eps_sq, rel_prec, maxit_remain, 
	     &numIts, &cur_res, &flag, solver_field, f, 
	     nev_used, v_max, V, esize, ework);
     
    //if(g_proc_id == g_stdio_proc) 
        //printf("eigcg flag= %d \n",flag); 
      
    wt2 = gettime();

    wE = wE + wt2-wt1;
    
    /* if flag == 3 update the remain max number of iterations */
    maxit_remain = maxit - numIts;
    
  }
  /* end while (flag ==-1 || flag == 3)               */
  /* ------------------------------------------------ */

  /* ---------- */
  /* Reporting  */
  /* ---------- */
  /* compute the exact residual */
  f(solver_field[0],x); /* solver_field[0]= A*x */
  diff(solver_field[1],b,solver_field[0],N);  /* solver_filed[1]=b-A*x */	
  normsq=square_norm(solver_field[1],N,parallel);
  if(g_debug_level > 0 && g_proc_id == g_stdio_proc)
  {
    fprintf(stdout, "For this rhs:\n");
    fprintf(stdout, "Total initCG Wallclock : %-f\n", wI);
    fprintf(stdout, "Total eigpcg Wallclock : %-f\n", wE);
    fprintf(stdout, "Iterations: %-d\n", numIts); 
    fprintf(stdout, "Residual: %e, Actual Resid of LinSys  : %e\n", cur_res,normsq);
    if (flag != 0) {
      fprintf(stderr, "Error: eigcg returned with nonzero exit status\n");
      return flag;
      fflush(stderr);
    }
    fflush(stdout);
  }
  /* ------------------------------------------------------------------- */
  /* ------------------------------------------------------------------- */
  /* Update the evecs and the factorization of evecs'*A*evecs            */
  /* ------------------------------------------------------------------- */
  if (nev > 0) 
  {

    wt1 = gettime();

    /* Append new Ritz vectors to the basis and orthogonalize them to evecs */
    for(i=0; i<nev_used; i++)
      assign(evecs[i+ncurEvals],&V[i*LDN],N);
    
    nAdded = ortho_new_vectors(evecs,N,ncurEvals,nev_used,machEps);

    /* expand H */
    for(j=ncurEvals; j< (ncurEvals+nAdded); j++)
    {
      f(solver_field[0],evecs[j]);
      
      for(i=0; i<=j; i++)
      {
	       H[i+j*ldh] = scalar_prod(evecs[i],solver_field[0],N,parallel);
	       H[j+i*ldh]=  conj(H[i+j*ldh]);
	       //H[j+i*ldh].re =  H[i+j*ldh].re;
	       //H[j+i*ldh].im = -H[i+j*ldh].im;
      }
      
    }
    
    /* update the number of vectors in the basis */
    ncurEvals = ncurEvals + nAdded;

    /* ---------- */
    /* Reporting  */
    /* ---------- */

    wt2 = gettime();

    
    if(g_proc_id == g_stdio_proc && g_debug_level > 0)
    {
      fprintf(stdout,"ncurRHS %d\n",ncurRHS);
      fprintf(stdout,"ncurEvals %d \n",ncurEvals);
      fprintf(stdout,"Update\n");
      fprintf(stdout,"Added %d vecs\n",nAdded);
      fprintf(stdout,"U Wallclock : %-f\n", wt2-wt1);
      fprintf(stdout,"Note: Update Wall time doesn't include time for computing eigenvalues and their residuals.\n"); 
      fflush(stdout);     
    }
    
    if(g_debug_level > 3)  /*compute eigenvalues and their residuals if requested*/
    {
      /* copy H into HU */
      tmpsize=ldh*ncurEvals;
      _FT(zcopy) (&tmpsize,H,&ONE,HU,&ONE);

      /* compute eigenvalues and eigenvectors of HU (using V and spinor fields as tmp work spaces)*/
      _FT(zheev)(&cV, &cU, &ncurEvals, HU, &ldh, evals, work, &lwork, rwork, &info,1,1);

      if(info != 0)
      {
	if(g_proc_id == g_stdio_proc) 
	{
	  fprintf(stderr,"Error in ZHEEV:, info =  %d\n",info); 
          fflush(stderr);
	}
	exit(1);
      }

      /* compute residuals and print out results */
      for(i=0; i<ncurEvals; i++)
      {
	    tmpi=12*N;
            tmpsize=12*LDN;

	    
            _FT(zgemv)(&cN,&tmpi,&ncurEvals,&tpone,(_Complex double  *)evecs[0],&tmpsize,
			                 &HU[i*ldh], &ONE,&tzero,(_Complex double  *) solver_field[0],&ONE,1);

            normsq=square_norm(solver_field[0],N,parallel);
            
            f(solver_field[1],solver_field[0]);

            tempz = scalar_prod(solver_field[0],solver_field[1],N,parallel);

            evals[i] = creal(tempz)/normsq;

            mul_r(solver_field[2],evals[i],solver_field[0],N);

            diff(solver_field[3],solver_field[1],solver_field[2], N);
	    
	    tmpd2= square_norm(solver_field[3],N,parallel);

            tmpd= sqrt(tmpd2/normsq);
	    
	    if(g_proc_id == g_stdio_proc)
	    {fprintf(stdout,"RR Eval[%d]: %22.15E rnorm: %22.15E\n", i+1, evals[i], tmpd); fflush(stdout);}
	
      } 
       
    }/*if(plvl >= 2)*/
  } /* if(nev>0) */

  /*--------------------------------------*/
  /*free memory that is no longer needed  */
  /* and reset ncurRHS and ncurEvals      */
  /*--------------------------------------*/

  if(ncurRHS == nrhs) /*this was the last system to be solved */
  {
     ncurRHS=0;
     ncurEvals=0;
     finalize_solver(solver_field,nrsf);
  }

  if( (ncurRHS == nrhs) && (nev >0) )/*this was the last system to be solved and there were allocated memory for eigenvector computation*/
  {
     finalize_solver(evecs,ldh);
     #if (defined SSE || defined SSE2 || defined SSE3)
     free(_v);
     free(_h);
     free(_hu);
     free(_ework);
     free(_initwork);
     free(_IPIV);
     free(_evals);
     free(_rwork);
     free(_work);
     #else
     free(V);
     free(H);
     free(HU);
     free(ework);
     free(initwork);
     free(IPIV);
     free(evals);
     free(rwork);
     free(work);
     #endif
  }

  return numIts;
}
예제 #15
0
void DirectLightingIntegrator::add_emitting_triangle_sample_contribution(
    const LightSample&          sample,
    const MISHeuristic          mis_heuristic,
    const Dual3d&               outgoing,
    Spectrum&                   radiance,
    SpectrumStack&              aovs) const
{
    const Material* material = sample.m_triangle->m_material;
    const Material::RenderData& material_data = material->get_render_data();
    const EDF* edf = material_data.m_edf;

    // No contribution if we are computing indirect lighting but this light does not cast indirect light.
    if (m_indirect && !(edf->get_flags() & EDF::CastIndirectLight))
        return;

    // Compute the incoming direction in world space.
    Vector3d incoming = sample.m_point - m_point;

    // Cull light samples behind the shading surface if the BSDF is either reflective or transmissive,
    // but not both.
    if (m_bsdf.get_type() != BSDF::AllBSDFTypes)
    {
        double cos_in = dot(incoming, m_shading_basis.get_normal());
        if (m_bsdf.get_type() == BSDF::Transmissive)
            cos_in = -cos_in;
        if (cos_in <= 0.0)
            return;
    }

    // No contribution if the shading point is behind the light.
    double cos_on = dot(-incoming, sample.m_shading_normal);
    if (cos_on <= 0.0)
        return;

    // Compute the transmission factor between the light sample and the shading point.
    const double transmission =
        m_shading_context.get_tracer().trace_between(
            m_shading_point,
            sample.m_point,
            VisibilityFlags::ShadowRay);

    // Discard occluded samples.
    if (transmission == 0.0)
        return;

    // Compute the square distance between the light sample and the shading point.
    const double square_distance = square_norm(incoming);
    const double rcp_sample_square_distance = 1.0 / square_distance;
    const double rcp_sample_distance = sqrt(rcp_sample_square_distance);

    // Don't use this sample if we're closer than the light near start value.
    if (square_distance < square(edf->get_light_near_start()))
        return;

    // Normalize the incoming direction.
    incoming *= rcp_sample_distance;
    cos_on *= rcp_sample_distance;

    // Evaluate the BSDF.
    Spectrum bsdf_value;
    const double bsdf_prob =
        m_bsdf.evaluate(
            m_bsdf_data,
            false,              // not adjoint
            true,               // multiply by |cos(incoming, normal)|
            m_geometric_normal,
            m_shading_basis,
            outgoing.get_value(),
            incoming,
            m_light_sampling_modes,
            bsdf_value);
    if (bsdf_prob == 0.0)
        return;

    // Build a shading point on the light source.
    ShadingPoint light_shading_point;
    sample.make_shading_point(
        light_shading_point,
        sample.m_shading_normal,
        m_shading_context.get_intersector());

#ifdef APPLESEED_WITH_OSL
    if (material_data.m_shader_group)
    {
        m_shading_context.execute_osl_emission(
            *material_data.m_shader_group,
            light_shading_point);
    }
#endif

    // Evaluate the EDF inputs.
    InputEvaluator edf_input_evaluator(m_shading_context.get_texture_cache());
    edf->evaluate_inputs(edf_input_evaluator, light_shading_point);

    // Evaluate the EDF.
    Spectrum edf_value;
    edf->evaluate(
        edf_input_evaluator.data(),
        sample.m_geometric_normal,
        Basis3d(sample.m_shading_normal),
        -incoming,
        edf_value);

    const double g = cos_on * rcp_sample_square_distance;
    double weight = transmission * g / sample.m_probability;

    // Apply MIS weighting.
    weight *=
        mis(
            mis_heuristic,
            m_light_sample_count * sample.m_probability,
            m_bsdf_sample_count * bsdf_prob * g);

    // Add the contribution of this sample to the illumination.
    edf_value *= static_cast<float>(weight);
    edf_value *= bsdf_value;
    radiance += edf_value;
    aovs.add(edf->get_render_layer_index(), edf_value);
}
예제 #16
0
bool DirectLightingIntegrator::compute_incoming_radiance(
    SamplingContext&            sampling_context,
    Vector3d&                   incoming,
    double&                     incoming_prob,
    Spectrum&                   radiance) const
{
    if (!m_light_sampler.has_lights_or_emitting_triangles())
        return false;

    sampling_context.split_in_place(3, 1);
    const Vector3d s = sampling_context.next_vector2<3>();

    LightSample sample;
    m_light_sampler.sample(m_time, s, sample);

    if (sample.m_triangle)
    {
        const Material* material = sample.m_triangle->m_material;
        const Material::RenderData& material_data = material->get_render_data();
        const EDF* edf = material_data.m_edf;

        // No contribution if we are computing indirect lighting but this light does not cast indirect light.
        if (m_indirect && !(edf->get_flags() & EDF::CastIndirectLight))
            return false;

        // Compute the incoming direction in world space.
        incoming = sample.m_point - m_point;

        // No contribution if the shading point is behind the light.
        double cos_on_light = dot(-incoming, sample.m_shading_normal);
        if (cos_on_light <= 0.0)
            return false;

        // Compute the transmission factor between the light sample and the shading point.
        const double transmission =
            m_shading_context.get_tracer().trace_between(
                m_shading_point,
                sample.m_point,
                VisibilityFlags::ShadowRay);

        // Discard occluded samples.
        if (transmission == 0.0)
            return false;

        // Don't use this sample if we're closer than the light near start value.
        const double square_distance = square_norm(incoming);
        if (square_distance < square(edf->get_light_near_start()))
            return false;

        // Normalize the incoming direction.
        const double rcp_square_distance = 1.0 / square_distance;
        const double rcp_distance = sqrt(rcp_square_distance);
        incoming *= rcp_distance;
        cos_on_light *= rcp_distance;

        // Build a shading point on the light source.
        ShadingPoint light_shading_point;
        sample.make_shading_point(
            light_shading_point,
            sample.m_shading_normal,
            m_shading_context.get_intersector());

#ifdef APPLESEED_WITH_OSL
        if (material_data.m_shader_group)
        {
            m_shading_context.execute_osl_emission(
                *material_data.m_shader_group,
                light_shading_point);
        }
#endif

        // Evaluate the EDF inputs.
        InputEvaluator edf_input_evaluator(m_shading_context.get_texture_cache());
        edf->evaluate_inputs(edf_input_evaluator, light_shading_point);

        // Evaluate the EDF.
        edf->evaluate(
            edf_input_evaluator.data(),
            sample.m_geometric_normal,
            Basis3d(sample.m_shading_normal),
            -incoming,
            radiance);

        // Compute probability with respect to solid angle of incoming direction.
        const double g = cos_on_light * rcp_square_distance;
        incoming_prob = sample.m_probability / g;

        // Compute and return the incoming radiance.
        radiance *= static_cast<float>(transmission * g / sample.m_probability);
    }
    else
    {
        const Light* light = sample.m_light;

        // No contribution if we are computing indirect lighting but this light does not cast indirect light.
        if (m_indirect && !(light->get_flags() & Light::CastIndirectLight))
            return false;

        // Evaluate the light.
        InputEvaluator input_evaluator(m_shading_context.get_texture_cache());
        Vector3d emission_position, emission_direction;
        light->evaluate(
            input_evaluator,
            sample.m_light_transform,
            m_point,
            emission_position,
            emission_direction,
            radiance);

        // Compute the transmission factor between the light sample and the shading point.
        const double transmission =
            m_shading_context.get_tracer().trace_between(
                m_shading_point,
                emission_position,
                VisibilityFlags::ShadowRay);

        // Discard occluded samples.
        if (transmission == 0.0)
            return false;

        // Compute the incoming direction in world space.
        incoming = -emission_direction;
        incoming_prob = BSDF::DiracDelta;

        // Compute and return the incoming radiance.
        const double attenuation = light->compute_distance_attenuation(m_point, emission_position);
        radiance *= static_cast<float>(transmission * attenuation / sample.m_probability);
    }

    return true;
}
예제 #17
0
int arpack_cg(
  /* solver params */
  const int N,                   /* (IN) Number of lattice sites for this process*/
  solver_params_t solver_params, /* (IN) parameters for solver */
  spinor * const x,              /* (IN/OUT) initial guess on input, solution on output for this RHS*/
  spinor * const b,              /* (IN) right-hand side*/
  matrix_mult f,                 /* (IN) f(s,r) computes s=A*r, i.e. matrix-vector multiply in double precision */
  matrix_mult f32,               /* (IN) f(s,r) computes s=A*r, i.e. matrix-vector multiply in single precision */
  const double eps_sq,           /* (IN) squared tolerance of convergence of the linear system for systems nrhs1+1 till nrhs*/
  const int rel_prec,            /* (IN) 0 for using absoute error for convergence
                                         1 for using relative error for convergence*/
  const int maxit,               /* (IN) Maximum allowed number of iterations to solution for the linear system*/
  matrix_mult f_final,           /* (IN) final operator application during projection of type 1 */
  matrix_mult f_initial          /* (IN) initial operator application during projection of type 1 */
) {

  /* Static variables and arrays. */
  static int ncurRHS=0;                  /* current number of the system being solved */                   
  static void *_ax,*_r,*_tmps1,*_tmps2;                  
  static spinor *ax,*r,*tmps1,*tmps2;                  
  static _Complex double *evecs,*evals,*H,*HU,*Hinv,*initwork,*tmpv1;
  static _Complex double *zheev_work;
  static double *hevals,*zheev_rwork;
  static int *IPIV; 
  static int info_arpack=0;
  static int nconv=0; /* number of converged eigenvectors as returned by arpack */
  int i,j,tmpsize;
  char cV='V',cN='N', cU='U';   
  int ONE=1;
  int zheev_lwork,zheev_info;
  _Complex double c1, c2, c3, tpone=1.0,tzero=0.0;
  double d1,d2,d3;
  double et1,et2;  /* timing variables */
  char evecs_filename[500];
  char howmny = 'P';
  FILE *evecs_fs=NULL;
  size_t evecs_count;
  WRITER *evecs_writer=NULL;
  spinor *evecs_ptr0 = NULL, *evecs_ptr1 = NULL;
  paramsPropagatorFormat *evecs_propagatorFormat = NULL;
  void *evecs_io_buffer = NULL;

  int parallel;        /* for parallel processing of the scalar products */
#ifdef TM_USE_MPI
    parallel=1;
#else
    parallel=0;
#endif

  /* leading dimension for spinor vectors */
  int LDN;
  if(N==VOLUME)
     LDN = VOLUMEPLUSRAND;
  else
     LDN = VOLUMEPLUSRAND/2; 

  /*(IN) Number of right-hand sides to be solved*/ 
  const int nrhs =   solver_params.arpackcg_nrhs; 
  /*(IN) First number of right-hand sides to be solved using tolerance eps_sq1*/ 
  const int nrhs1 =   solver_params.arpackcg_nrhs1;
  /*(IN) squared tolerance of convergence of the linear system for systems 1 till nrhs1*/
  const double eps_sq1 = solver_params.arpackcg_eps_sq1;
  /*(IN) suqared tolerance for restarting cg */
  const double res_eps_sq =   solver_params.arpackcg_res_eps_sq;

  /* parameters for arpack */

  /*(IN) number of eigenvectors to be computed by arpack*/
  const int nev = solver_params.arpackcg_nev;
   /*(IN) size of the subspace used by arpack with the condition (nev+1) =< ncv*/
  const int ncv = solver_params.arpackcg_ncv;
  /*(IN) tolerance for computing eigenvalues with arpack */
  double arpack_eig_tol =   solver_params.arpackcg_eig_tol;
  /*(IN) maximum number of iterations to be used by arpack*/
  int arpack_eig_maxiter =   solver_params.arpackcg_eig_maxiter;
  /*(IN) 0 for eigenvalues with smallest real part "SR"
         1 for eigenvalues with largest real part "LR"
         2 for eigenvalues with smallest absolute value "SM"
         3 for eigenvalues with largest absolute value "LM"
         4 for eigenvalues with smallest imaginary part "SI"
         5 for eigenvalues with largest imaginary part  "LI"*/
  int kind =   solver_params.arpackcg_evals_kind;
  /*(IN) 0 don't compute the eiegnvalues and their residuals of the original system 
         1 compute the eigenvalues and the residuals for the original system (the orthonormal basis
           still be used in deflation and they are not overwritten).*/
  int comp_evecs =   solver_params.arpackcg_comp_evecs;
  /*(IN) 0 no polynomial acceleration; 1 use polynomial acceleration*/
  int acc =   solver_params.use_acc;
  /*(IN) degree of the Chebyshev polynomial (irrelevant if acc=0)*/
  int cheb_k = solver_params.cheb_k;
  /*(IN) lower end of the interval where the acceleration will be used (irrelevant if acc=0)*/
  double emin = solver_params.op_evmin;
  /*(IN) upper end of the interval where the acceleration will be used (irrelevant if acc=0)*/
  double emax = solver_params.op_evmax;
  /*(IN) file name to be used for printing out debugging information from arpack*/
  char *arpack_logfile = solver_params.arpack_logfile;
  /*(IN) read eigenvectors in Schur basis from file */
  int  arpack_read_ev = solver_params.arpackcg_read_ev;
  /*(IN) write eigenvectors in Schur basis to file */
  int  arpack_write_ev = solver_params.arpackcg_write_ev;
  /*(IN) file name to be used for reading and writing evecs from and to disc */
  char *arpack_evecs_filename = solver_params.arpack_evecs_filename;
   /*(IN) precision used for writing eigenvectors */
  int arpack_evecs_writeprec = solver_params.arpack_evecs_writeprec;
  /* how to project with approximate eigenvectors */
  int projection_type = solver_params.projection_type;
  /* file format for evecs used by arpack */
  char *arpack_evecs_fileformat = solver_params.arpack_evecs_fileformat; 

  /*-------------------------------------------------------------
    if this is the first right hand side, allocate memory, 
    call arpack, and compute resiudals of eigenvectors if needed
    -------------------------------------------------------------*/ 
  if(ncurRHS==0){ 
#if (defined SSE || defined SSE2 || defined SSE3)
    _ax = malloc((LDN+ALIGN_BASE)*sizeof(spinor));
    if(_ax==NULL)
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for _ax inside arpack_cg.\n");
       exit(1);
    }
    else
       {ax  = (spinor *) ( ((unsigned long int)(_ax)+ALIGN_BASE)&~ALIGN_BASE);}

    _r = malloc((LDN+ALIGN_BASE)*sizeof(spinor));
    if(_r==NULL)
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for _r inside arpack_cg.\n");
       exit(1);
    }
    else
       {r  = (spinor *) ( ((unsigned long int)(_r)+ALIGN_BASE)&~ALIGN_BASE);}

    _tmps1 = malloc((LDN+ALIGN_BASE)*sizeof(spinor));
    if(_tmps1==NULL)
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for _tmps1 inside arpack_cg.\n");
       exit(1);
    }
    else
       {tmps1  = (spinor *) ( ((unsigned long int)(_tmps1)+ALIGN_BASE)&~ALIGN_BASE);}

    _tmps2 = malloc((LDN+ALIGN_BASE)*sizeof(spinor));
    if(_tmps2==NULL)
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for _tmps2 inside arpack_cg.\n");
       exit(1);
    }
    else
       {tmps2  = (spinor *) ( ((unsigned long int)(_tmps2)+ALIGN_BASE)&~ALIGN_BASE);}

#else
    ax = (spinor *) malloc(LDN*sizeof(spinor));
    r  = (spinor *) malloc(LDN*sizeof(spinor));
    tmps1 = (spinor *) malloc(LDN*sizeof(spinor));
    tmps2 = (spinor *) malloc(LDN*sizeof(spinor));
    
    if( (ax == NULL)  || (r==NULL) || (tmps1==NULL) || (tmps2==NULL) )
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for ax,r,tmps1,tmps2 inside arpack_cg.\n");
       exit(1);
    }
#endif


    evecs = (_Complex double *) malloc(ncv*12*N*sizeof(_Complex double)); /* note: no extra buffer  */
    evals = (_Complex double *) malloc(ncv*sizeof(_Complex double)); 
    tmpv1 = (_Complex double *) malloc(12*N*sizeof(_Complex double));

    if((evecs == NULL)  || (evals==NULL) || (tmpv1==NULL))
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for evecs and evals inside arpack_cg.\n");
       exit(1);
    }

    if ( arpack_read_ev == 1) {

      if (strcmp(arpack_evecs_fileformat, "partfile") == 0) {
        /* set evec filenmae */
        sprintf(evecs_filename, "%s.%.5d.pt%.2dpx%.2dpy%.2dpz%.2d", arpack_evecs_filename, nev, g_proc_coords[0], g_proc_coords[1], g_proc_coords[2], g_proc_coords[3]);
        evecs_fs = fopen(evecs_filename, "r");
        if (evecs_fs == NULL) {
          fprintf(stderr, "[arpack_cg] (%.4d) Error, could not open file %s for reading\n", g_cart_id, evecs_filename);
          return(-2);
        }
        fprintf(stdout, "# [arpack_cg] reading eigenvectors from file %s\n", evecs_filename);

        if(arpack_evecs_writeprec == 64) {
 
          evecs_io_buffer = (void*)evecs;
   
          et1=gettime();
          evecs_count = fread( evecs_io_buffer, sizeof(_Complex double), (size_t)nev*12*N, evecs_fs);
          et2=gettime();
        
        } else {
          evecs_io_buffer = malloc(sizeof(_Complex double) * (size_t)nev*12*N );
          if( evecs_io_buffer == NULL) {
            fprintf(stderr, "[arpack_cg] (%.4d) Error, could not allocate memory for evecs_io_buffer\n", g_cart_id);
            return(-42);
          }
  
          et1=gettime();
          evecs_count = fread( evecs_io_buffer, sizeof(_Complex double)/2, (size_t)nev*12*N, evecs_fs);
          et2=gettime();

          single2double(evecs, evecs_io_buffer, nev*24*N);

          free( evecs_io_buffer );
          evecs_io_buffer = NULL;
        }
       
        if( evecs_count != ((size_t)nev*12*N) ) {
          fprintf(stderr, "[arpack_cg] (%.4d) Error, could not proper amount of data from file %s\n", g_cart_id, evecs_filename);
          return(-3);
        }
        fclose(evecs_fs);
        evecs_fs = NULL;
        if(g_proc_id == g_stdio_proc) {
          fprintf(stdout,"# [arpack_cg] ARPACK time for reading %d eigenvectors: %+e seconds\n", nev, et2-et1);
        }
      } else if(strcmp(arpack_evecs_fileformat, "single") == 0) {

        if(N==VOLUME) {
          for(i=0; i<nev; i++) {
            sprintf(evecs_filename, "%s.ev%.5d", arpack_evecs_filename, i);
            evecs_ptr0 = (spinor*)&(evecs[i*12*N]);
            evecs_ptr1 = NULL;
            read_spinor(evecs_ptr0,  evecs_ptr1, evecs_filename, 0);
          } /* end of loop on eigenvectors */
        } else if(N==VOLUME/2) {
          for(i=0; i<nev/2; i++) {
            sprintf(evecs_filename, "%s.ev%.5d", arpack_evecs_filename, 2*i);
            evecs_ptr0 = (spinor*)&(evecs[(2*i  )*12*N]);
            evecs_ptr1 = (spinor*)&(evecs[(2*i+1)*12*N]);
            read_spinor(evecs_ptr0,  evecs_ptr1, evecs_filename, 0);
          } /* end of loop on eigenvectors */
        }
      }   /* of if arpack_evecs_fileformat */

      /* set info_arpack pro forma to SUCCESS */
      nconv = nev;
      info_arpack = 0;
    } else {
      et1=gettime();
      evals_arpack(N,nev,ncv,kind,howmny,acc,cheb_k,emin,emax,evals,evecs,arpack_eig_tol,arpack_eig_maxiter,f,&info_arpack,&nconv,arpack_logfile);
      et2=gettime();

      if(info_arpack != 0){ /* arpack didn't converge */
      if(g_proc_id == g_stdio_proc)
        fprintf(stderr,"[arpack_cg] WARNING: ARPACK didn't converge. exiting..\n");
        return -1;
      }
    
      if(g_proc_id == g_stdio_proc)
      {
         fprintf(stdout,"# [arpack_cg] ARPACK has computed %d eigenvectors\n",nconv);
         fprintf(stdout,"# [arpack_cg] ARPACK time: %+e\n",et2-et1);
      }

      if ( arpack_write_ev == 1) {

        if(strcmp(arpack_evecs_fileformat, "partfile") == 0 ) {

          if( g_cart_id == 0 ) fprintf(stdout, "# [arpack_cg] writing evecs in partfile format\n");
          /* set evec filenmae */
          sprintf(evecs_filename, "%s.%.5d.pt%.2dpx%.2dpy%.2dpz%.2d", arpack_evecs_filename, nconv, g_proc_coords[0], g_proc_coords[1], g_proc_coords[2], g_proc_coords[3]);

          evecs_fs = fopen(evecs_filename, "w");
          if (evecs_fs == NULL) {
            fprintf(stderr, "[arpack_cg] (%.4d) Error, could not open file %s for writing\n", g_cart_id, evecs_filename);
            return(-4);
          }
        
          if(arpack_evecs_writeprec == 64) {

            evecs_io_buffer = (void*)evecs;
 
            et1=gettime();
            evecs_count = fwrite( evecs_io_buffer, sizeof(_Complex double), (size_t)nconv*12*N, evecs_fs);
            et2=gettime();

          } else {
            evecs_io_buffer = malloc(sizeof(_Complex double) * (size_t)nconv*12*N );
            if( evecs_io_buffer == NULL) {
              fprintf(stderr, "[arpack_cg] (%.4d) Error, could not allocate memory for evecs_io_buffer\n", g_cart_id);
              return(-41);
            }
            double2single(evecs_io_buffer, evecs, nconv*24*N);
 
            et1=gettime();
            evecs_count = fwrite( evecs_io_buffer, sizeof(_Complex double)/2, (size_t)nconv*12*N, evecs_fs);
            et2=gettime();
            free(evecs_io_buffer);
            evecs_io_buffer = NULL;
          }
 
          if( evecs_count != ((size_t)nconv*12*N) ) {
            fprintf(stderr, "[arpack_cg] (%.4d) Error, could not write proper amount of data to file %s\n", g_cart_id, evecs_filename);
            return(-5);
          }
          fclose(evecs_fs);
          evecs_fs = NULL;

          if(g_proc_id == g_stdio_proc) {
            fprintf(stdout,"[arpack_cg] (%.4d) ARPACK time for writing %d eigenvectors: %+e seconds\n", g_cart_id, nconv, et2-et1);
          }

        } else if (strcmp(arpack_evecs_fileformat, "single") == 0) {

          if(N==VOLUME) {
            for(i=0; i<nconv; i++) {
              sprintf(evecs_filename, "%s.ev%.5d", arpack_evecs_filename, i);
              construct_writer(&evecs_writer, evecs_filename, 0);
              evecs_propagatorFormat = construct_paramsPropagatorFormat(arpack_evecs_writeprec, 1);
              write_propagator_format(evecs_writer, evecs_propagatorFormat);
              free(evecs_propagatorFormat);
              evecs_ptr0 = (spinor*)&(evecs[i*12*N]);
              evecs_ptr1 = NULL;
              write_spinor(evecs_writer, &evecs_ptr0, &evecs_ptr1, 1, arpack_evecs_writeprec);
              destruct_writer(evecs_writer);
              evecs_writer=NULL;
            } /* end of loop on converged eigenvectors */
          } else if(N==VOLUME/2) {
            for(i=0; i<nconv/2; i++) {
              sprintf(evecs_filename, "%s.ev%.5d", arpack_evecs_filename, 2*i);
              construct_writer(&evecs_writer, evecs_filename, 0);
              evecs_propagatorFormat = construct_paramsPropagatorFormat(arpack_evecs_writeprec, 1);
              write_propagator_format(evecs_writer, evecs_propagatorFormat);
              free(evecs_propagatorFormat);
              evecs_ptr0 = (spinor*)&(evecs[(2*i  )*12*N]);
              evecs_ptr1 = (spinor*)&(evecs[(2*i+1)*12*N]);
              write_spinor(evecs_writer, &evecs_ptr0, &evecs_ptr1,1, arpack_evecs_writeprec);
              destruct_writer(evecs_writer);
              evecs_writer=NULL;
            }  /* end of loop on converged eigenvectors */
          }    /* end of if N == VOLUME */

        }      /* of if arpack_evecs_fileformat */

      }        /* end of if arpack_write_ev == 1 */

    }          /* end of if arpack_read_ev == 1 */

    H        = (_Complex double *) malloc(nconv*nconv*sizeof(_Complex double)); 
    Hinv     = (_Complex double *) malloc(nconv*nconv*sizeof(_Complex double)); 
    initwork = (_Complex double *) malloc(nconv*sizeof(_Complex double)); 
    IPIV     = (int *) malloc(nconv*sizeof(int));
    zheev_lwork = 3*nconv;
    zheev_work  = (_Complex double *) malloc(zheev_lwork*sizeof(_Complex double));
    zheev_rwork = (double *) malloc(3*nconv*sizeof(double));
    hevals      = (double *) malloc(nconv*sizeof(double));

    if((H==NULL) || (Hinv==NULL) || (initwork==NULL) || (IPIV==NULL) || (zheev_lwork==NULL) || (zheev_rwork==NULL) || (hevals==NULL))
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for H, Hinv, initwork, IPIV, zheev_lwork, zheev_rwork, hevals inside arpack_cg.\n");
       exit(1);
    }

    et1=gettime();
    /* compute the elements of the hermitian matrix H 
       leading dimension is nconv and active dimension is nconv */
    
    if( projection_type == 0) {
    
      for(i=0; i<nconv; i++)
      {
        assign_complex_to_spinor(r,&evecs[i*12*N],12*N);
        f(ax,r);
        c1 = scalar_prod(r,ax,N,parallel);
        H[i+nconv*i] = creal(c1);  /* diagonal should be real */
        for(j=i+1; j<nconv; j++)
        {
          assign_complex_to_spinor(r,&evecs[j*12*N],12*N);
          c1 = scalar_prod(r,ax,N,parallel);
          H[j+nconv*i] = c1;
          H[i+nconv*j] = conj(c1); /* enforce hermiticity */
        }
      }

    } else if ( projection_type == 1 )  {

      for(i=0; i<nconv; i++)
      {
        assign_complex_to_spinor(tmps1, &evecs[i*12*N], 12*N);
        f_final(r, tmps1);
        f(ax,r);
        c1 = scalar_prod(r,ax,N,parallel);
        c2 = scalar_prod(r,r,N,parallel);
        H[i+nconv*i] = creal(c1) / creal(c2);   /* diagonal should be real */
        for(j=i+1; j<nconv; j++)
        {
          assign_complex_to_spinor(tmps1, &evecs[j*12*N], 12*N);
          f_final(r, tmps1);
          c1 = scalar_prod(r,ax,N,parallel);
          c3 = scalar_prod(r, r, N, parallel);

          H[j+nconv*i] = c1 / sqrt( creal(c2) * creal(c3) );
          H[i+nconv*j] = conj(c1) / sqrt( creal(c2) * creal(c3) ); /* enforce hermiticity */
        }
      }
    }


    et2=gettime();
    if(g_proc_id == g_stdio_proc) {
      fprintf(stdout,"[arpack_cg] time to compute H: %+e\n",et2-et1);
    }

/*
    if(g_cart_id == 0) {
      for(i=0; i<nconv; i++) {
      for(j=0; j<nconv; j++) {
        fprintf(stdout, "# [arpack_cg] H[%d, %d] = %25.16e %25.16e\n", i, j, creal(H[i*nconv+j]), cimag(H[i*nconv+j]));
      }}
    }
*/



     et1=gettime();
     /* compute Ritz values and Ritz vectors if needed */
     if( (nconv>0) && (comp_evecs !=0))
     {
         HU = (_Complex double *) malloc(nconv*nconv*sizeof(_Complex double)); 
         if( HU==NULL ) {
           if(g_proc_id == g_stdio_proc)
             fprintf(stderr,"[arpack_cg] insufficient memory for HU inside arpack_cg\n");
             exit(2);
         }
         /* copy H into HU */
         tmpsize=nconv*nconv;
         _FT(zcopy)(&tmpsize,H,&ONE,HU,&ONE);

         /* compute eigenvalues and eigenvectors of HU*/
         /* SUBROUTINE ZHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK,INFO ) */
         _FT(zheev)(&cV,&cU,&nconv,HU,&nconv,hevals,zheev_work,&zheev_lwork,zheev_rwork,&zheev_info,1,1);

         if(zheev_info != 0)
         {
	    if(g_proc_id == g_stdio_proc) 
	    {
	        fprintf(stderr,"[arpack_cg] Error in ZHEEV:, info =  %d\n",zheev_info); 
                fflush(stderr);
	    }
	    exit(1);
         }

         /* If you want to replace the schur (orthonormal) basis by eigen basis
            use something like this. It is better to use the schur basis because
            they are better conditioned. Use this part only to get the eigenvalues
            and their resduals for the operator (D^\daggerD)
            esize=(ncv-nconv)*12*N;
            Zrestart_X(evecs,12*N,HU,12*N,nconv,nconv,&evecs[nconv*N],esize); */

         /* compute residuals and print out results */

	 if(g_proc_id == g_stdio_proc)
	 {fprintf(stdout,"# [arpack_cg] Ritz values of A and their residulas (||A*x-lambda*x||/||x||\n"); 
          fprintf(stdout,"# [arpack_cg] =============================================================\n");
          fflush(stdout);}

         for(i=0; i<nconv; i++)
         {
	    tmpsize=12*N;
            _FT(zgemv)(&cN,&tmpsize,&nconv,&tpone,evecs,&tmpsize,
		       &HU[i*nconv],&ONE,&tzero,tmpv1,&ONE,1);

            assign_complex_to_spinor(r,tmpv1,12*N);

            d1=square_norm(r,N,parallel);
            
            f(ax,r);

            mul_r(tmps1,hevals[i],r,N);

            diff(tmps2,ax,tmps1,N);
	    
	    d2= square_norm(tmps2,N,parallel);

            d3= sqrt(d2/d1);
	    
	    if(g_proc_id == g_stdio_proc)
	    {fprintf(stdout,"Eval[%06d]: %22.15E rnorm: %22.15E\n", i, hevals[i], d3); fflush(stdout);}
        } 
        free( HU ); HU = NULL;
     }  /* if( (nconv_arpack>0) && (comp_evecs !=0)) */
     et2=gettime();
     if(g_proc_id == g_stdio_proc) {
       fprintf(stdout,"[arpack_cg] time to compute eigenvectors: %+e\n",et2-et1);
     }

  }  /* if(ncurRHS==0) */
    
  double eps_sq_used,restart_eps_sq_used;  /* tolerance squared for the linear system */

  double cur_res; /* current residual squared */

  /*increment the RHS counter*/
  ncurRHS = ncurRHS +1; 

  /* set the tolerance to be used for this right-hand side  */
  if(ncurRHS > nrhs1){
    eps_sq_used = eps_sq;
  }
  else{
    eps_sq_used = eps_sq1;
  }
  
  if(g_proc_id == g_stdio_proc && g_debug_level > 0) {
    fprintf(stdout, "# [arpack_cg] System %d, eps_sq %e, projection type %d\n",ncurRHS,eps_sq_used, projection_type); 
    fflush(stdout);
  } 
  
  /*---------------------------------------------------------------*/
  /* Call init-CG until this right-hand side converges             */
  /*---------------------------------------------------------------*/
  double wt1,wt2,wE,wI;
  double normsq,tol_sq;
  int flag,maxit_remain,numIts,its;
  int info_lapack;

  wE = 0.0; wI = 0.0;     /* Start accumulator timers */
  flag = -1;    	  /* System has not converged yet */
  maxit_remain = maxit;   /* Initialize Max and current # of iters   */
  numIts = 0;  
  restart_eps_sq_used=res_eps_sq;

  while( flag == -1 )
  {
    
    if(nconv > 0)
    {


      /* --------------------------------------------------------- */
      /* Perform init-CG with evecs vectors                        */
      /* xinit = xinit + evecs*Hinv*evec'*(b-Ax0) 		   */
      /* --------------------------------------------------------- */
      wt1 = gettime();

      /*r0=b-Ax0*/
      f(ax,x); /*ax = A*x */
      diff(r,b,ax,N);  /* r=b-A*x */

      if( projection_type == 0) {

        /* x = x + evecs*inv(H)*evecs'*r */
        for(int i=0; i < nconv; i++)
        {
           assign_complex_to_spinor(tmps1,&evecs[i*12*N],12*N);
           initwork[i]= scalar_prod(tmps1,r,N,parallel);
        }

        /* solve the linear system H y = c */
        tmpsize=nconv*nconv;
        _FT(zcopy) (&tmpsize,H,&ONE,Hinv,&ONE); /* copy H into Hinv */
        /* SUBROUTINE ZGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) */
        _FT(zgesv) (&nconv,&ONE,Hinv,&nconv,IPIV,initwork,&nconv,&info_lapack);

        if(info_lapack != 0)
        {
           if(g_proc_id == g_stdio_proc) {
              fprintf(stderr, "[arpack_cg] Error in ZGESV:, info =  %d\n",info_lapack); 
              fflush(stderr);
           }
           exit(1);
        }

        /* x = x + evecs*inv(H)*evecs'*r */
        for(i=0; i<nconv; i++)
        {
          assign_complex_to_spinor(tmps1,&evecs[i*12*N],12*N);
          assign_add_mul(x,tmps1,initwork[i],N);
        }

      } else if ( projection_type == 1 ) {
        /* x = x + evecs*inv(H)*evecs'*r */

        /* tmps2 = Q^+ r */
        f_initial(tmps2, r);

        for(int i=0; i < nconv; i++) {
          /* tmps1 = v_i */
          assign_complex_to_spinor(tmps1,&evecs[i*12*N],12*N);

          /* initwork_i = v_i^+ Q^+ r / lambda_i^2 */
          initwork[i]= scalar_prod(tmps1, tmps2, N, parallel) / ( H[i*nconv+i] * H[i*nconv+i] );
        }

        memset(tmps2, 0, N*sizeof(spinor) );
        for(i=0; i<nconv; i++) {
          assign_complex_to_spinor(tmps1, &evecs[i*12*N], 12*N);
          assign_add_mul(tmps2, tmps1, initwork[i], N);
        }

        /* apply final operator */
        f_final(tmps1, tmps2);
        assign_add_mul(x, tmps1, 1., N);

      }  /* end of if projection type */

      /* compute elapsed time and add to accumulator */

      wt2 = gettime();
      wI = wI + wt2-wt1;
      
    }/* if(nconv > 0) */


    /* which tolerance to use */
    if(eps_sq_used > restart_eps_sq_used)
    {
       tol_sq = eps_sq_used;
       flag   = 1; /* shouldn't restart again */
    }
    else
    {
       tol_sq = restart_eps_sq_used;
    }

    wt1 = gettime();
    its = cg_her(x,b,maxit_remain,tol_sq,rel_prec,N,f); 
          
    wt2 = gettime();

    wE = wE + wt2-wt1;

    /* check convergence */
    if(its == -1)
    {
       /* cg didn't converge */
       if(g_proc_id == g_stdio_proc) {
         fprintf(stderr, "[arpack_cg] CG didn't converge within the maximum number of iterations in arpack_cg. Exiting...\n"); 
         fflush(stderr);
         exit(1);
         
       }
    } 
    else
    {
       numIts += its;   
       maxit_remain = maxit - numIts; /* remaining number of iterations */
       restart_eps_sq_used = restart_eps_sq_used*res_eps_sq; /* prepare for the next restart */
    }
    
  }
  /* end while (flag ==-1)               */
  
  /* ---------- */
  /* Reporting  */
  /* ---------- */
  /* compute the exact residual */
  f(ax,x); /* ax= A*x */
  diff(r,b,ax,N);  /* r=b-A*x */	
  normsq=square_norm(r,N,parallel);
  if(g_debug_level > 0 && g_proc_id == g_stdio_proc)
  {
    fprintf(stdout, "# [arpack_cg] For this rhs:\n");
    fprintf(stdout, "# [arpack_cg] Total initCG Wallclock : %+e\n", wI);
    fprintf(stdout, "# [arpack_cg] Total cg Wallclock : %+e\n", wE);
    fprintf(stdout, "# [arpack_cg] Iterations: %-d\n", numIts); 
    fprintf(stdout, "# [arpack_cg] Actual Resid of LinSys  : %+e\n",normsq);
  }


  /* free memory if this was your last system to solve */
  if(ncurRHS == nrhs){
#if ( (defined SSE) || (defined SSE2) || (defined SSE3)) 
    free(_ax);  free(_r);  free(_tmps1); free(_tmps2);
#else
    free(ax); free(r); free(tmps1); free(tmps2);
#endif
    free(evecs); free(evals); free(H); free(Hinv);
    free(initwork); free(tmpv1); free(zheev_work);
    free(hevals); free(zheev_rwork); free(IPIV);
  }


  return numIts;
}
예제 #18
0
 T quat_square_norm(const Quaternion<T>& q)
 {
     return square_norm(q);
 }
예제 #19
0
int bicgstabell(spinor * const x0, spinor * const b, const int max_iter, 
		double eps_sq, const int rel_prec, const int _l, const int N, matrix_mult f) {

  double err;
  int i, j, k, l;
  double rho0, rho1, beta, alpha, omega, gamma0 = 0., squarenorm;
  spinor * r[5], * u[5], * r0_tilde, * x;
  double tau[5][5], gamma[25], gammap[25], gammapp[25], sigma[25];
  spinor ** solver_field = NULL;
  const int nr_sf = 2*(_l+1)+2;

  l = _l;
  k = -l;

  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }
  r0_tilde = solver_field[0];
  for(i = 0; i <= l; i++){
    r[i] = solver_field[2+2*i];
    u[i] = solver_field[3+2*i];
  }

  x = x0; 
  assign(u[0], b, N);
  f(r0_tilde, x);
  diff(r[0], u[0], r0_tilde, N);
  zero_spinor_field(solver_field[1], N);
  assign(r0_tilde, r[0], N);
  squarenorm = square_norm(b, N, 1);

  rho0 = 1.;
  alpha = 0.;
  omega = 1.;
  err = square_norm(r0_tilde, N, 1);
  while( k < max_iter && (((err > eps_sq) && (rel_prec == 0)) 
			  || ((err > eps_sq*squarenorm) && (rel_prec == 1)) 
			  )) {
    k+=l;

    /* The BiCG part */

    rho0 *= -omega;
    for(j = 0; j < l; j++) {
      rho1 = scalar_prod_r(r[j], r0_tilde, N, 1);
      beta = (rho1/rho0);
      beta *= alpha; 
      rho0 = rho1;
      for(i = 0; i <= j; i++) {
	/* u_i = r_i - \beta u_i */
	assign_mul_add_r(u[i], -beta, r[i], N);
      }
      f(u[j+1], u[j]);
      gamma0 = scalar_prod_r(u[j+1], r0_tilde, N, 1);
      alpha = rho0/gamma0;
      /* r_i = r_i - \alpha u_{i+1} */
      for(i = 0; i <= j; i++) {
	assign_add_mul_r(r[i], u[i+1], -alpha, N);
      }
      f(r[j+1], r[j]);
      /* x = x + \alpha u_0 */
      assign_add_mul_r(x, u[0], alpha, N);
      err = square_norm(r[j+1], N, 1);
      if(g_proc_id == 0 && g_debug_level > 1) {printf("%d %d err = %e\n", k, j, err);fflush(stdout);}
    }

    /* The MR part */

    for(j = 1; j <= l; j++){
      for(i = 1; i < j; i++){
	tau[i][j] = scalar_prod_r(r[j], r[i], N, 1)/sigma[i];
	assign_add_mul_r(r[j], r[i], -tau[i][j], N);
      }
      sigma[j] = scalar_prod_r(r[j], r[j], N, 1);
      gammap[j] = scalar_prod_r(r[0], r[j], N, 1)/sigma[j];
    }
    gamma[l] = gammap[l];
    omega = gamma[l];
    for(j = l-1; j > 0; j--) {
      gamma[j] = gammap[j];
      for(i = j+1; i <= l; i++) {
	gamma[j] -= (tau[j][i]*gamma[i]);
      }
    }
    for(j = 1; j < l; j++) {
      gammapp[j] = gamma[j+1];
      for(i = j+1; i < l; i++){
	gammapp[j] += (tau[j][i]*gamma[i+1]);
      }
    }
    assign_add_mul_r(x, r[0], gamma[1], N);
    assign_add_mul_r(r[0], r[l], -gammap[l], N);
    for(j = 1; j < l; j++){
      assign_add_mul_r(x, r[j], gammapp[j], N);
      assign_add_mul_r(r[0], r[j], -gammap[j], N);
    }
    assign_add_mul_r(u[0], u[l], -gamma[l], N);
    for(j = 1; j < l; j++){
      assign_add_mul_r(u[0], u[j], -gamma[j], N);
    }
    err = square_norm(r[0], N, 1);
    if(g_proc_id == 0 && g_debug_level > 0){
      printf(" BiCGstabell iterated %d %d, %e rho0 = %e, alpha = %e, gamma0= %e\n", l, k, err, rho0, alpha, gamma0);
      fflush( stdout );
    }
  }
  finalize_solver(solver_field, nr_sf);
  if(k == max_iter) return(-1);
  return(k);
}
예제 #20
0
/* P output = solution , Q input = source */
int cg_mms_tm(spinor ** const P, spinor * const Q,
		 solver_params_t * solver_params, double * cgmms_reached_prec) {

  static double normsq, pro, err, squarenorm;
  int iteration, N = solver_params->sdim, no_shifts = solver_params->no_shifts;
  static double gamma, alpham1;
  spinor ** solver_field = NULL;
  double atime, etime;
  const int nr_sf = 3;

  atime = gettime();
  if(solver_params->sdim == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
    init_mms_tm(no_shifts, VOLUMEPLUSRAND);
  } 
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf); 
    init_mms_tm(no_shifts, VOLUMEPLUSRAND/2);
  } 

  zero_spinor_field(P[0], N);
  alphas[0] = 1.0;
  betas[0] = 0.0;
  sigma[0] = solver_params->shifts[0]*solver_params->shifts[0];
  if(g_proc_id == 0 && g_debug_level > 1) printf("# CGMMS: shift %d is %e\n", 0, sigma[0]);

  for(int im = 1; im < no_shifts; im++) {
    sigma[im] = solver_params->shifts[im]*solver_params->shifts[im] - sigma[0];
    if(g_proc_id == 0 && g_debug_level > 1) printf("# CGMMS: shift %d is %e\n", im, sigma[im]);
    // these will be the result spinor fields
    zero_spinor_field(P[im], N);
    // these are intermediate fields
    assign(ps_mms_solver[im-1], Q, N);
    zitam1[im] = 1.0;
    zita[im] = 1.0;
    alphas[im] = 1.0;
    betas[im] = 0.0;
  }

  /* currently only implemented for P=0 */
  squarenorm = square_norm(Q, N, 1);
  /* if a starting solution vector equal to zero is chosen */
  assign(solver_field[0], Q, N);
  assign(solver_field[1], Q, N);
  normsq = squarenorm;

  /* main loop */
  for(iteration = 0; iteration < solver_params->max_iter; iteration++) {

    /*   Q^2*p and then (p,Q^2*p)  */
    solver_params->M_psi(solver_field[2], solver_field[1]);
    // add the zero's shift
    assign_add_mul_r(solver_field[2], solver_field[1], sigma[0], N);
    pro = scalar_prod_r(solver_field[1], solver_field[2], N, 1);

    /* For the update of the coeff. of the shifted pol. we need alphas[0](i-1) and alpha_cg(i).
       This is the reason why we need this double definition of alpha */
    alpham1 = alphas[0];

    /* Compute alphas[0](i+1) */
    alphas[0] = normsq/pro;
    for(int im = 1; im < no_shifts; im++) {

      /* Now gamma is a temp variable that corresponds to zita(i+1) */ 
      gamma = zita[im]*alpham1/(alphas[0]*betas[0]*(1.-zita[im]/zitam1[im]) 
				+ alpham1*(1.+sigma[im]*alphas[0]));

      // Now zita(i-1) is put equal to the old zita(i)
      zitam1[im] = zita[im];
      // Now zita(i+1) is updated 
      zita[im] = gamma;
      // Update of alphas(i) = alphas[0](i)*zita(i+1)/zita(i) 
      alphas[im] = alphas[0]*zita[im]/zitam1[im];

      // Compute xs(i+1) = xs(i) + alphas(i)*ps(i) 
      assign_add_mul_r(P[im], ps_mms_solver[im-1], alphas[im], N); 
      // in the CG the corrections are decreasing with the iteration number increasing
      // therefore, we can remove shifts when the norm of the correction vector
      // falls below a threshold
      // this is useful for computing time and needed, because otherwise
      // zita might get smaller than DOUBLE_EPS and, hence, zero
      if(iteration > 0 && (iteration % 20 == 0) && (im == no_shifts-1)) {
	double sn = square_norm(ps_mms_solver[im-1], N, 1);
	if(alphas[no_shifts-1]*alphas[no_shifts-1]*sn <= solver_params->squared_solver_prec) {
	  no_shifts--;
	  if(g_debug_level > 2 && g_proc_id == 0) {
	    printf("# CGMMS: at iteration %d removed one shift, %d remaining\n", iteration, no_shifts);
      	  }
	}
      }
    }
    
    /*  Compute x_(i+1) = x_i + alphas[0](i+1) p_i    */
    assign_add_mul_r(P[0], solver_field[1],  alphas[0], N);
    /*  Compute r_(i+1) = r_i - alphas[0](i+1) Qp_i   */
    assign_add_mul_r(solver_field[0], solver_field[2], -alphas[0], N);

    /* Check whether the precision eps_sq is reached */

    err = square_norm(solver_field[0], N, 1);

    if(g_debug_level > 2 && g_proc_id == g_stdio_proc) {
      printf("# CGMMS iteration: %d residue: %g\n", iteration, err); fflush( stdout );
    }

    if( ((err <= solver_params->squared_solver_prec) && (solver_params->rel_prec == 0)) ||
        ((err <= solver_params->squared_solver_prec*squarenorm) && (solver_params->rel_prec > 0)) ||
        (iteration == solver_params->max_iter -1) ) {
      /* FIXME temporary output of precision until a better solution can be found */
      *cgmms_reached_prec = err;
      break;
    }

    /* Compute betas[0](i+1) = (r(i+1),r(i+1))/(r(i),r(i))
       Compute p(i+1) = r(i+1) + beta(i+1)*p(i)  */
    betas[0] = err/normsq;
    assign_mul_add_r(solver_field[1], betas[0], solver_field[0], N);
    normsq = err;

    /* Compute betas(i+1) = betas[0](i+1)*(zita(i+1)*alphas(i))/(zita(i)*alphas[0](i))
       Compute ps(i+1) = zita(i+1)*r(i+1) + betas(i+1)*ps(i)  */
    for(int im = 1; im < no_shifts; im++) {
      betas[im] = betas[0]*zita[im]*alphas[im]/(zitam1[im]*alphas[0]);
      assign_mul_add_mul_r(ps_mms_solver[im-1], solver_field[0], betas[im], zita[im], N);
    }
  }
  etime = gettime();
  g_sloppy_precision = 0;
  if(iteration == solver_params->max_iter -1) iteration = -1;
  else iteration++;
  if(g_debug_level > 0 && g_proc_id == 0) {
    printf("# CGMMS (%d shifts): iter: %d eps_sq: %1.4e %1.4e t/s\n", solver_params->no_shifts, iteration, solver_params->squared_solver_prec, etime - atime); 
  }
  
  finalize_solver(solver_field, nr_sf);
  return(iteration);
}
예제 #21
0
파일: eigenvalues.c 프로젝트: palao/tmLQCD
double eigenvalues(int * nr_of_eigenvalues, const int max_iterations, 
		   const double precision, const int maxmin,
		   const int readwrite, const int nstore, 
		   const int even_odd_flag) {
  double returnvalue;
  complex norm2;
#ifdef HAVE_LAPACK
  static spinor * eigenvectors_ = NULL;
  static int allocated = 0;
  char filename[200];
  FILE * ofs;
#ifdef MPI
  double atime, etime;
#endif

  /**********************
   * For Jacobi-Davidson 
   **********************/
  int verbosity = g_debug_level, converged = 0, blocksize = 1, blockwise = 0;
  int solver_it_max = 50, j_max, j_min, ii, jj;
  /*int it_max = 10000;*/
  /* complex *eigv_ = NULL, *eigv; */
  double decay_min = 1.7, decay_max = 1.5, prec,
    threshold_min = 1.e-3, threshold_max = 5.e-2;

  /* static int v0dim = 0; */
  int v0dim = 0;
  matrix_mult f;
  int N = (VOLUME)/2, N2 = (VOLUMEPLUSRAND)/2;
  spinor * max_eigenvector_ = NULL, * max_eigenvector;

  /**********************
   * General variables
   **********************/
  int returncode=0;
  int returncode2=0;

  char eigenvector_prefix[512];
  char eigenvalue_prefix[512];


  no_eigenvalues = *nr_of_eigenvalues;

  sprintf(eigenvector_prefix,"eigenvector.%%s.%%.2d.%%.4d");
  sprintf(eigenvalue_prefix,"eigenvalues.%%s.%%.4d");

  if(!even_odd_flag) {
    N = (VOLUME);
    N2 = (VOLUMEPLUSRAND);
    f = &Q_pm_psi;
  }
  else {
    f = &Qtm_pm_psi;
  }
  evlength = N2;
  if(g_proc_id == g_stdio_proc && g_debug_level >0) {
    printf("Number of %s eigenvalues to compute = %d\n",
	   maxmin ? "maximal" : "minimal",(*nr_of_eigenvalues));
    printf("Using Jacobi-Davidson method! \n");
  }

  if((*nr_of_eigenvalues) < 8){
    j_max = 15;
    j_min = 8;
  }
  else{
    j_max = 2*(*nr_of_eigenvalues);
    j_min = (*nr_of_eigenvalues);
  }
  if(precision < 1.e-14){
    prec = 1.e-14;
  }
  else{
    prec = precision;
  }
#if (defined SSE || defined SSE2 || defined SSE3)
  max_eigenvector_ = calloc(N2+1, sizeof(spinor));
  max_eigenvector = (spinor *)(((unsigned long int)(max_eigenvector_)+ALIGN_BASE)&~ALIGN_BASE);
#else
  max_eigenvector_= calloc(N2, sizeof(spinor));
  max_eigenvector = max_eigenvector_;
#endif  

  if(allocated == 0) {
    allocated = 1;
#if (defined SSE || defined SSE2 || defined SSE3)
    eigenvectors_ = calloc(N2*(*nr_of_eigenvalues)+1, sizeof(spinor)); 
    eigenvectors = (spinor *)(((unsigned long int)(eigenvectors_)+ALIGN_BASE)&~ALIGN_BASE);
#else
    eigenvectors_= calloc(N2*(*nr_of_eigenvalues), sizeof(spinor));
    eigenvectors = eigenvectors_;
#endif
    eigenvls = (double*)malloc((*nr_of_eigenvalues)*sizeof(double));
    inv_eigenvls = (double*)malloc((*nr_of_eigenvalues)*sizeof(double));
  }

  solver_it_max = 50;
  /* compute the maximal one first */
  jdher(N*sizeof(spinor)/sizeof(complex), N2*sizeof(spinor)/sizeof(complex),
	50., 1.e-12, 
	1, 15, 8, max_iterations, 1, 0, 0, NULL,
	CG, solver_it_max,
	threshold_max, decay_max, verbosity,
	&converged, (complex*) max_eigenvector, (double*) &max_eigenvalue,
	&returncode2, JD_MAXIMAL, 1,
	f);

  if(readwrite) {
    if(even_odd_flag){
      for(v0dim = 0; v0dim < (*nr_of_eigenvalues); v0dim++) {
	sprintf(filename, eigenvector_prefix , maxmin ? "max" : "min", v0dim, nstore);
	if((read_eospinor(&eigenvectors[v0dim*N2], filename)) != 0) {
	  break;
	}
      }
    } else {
      FILE *testfile;
      spinor *s;
      double sqnorm;
      for(v0dim = 0; v0dim < (*nr_of_eigenvalues); v0dim++) {
	sprintf(filename, eigenvector_prefix, maxmin ? "max" : "min", v0dim, nstore);

	printf("reading eigenvectors ... ");
	testfile=fopen(filename,"r");
	if( testfile != NULL){
	  fclose(testfile);
	  s=(spinor*)&eigenvectors[v0dim*N2];
	  read_spinor(s,NULL, filename,0);
	  sqnorm=square_norm(s,VOLUME,1);
	  printf(" has | |^2 = %e \n",sqnorm);

	} else {
	  printf(" no more eigenvectors \n");
	  break;
	}
      }
    }
  }

  if(readwrite != 2) {
#ifdef MPI
    atime = MPI_Wtime();
#endif
    /* (re-) compute minimal eigenvalues */
    converged = 0;
    solver_it_max = 200;

    if(maxmin)
      jdher(N*sizeof(spinor)/sizeof(complex), N2*sizeof(spinor)/sizeof(complex),
	  50., prec, 
	  (*nr_of_eigenvalues), j_max, j_min, 
	  max_iterations, blocksize, blockwise, v0dim, (complex*) eigenvectors,
	  CG, solver_it_max,
	  threshold_max, decay_max, verbosity,
	  &converged, (complex*) eigenvectors, eigenvls,
	  &returncode, JD_MAXIMAL, 1,
	  f);
    else
      jdher(N*sizeof(spinor)/sizeof(complex), N2*sizeof(spinor)/sizeof(complex),
	  0., prec, 
	  (*nr_of_eigenvalues), j_max, j_min, 
	  max_iterations, blocksize, blockwise, v0dim, (complex*) eigenvectors,
	  CG, solver_it_max,
	  threshold_min, decay_min, verbosity,
	  &converged, (complex*) eigenvectors, eigenvls,
	  &returncode, JD_MINIMAL, 1,
	  f);
    
#ifdef MPI
    etime = MPI_Wtime();
    if(g_proc_id == 0) {
      printf("Eigenvalues computed in %e sec. (MPI_Wtime)\n", etime-atime);
    }
#endif
  }
  else {
    sprintf(filename, eigenvalue_prefix, maxmin ? "max" : "min", nstore); 
    if((ofs = fopen(filename, "r")) != (FILE*) NULL) {
      for(v0dim = 0; v0dim < (*nr_of_eigenvalues); v0dim++) {
	fscanf(ofs, "%d %lf\n", &v0dim, &eigenvls[v0dim]);
	if(feof(ofs)) break;
	converged = v0dim;
      }
    }
    fclose(ofs);
  }

  (*nr_of_eigenvalues) = converged;
  no_eigenvalues = converged;
  ev_minev = eigenvls[(*nr_of_eigenvalues)-1];
  eigenvalues_for_cg_computed = converged;

  for (ii = 0; ii < (*nr_of_eigenvalues); ii++){
    for (jj = 0; jj <= ii; jj++){
      norm2 = scalar_prod(&(eigenvectors[ii*N2]),&(eigenvectors[jj*N2]), VOLUME, 1);
      if(ii==jj){
        if((fabs(1.-norm2.re)>1e-12) || (fabs(norm2.im)>1e-12) || 1) {
          if(g_proc_id == g_stdio_proc){
            printf("< %d | %d>  =\t   %e  +i * %e \n", ii+1, jj+1, norm2.re, norm2.im);
            fflush(stdout);
          }
        }
      }
      else{
        if((fabs(norm2.re)>1e-12) || (fabs(norm2.im)>1e-12) || 1) {
          if(g_proc_id == g_stdio_proc){
            printf("< %d | %d>  =\t   %e  +i * %e \n", ii+1, jj+1, norm2.re, norm2.im);
            fflush(stdout);
          }
        }
      }
    }
  }


  if(readwrite == 1 ) {
    if(even_odd_flag)
      for(v0dim = 0; v0dim < (*nr_of_eigenvalues); v0dim++) {
	sprintf(filename, eigenvector_prefix, maxmin ? "max" : "min", v0dim, nstore);
	if((write_eospinor(&eigenvectors[v0dim*N2], filename, eigenvls[v0dim], prec, nstore)) != 0) {
	  break;
	}
      }
    else{
      WRITER *writer=NULL;
      spinor *s;
      double sqnorm;
      paramsPropagatorFormat *propagatorFormat = NULL;

      for(v0dim = 0; v0dim < (*nr_of_eigenvalues); v0dim++) {
	sprintf(filename, eigenvector_prefix, maxmin ? "max" : "min", v0dim, nstore);

	construct_writer(&writer, filename, 0);
	/* todo write propagator format */
	propagatorFormat = construct_paramsPropagatorFormat(64, 1);
	write_propagator_format(writer, propagatorFormat);
	free(propagatorFormat);


	s=(spinor*)&eigenvectors[v0dim*N2];
	write_spinor(writer, &s,NULL, 1, 64);
	destruct_writer(writer);
	writer=NULL;
	sqnorm=square_norm(s,VOLUME,1);
	printf(" wrote eigenvector | |^2 = %e \n",sqnorm);


      }
    }
  }
  if(g_proc_id == 0 && readwrite != 2) {
    sprintf(filename, eigenvalue_prefix , maxmin ? "max" : "min", nstore); 
    ofs = fopen(filename, "w");
    for(v0dim = 0; v0dim < (*nr_of_eigenvalues); v0dim++) {
      fprintf(ofs, "%d %e\n", v0dim, eigenvls[v0dim]);
    }
    fclose(ofs);
  }
  for(v0dim = 0; v0dim < converged; v0dim++) {
    inv_eigenvls[v0dim] = 1./eigenvls[v0dim];
  }

  ev_qnorm=1.0/(sqrt(max_eigenvalue)+0.1);
  ev_minev*=ev_qnorm*ev_qnorm;
  /* ov_n_cheby is initialized in Dov_psi.c */
  returnvalue=eigenvls[0];
  free(max_eigenvector_);
#else
  fprintf(stderr, "lapack not available, so JD method for EV computation not available \n");
#endif
  return(returnvalue);
}
예제 #22
0
double ndratcor_acc(const int id, hamiltonian_field_t * const hf) {
  monomial * mnl = &monomial_list[id];
  double atime, etime, delta;
  spinor * up0, * dn0, * up1, * dn1, * tup, * tdn;
  double coefs[6] = {-1./2., 3./8., -5./16., 35./128., -63./256., 231./1024.};
  atime = gettime();
  nd_set_global_parameter(mnl);
  g_mu3 = 0.;
  if(mnl->type == NDCLOVERRATCOR) {
    sw_term((const su3**) hf->gaugefield, mnl->kappa, mnl->c_sw); 
    sw_invert_nd(mnl->mubar*mnl->mubar - mnl->epsbar*mnl->epsbar);
    copy_32_sw_fields();
  }
  mnl->energy1 = square_norm(mnl->pf, VOLUME/2, 1) + square_norm(mnl->pf2, VOLUME/2, 1);

  mnl->solver_params.max_iter = mnl->maxiter;
  mnl->solver_params.squared_solver_prec = mnl->accprec;
  mnl->solver_params.no_shifts = mnl->rat.np;
  mnl->solver_params.shifts = mnl->rat.mu;
  mnl->solver_params.type = mnl->solver;
  mnl->solver_params.M_ndpsi = &Qtm_pm_ndpsi;
  mnl->solver_params.M_ndpsi32 = &Qtm_pm_ndpsi_32;    
  if(mnl->type == NDCLOVERRATCOR) {
    mnl->solver_params.M_ndpsi = &Qsw_pm_ndpsi;
    mnl->solver_params.M_ndpsi32 = &Qsw_pm_ndpsi_32;
  }
  mnl->solver_params.sdim = VOLUME/2;
  mnl->solver_params.rel_prec = g_relative_precision_flag;

  // apply (Q R)^(-1) to pseudo-fermion fields
  up0 = mnl->w_fields[0]; dn0 = mnl->w_fields[1];
  up1 = mnl->w_fields[2]; dn1 = mnl->w_fields[3];

  apply_Z_ndpsi(up0, dn0, mnl->pf, mnl->pf2, id, hf, &(mnl->solver_params));
  delta = coefs[0]*(scalar_prod_r(mnl->pf, up0, VOLUME/2, 1) + scalar_prod_r(mnl->pf2, dn0, VOLUME/2, 1));
  mnl->energy1 += delta;
  if(g_debug_level > 2 && g_proc_id == 0)
    printf("# NDRATCOR acc step: c_%d*(phi * Z^%d * phi) = %e\n", 1, 1, delta);

  for(int i = 2; i < 8; i++) {
    if(delta*delta < mnl->accprec) break;

    delta = coefs[i-1]*(square_norm(up0, VOLUME/2, 1) + square_norm(dn0, VOLUME/2, 1)); 
    mnl->energy1 += delta;
    if(g_debug_level > 2 && g_proc_id == 0)
      printf("# NDRATCOR acc step: c_%d*(phi * Z^%d * phi) = %e\n", i, i, delta);
    i++; //incrementing i
    if(delta*delta < mnl->accprec) break;

    apply_Z_ndpsi(up1, dn1, up0, dn0, id, hf, &(mnl->solver_params));
    delta = coefs[i-1]*(scalar_prod_r(up0, up1, VOLUME/2, 1) + scalar_prod_r(dn0, dn1, VOLUME/2, 1));
    mnl->energy1 += delta;
    if(g_debug_level > 2 && g_proc_id == 0)
      printf("# NDRATCOR acc step: c_%d*(phi * Z^%d * phi) = %e\n", i, i, delta);

    tup = up0; tdn = dn0;
    up0 = up1; dn0 = dn1;
    up1 = tup; dn1 = tdn;
  }


  etime = gettime();
  if(g_proc_id == 0) {
    if(g_debug_level > 1) {
      printf("# Time for %s monomial acc step: %e s\n", mnl->name, etime-atime);
    }
    if(g_debug_level > 3) { // shoud be 3
      printf("called ndratcor_acc for id %d dH = %1.10e\n", id, mnl->energy1 - mnl->energy0);
    }
  }
  return(mnl->energy1 - mnl->energy0);
}
예제 #23
0
파일: linsolve.c 프로젝트: annube/tmLQCD
/* k output , l input */
int bicg(spinor * const k, spinor * const l, double eps_sq, const int rel_prec) {

  double err, d1, squarenorm=0.;
  complex rho0, rho1, omega, alpha, beta, nom, denom;
  int iteration, N=VOLUME/2;
  spinor * r, * p, * v, *hatr, * s, * t, * P, * Q;
  

  if(ITER_MAX_BCG > 0) {



    hatr = g_spinor_field[DUM_SOLVER];
    r = g_spinor_field[DUM_SOLVER+1];
    v = g_spinor_field[DUM_SOLVER+2];
    p = g_spinor_field[DUM_SOLVER+3];
    s = g_spinor_field[DUM_SOLVER+4];
    t = g_spinor_field[DUM_SOLVER+5];
    P = k;
    Q = l;

    squarenorm = square_norm(Q, VOLUME/2, 1);
    
    Mtm_plus_psi(r, P);
    gamma5(g_spinor_field[DUM_SOLVER], l, VOLUME/2);
    diff(p, hatr, r, N);
    assign(r, p, N);
    assign(hatr, p, N);
    rho0 = scalar_prod(hatr, r, N, 1);
    
    for(iteration = 0; iteration < ITER_MAX_BCG; iteration++){
      err = square_norm(r, N, 1);
      if(g_proc_id == g_stdio_proc && g_debug_level > 1) {
	printf("BiCGstab: iterations: %d res^2 %e\n", iteration, err);
	fflush(stdout);
      }
      if (((err <= eps_sq) && (rel_prec == 0)) 
	  || ((err <= eps_sq*squarenorm) && (rel_prec == 1))){
	break;
      }
      Mtm_plus_psi(v, p);
      denom = scalar_prod(hatr, v, N, 1);
      _div_complex(alpha, rho0, denom);
      assign(s, r, N);
      assign_diff_mul(s, v, alpha, N);
      Mtm_plus_psi(t, s);
      omega = scalar_prod(t,s, N, 1);
      d1 = square_norm(t, N, 1);
      omega.re/=d1; omega.im/=d1;
      assign_add_mul_add_mul(P, p, s, alpha, omega, N);
      assign(r, s, N);
      assign_diff_mul(r, t, omega, N);
      rho1 = scalar_prod(hatr, r, N, 1);
      _mult_assign_complex(nom, alpha, rho1);
      _mult_assign_complex(denom, omega, rho0);
      _div_complex(beta, nom, denom);
      omega.re=-omega.re; omega.im=-omega.im;
      assign_mul_bra_add_mul_ket_add(p, v, r, omega, beta, N);
      rho0.re = rho1.re; rho0.im = rho1.im;
    }
    
    if(g_proc_id==0 && g_debug_level > 0) {
      printf("BiCGstab: iterations: %d eps_sq: %1.4e\n", iteration, eps_sq); 
    }
  }
  else{
    iteration = ITER_MAX_BCG;
    gamma5(k, l, VOLUME/2);
  }

  /* if bicg fails, redo with conjugate gradient */
  if(iteration>=ITER_MAX_BCG){
    iteration = solve_cg(k,l,eps_sq, rel_prec);
    /* Save the solution for reuse! not needed since Chronological inverter is there */
    /*     assign(g_spinor_field[DUM_DERI+6], k, VOLUME/2); */
    Qtm_minus_psi(k, k);;
  }
  return iteration;
}
예제 #24
0
// computes ||(1 - C^dagger R C) phi||
void check_C_ndpsi(spinor * const k_up, spinor * const k_dn,
		   spinor * const l_up, spinor * const l_dn,
		   const int id, hamiltonian_field_t * const hf,
		   solver_params_t * solver_params) {
  monomial * mnl = &monomial_list[id];
  mnl->iter0 = solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field,
			     l_up, l_dn, solver_params);

  assign(k_up, l_up, VOLUME/2);
  assign(k_dn, l_dn, VOLUME/2);

  // apply C to the random field to generate pseudo-fermion fields
  for(int j = (mnl->rat.np-1); j > -1; j--) {
    // Q_h * tau^1 - i nu_j
    // this needs phmc_Cpol = 1 to work!
    if(mnl->type == NDCLOVERRATCOR || mnl->type == NDCLOVERRAT) {
      Qsw_tau1_sub_const_ndpsi(g_chi_up_spinor_field[mnl->rat.np], g_chi_dn_spinor_field[mnl->rat.np],
			       g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
			       I*mnl->rat.nu[j], 1., mnl->EVMaxInv);
    }
    else {
      Q_tau1_sub_const_ndpsi(g_chi_up_spinor_field[mnl->rat.np], g_chi_dn_spinor_field[mnl->rat.np],
			     g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
			     I*mnl->rat.nu[j], 1., mnl->EVMaxInv);
    }
    assign_add_mul(k_up, g_chi_up_spinor_field[mnl->rat.np], I*mnl->rat.rnu[j], VOLUME/2);
    assign_add_mul(k_dn, g_chi_dn_spinor_field[mnl->rat.np], I*mnl->rat.rnu[j], VOLUME/2);
  }
  //apply R
  solver_params->shifts = mnl->rat.mu;
  solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field,
	       k_up, k_dn,
	       solver_params);
  for(int j = (mnl->rat.np-1); j > -1; j--) {
    assign_add_mul_r(k_up, g_chi_up_spinor_field[j], 
		     mnl->rat.rmu[j], VOLUME/2);
    assign_add_mul_r(k_dn, g_chi_dn_spinor_field[j], 
		     mnl->rat.rmu[j], VOLUME/2);
  }
  // apply C^dagger
  solver_params->shifts = mnl->rat.nu;
  solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field,
	       k_up, k_dn, solver_params);
  for(int j = (mnl->rat.np-1); j > -1; j--) {
    // Q_h * tau^1 + i nu_j
    if(mnl->type == NDCLOVERRATCOR || mnl->type == NDCLOVERRAT) {
      Qsw_tau1_sub_const_ndpsi(g_chi_up_spinor_field[mnl->rat.np], g_chi_dn_spinor_field[mnl->rat.np],
			     g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
			     -I*mnl->rat.nu[j], 1., mnl->EVMaxInv);
    }
    else {
      Q_tau1_sub_const_ndpsi(g_chi_up_spinor_field[mnl->rat.np], g_chi_dn_spinor_field[mnl->rat.np],
			     g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
			     -I*mnl->rat.nu[j], 1., mnl->EVMaxInv);
    }
    assign_add_mul(k_up, g_chi_up_spinor_field[mnl->rat.np], -I*mnl->rat.rnu[j], VOLUME/2);
    assign_add_mul(k_dn, g_chi_dn_spinor_field[mnl->rat.np], -I*mnl->rat.rnu[j], VOLUME/2);
  }
  diff(k_up, k_up, l_up, VOLUME/2);  
  diff(k_dn, k_dn, l_dn, VOLUME/2);  
  double resi = square_norm(k_up, VOLUME/2, 1);
  resi += square_norm(k_dn, VOLUME/2, 1);
  if(g_proc_id == 0) printf("|| (1-C^dagger R C)*phi|| = %e\n", resi);

  return;
}
예제 #25
0
파일: linsolve.c 프로젝트: annube/tmLQCD
/* k output , l input */
int solve_cg(spinor * const k, spinor * const l, double eps_sq, const int rel_prec) {

  static double normsq, pro, err, alpha_cg, beta_cg, squarenorm, sqnrm, sqnrm2;
  int iteration = 0, i, j;
  int save_sloppy = g_sloppy_precision;
  double atime, etime, flops;
  spinor *x, *delta, *y;
  
  /* initialize residue r and search vector p */
#ifdef MPI
  atime = MPI_Wtime();
#else
  atime = ((double)clock())/((double)(CLOCKS_PER_SEC));
#endif
  squarenorm = square_norm(l, VOLUME/2, 1);

  if(g_sloppy_precision_flag == 1) { 
    delta = g_spinor_field[DUM_SOLVER+3];
    x = g_spinor_field[DUM_SOLVER+4];
    y = g_spinor_field[DUM_SOLVER+5];
    assign(delta, l, VOLUME/2);
    Qtm_pm_psi(y, k);
    diff(delta, l, y, VOLUME/2);
    sqnrm = square_norm(delta, VOLUME/2, 1);
    if(((sqnrm <= eps_sq) && (rel_prec == 0)) || ((sqnrm <= eps_sq*squarenorm) && (rel_prec == 1))) {
      return(0);
    }
    
    for(i = 0; i < 20; i++) {
      g_sloppy_precision = 1;
      /* main CG loop in lower precision */
      zero_spinor_field(x, VOLUME/2);
      assign(g_spinor_field[DUM_SOLVER+1], delta, VOLUME/2);
      assign(g_spinor_field[DUM_SOLVER+2], delta, VOLUME/2);
      sqnrm2 = sqnrm;
      for(j = 0; j <= ITER_MAX_CG; j++) {
	Qtm_pm_psi(g_spinor_field[DUM_SOLVER], g_spinor_field[DUM_SOLVER+2]);
	pro = scalar_prod_r(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER], VOLUME/2, 1);
	alpha_cg = sqnrm2 / pro;
	assign_add_mul_r(x, g_spinor_field[DUM_SOLVER+2], alpha_cg, VOLUME/2);
	
	assign_mul_add_r(g_spinor_field[DUM_SOLVER], -alpha_cg, g_spinor_field[DUM_SOLVER+1], VOLUME/2);
	err = square_norm(g_spinor_field[DUM_SOLVER], VOLUME/2, 1);
	
	if(g_proc_id == g_stdio_proc && g_debug_level > 1) {
	  printf("inner CG: %d res^2 %g\n", iteration+j+1, err);
	  fflush(stdout);
	}
	
	if (((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*squarenorm) && (rel_prec == 1))){
	  break;
	}
	beta_cg = err / sqnrm2;
	assign_mul_add_r(g_spinor_field[DUM_SOLVER+2], beta_cg, g_spinor_field[DUM_SOLVER], VOLUME/2);
	assign(g_spinor_field[DUM_SOLVER+1], g_spinor_field[DUM_SOLVER], VOLUME/2);
	sqnrm2 = err;
      }
      /* end main CG loop */
      iteration += j;
      g_sloppy_precision = 0;
      add(k, k, x, VOLUME/2);
      
      Qtm_pm_psi(y, x);
      diff(delta, delta, y, VOLUME/2);
      sqnrm = square_norm(delta, VOLUME/2, 1);
      if(g_debug_level > 0 && g_proc_id == g_stdio_proc) {
	printf("mixed CG(linsolve): true residue %d\t%g\t\n",iteration, sqnrm); fflush( stdout);
      }
      
      if(((sqnrm <= eps_sq) && (rel_prec == 0)) || ((sqnrm <= eps_sq*squarenorm) && (rel_prec == 1))) {
	break;
      }
      iteration++;
    }
  }
  else {
    Qtm_pm_psi(g_spinor_field[DUM_SOLVER], k); 
    
    diff(g_spinor_field[DUM_SOLVER+1], l, g_spinor_field[DUM_SOLVER], VOLUME/2);
    assign(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER+1], VOLUME/2);
    normsq=square_norm(g_spinor_field[DUM_SOLVER+1], VOLUME/2, 1);
    
    /* main loop */
    for(iteration = 1; iteration <= ITER_MAX_CG; iteration++) {
      Qtm_pm_psi(g_spinor_field[DUM_SOLVER], g_spinor_field[DUM_SOLVER+2]);
      pro=scalar_prod_r(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER], VOLUME/2, 1);
      alpha_cg=normsq/pro;
      assign_add_mul_r(k, g_spinor_field[DUM_SOLVER+2], alpha_cg, VOLUME/2);
      
      assign_mul_add_r(g_spinor_field[DUM_SOLVER], -alpha_cg, g_spinor_field[DUM_SOLVER+1], VOLUME/2);
      err=square_norm(g_spinor_field[DUM_SOLVER], VOLUME/2, 1);
      
      if(g_proc_id == g_stdio_proc && g_debug_level > 1) {
	printf("CG (linsolve): iterations: %d res^2 %e\n", iteration, err);
	fflush(stdout);
      }
      
      if (((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*squarenorm) && (rel_prec == 1))){
	break;
      }
      beta_cg = err/normsq;
      assign_mul_add_r(g_spinor_field[DUM_SOLVER+2], beta_cg, g_spinor_field[DUM_SOLVER], VOLUME/2);
      assign(g_spinor_field[DUM_SOLVER+1], g_spinor_field[DUM_SOLVER], VOLUME/2);
      normsq=err;
    }
  }
#ifdef MPI
  etime = MPI_Wtime();
#else
  etime = ((double)clock())/((double)(CLOCKS_PER_SEC));
#endif
  /* 2 A + 2 Nc Ns + N_Count ( 2 A + 10 Nc Ns ) */
  /* 2*1320.0 because the linalg is over VOLUME/2 */
  flops = (2*(2*1320.0+2*3*4) + 2*3*4 + iteration*(2.*(2*1320.0+2*3*4) + 10*3*4))*VOLUME/2/1.0e6f;
  if(g_proc_id==0 && g_debug_level > 0) {
    printf("CG: iter: %d eps_sq: %1.4e t/s: %1.4e\n", iteration, eps_sq, etime-atime); 
    printf("CG: flopcount: t/s: %1.4e mflops_local: %.1f mflops: %.1f\n", 
	   etime-atime, flops/(etime-atime), g_nproc*flops/(etime-atime));
  }
  g_sloppy_precision = save_sloppy;
  return(iteration);
}
예제 #26
0
void ndratcor_heatbath(const int id, hamiltonian_field_t * const hf) {
  monomial * mnl = &monomial_list[id];
  double atime, etime, delta;
  spinor * up0, * dn0, * up1, * dn1, * tup, * tdn, * Zup, * Zdn;
  double coefs[6] = {1./4., -3./32., 7./128., -77./2048., 231./8192., -1463./65536.}; // series of (1+x)^(1/4)
  double coefs_check[6] = {1./2., -1./8., 1./16., -5./128., 7./256., -21./1024.}; // series of (1+x)^(1/2)
  atime = gettime();
  nd_set_global_parameter(mnl);
  g_mu3 = 0.;
  mnl->iter0 = 0;
  if(mnl->type == NDCLOVERRATCOR) {
    init_sw_fields();
    sw_term((const su3**)hf->gaugefield, mnl->kappa, mnl->c_sw); 
    sw_invert_nd(mnl->mubar*mnl->mubar - mnl->epsbar*mnl->epsbar);
    copy_32_sw_fields();
  }
  // we measure before the trajectory!
  if((mnl->rec_ev != 0) && (hf->traj_counter%mnl->rec_ev == 0)) {
    if(mnl->type != NDCLOVERRAT) phmc_compute_ev(hf->traj_counter-1, id, &Qtm_pm_ndbipsi);
    else phmc_compute_ev(hf->traj_counter-1, id, &Qsw_pm_ndbipsi);
  }

  // the Gaussian distributed random fields
  mnl->energy0 = 0.;
  random_spinor_field_eo(mnl->pf, mnl->rngrepro, RN_GAUSS);
  mnl->energy0 = square_norm(mnl->pf, VOLUME/2, 1);

  random_spinor_field_eo(mnl->pf2, mnl->rngrepro, RN_GAUSS);
  mnl->energy0 += square_norm(mnl->pf2, VOLUME/2, 1);

  mnl->solver_params.max_iter = mnl->maxiter;
  mnl->solver_params.squared_solver_prec = mnl->accprec;
  mnl->solver_params.no_shifts = mnl->rat.np;
  mnl->solver_params.shifts = mnl->rat.mu;
  mnl->solver_params.type = mnl->solver;
  mnl->solver_params.M_ndpsi = &Qtm_pm_ndpsi;
  mnl->solver_params.M_ndpsi32 = &Qtm_pm_ndpsi_32;    
  if(mnl->type == NDCLOVERRATCOR) {
    mnl->solver_params.M_ndpsi = &Qsw_pm_ndpsi;
    mnl->solver_params.M_ndpsi32 = &Qsw_pm_ndpsi_32;
  }
  mnl->solver_params.sdim = VOLUME/2;
  mnl->solver_params.rel_prec = g_relative_precision_flag;

  // apply B to the random field to generate pseudo-fermion fields
  up0 = mnl->w_fields[0]; dn0 = mnl->w_fields[1];
  up1 = mnl->w_fields[2]; dn1 = mnl->w_fields[3];
  Zup = mnl->w_fields[4]; Zdn = mnl->w_fields[5];

  apply_Z_ndpsi(up0, dn0, mnl->pf, mnl->pf2, id, hf, &(mnl->solver_params));
  // computing correction to energy1
  delta = coefs_check[0]*(scalar_prod_r(mnl->pf, up0, VOLUME/2, 1) + scalar_prod_r(mnl->pf2, dn0, VOLUME/2, 1));
  if(g_debug_level > 2 && g_proc_id == 0)
    printf("# NDRATCOR heatbath: c_%d*(R * Z^%d * R) = %e\n", 1, 1, delta);
  // debug for showing that the old check was giving a smaller delta
  if(g_debug_level > 3) {
    double delta_old = square_norm(up0, VOLUME/2, 1) + square_norm(dn0, VOLUME/2, 1);
    if(g_proc_id == 0) {
      printf("# NDRATCOR old check: || Z^%d * R ||^2 = %e\n", 1, delta_old);
      printf("# NDRATCOR new check: (c_%d*(R * Z^%d * R))^2 = %e\n", 1, 1, delta*delta);
    }
  }

  if(delta*delta > mnl->accprec) {
    assign_add_mul_r(mnl->pf, up0, coefs[0], VOLUME/2);
    assign_add_mul_r(mnl->pf2, dn0, coefs[0], VOLUME/2);
    
    // saving first application
    assign(Zup, up0, VOLUME/2);
    assign(Zdn, dn0, VOLUME/2);
    
    
    for(int i = 2; i < 8; i++) {
      // computing next order correction to energy1
      delta = coefs_check[i-1]*(scalar_prod_r(Zup, up0, VOLUME/2, 1) + scalar_prod_r(Zup, dn0, VOLUME/2, 1)); 
      if(g_debug_level > 2 && g_proc_id == 0)
        printf("# NDRATCOR heatbath: c_%d*(R * Z^%d * R) = %e\n", i, i, delta);
      // debug for showing that the old check was giving a smaller delta
      if(g_debug_level > 3) {
        double delta_old = square_norm(up0, VOLUME/2, 1) + square_norm(dn0, VOLUME/2, 1);
        if(g_proc_id == 0) {
          printf("# NDRATCOR old check: || Z^%d * R ||^2 = %e\n", 1, delta_old);
          printf("# NDRATCOR new check: (c_%d*(R * Z^%d * R))^2 = %e\n", 1, 1, delta*delta);
        }
      }
      if(delta*delta < mnl->accprec) break;

      apply_Z_ndpsi(up1, dn1, up0, dn0, id, hf, &(mnl->solver_params));
      
      assign_add_mul_r(mnl->pf, up1, coefs[i-1], VOLUME/2);
      assign_add_mul_r(mnl->pf2, dn1, coefs[i-1], VOLUME/2);

      tup = up0; tdn = dn0;
      up0 = up1; dn0 = dn1;
      up1 = tup; dn1 = tdn;
    }
  }
  etime = gettime();
  if(g_proc_id == 0) {
    if(g_debug_level > 1) {
      printf("# Time for %s monomial heatbath: %e s\n", mnl->name, etime-atime);
    }
    if(g_debug_level > 3) { 
      printf("called ndratcor_heatbath for id %d energy %f\n", id, mnl->energy0);
    }
  }
  return;
}
예제 #27
0
void gtrafo_eo_nd(spinor * const Even_s, spinor * const Odd_s, spinor * const Even_c, spinor * const Odd_c, 
                  spinor * const Even_new_s, spinor * const Odd_new_s, spinor * const Even_new_c, spinor * const Odd_new_c,
                  GTRAFO_TYPE type){
  
  /* initialize temporal gauge here */
  int retval;
  double dret1, dret2;
  static double plaquette1 = 0.0;
  static double plaquette2 = 0.0;
  
  if(type==GTRAFO_APPLY){
    /* need VOLUME here (not N=VOLUME/2)*/
    if ((retval = init_temporalgauge_trafo(VOLUME, g_gauge_field)) != 0 ) {				// initializes the transformation matrices
      if (g_proc_id == 0) printf("Error while gauge fixing to temporal gauge. Aborting...\n");   	//	g_tempgauge_field as a copy of g_gauge_field
      exit(200);
    }
    
    /* do trafo */
    plaquette1 = measure_plaquette(g_gauge_field);
    apply_gtrafo(g_gauge_field, g_trafo);								// transformation of the gauge field
    plaquette2 = measure_plaquette(g_gauge_field);
    if (g_proc_id == 0) printf("\tPlaquette before gauge fixing: %.16e\n", plaquette1/6./VOLUME);
    if (g_proc_id == 0) printf("\tPlaquette after gauge fixing:  %.16e\n", plaquette2/6./VOLUME);
    
    /* do trafo to odd_s part of source */
    dret1 = square_norm(Odd_s, VOLUME/2 , 1);
    apply_gtrafo_spinor_odd(Odd_s, g_trafo);								// odd spinor transformation, strange
    dret2 = square_norm(Odd_s, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
    
    /* do trafo to odd_c part of source */
    dret1 = square_norm(Odd_c, VOLUME/2 , 1);
    apply_gtrafo_spinor_odd(Odd_c, g_trafo);								// odd spinor transformation, charm
    dret2 = square_norm(Odd_c, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);       
    
    /* do trafo to even_s part of source */
    dret1 = square_norm(Even_s, VOLUME/2 , 1);
    apply_gtrafo_spinor_even(Even_s, g_trafo);							// even spinor transformation, strange
    dret2 = square_norm(Even_s, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
    
    /* do trafo to even_c part of source */
    dret1 = square_norm(Even_c, VOLUME/2 , 1);
    apply_gtrafo_spinor_even(Even_c, g_trafo);							// even spinor transformation, charm
    dret2 = square_norm(Even_c, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
  } else {
      /* undo trafo */
    /* apply_inv_gtrafo(g_gauge_field, g_trafo);*/
    /* copy back the saved original field located in g_tempgauge_field -> update necessary*/
    plaquette1 = measure_plaquette(g_gauge_field);
    copy_gauge_field(g_gauge_field, g_tempgauge_field);
    g_update_gauge_copy = 1;
    plaquette2 = measure_plaquette(g_gauge_field);
    if (g_proc_id == 0) printf("\tPlaquette before inverse gauge fixing: %.16e\n", plaquette1/6./VOLUME);
    if (g_proc_id == 0) printf("\tPlaquette after inverse gauge fixing:  %.16e\n", plaquette2/6./VOLUME);
    
    /* undo trafo to source Even_s */
    dret1 = square_norm(Even_s, VOLUME/2 , 1);
    apply_inv_gtrafo_spinor_even(Even_s, g_trafo);
    dret2 = square_norm(Even_s, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
    
    
    /* undo trafo to source Even_c */
    dret1 = square_norm(Even_c, VOLUME/2 , 1);
    apply_inv_gtrafo_spinor_even(Even_c, g_trafo);
    dret2 = square_norm(Even_c, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1);
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2); 
    
    /* undo trafo to source Odd_s */
    dret1 = square_norm(Odd_s, VOLUME/2 , 1);
    apply_inv_gtrafo_spinor_odd(Odd_s, g_trafo);
    dret2 = square_norm(Odd_s, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
    
    /* undo trafo to source Odd_c */
    dret1 = square_norm(Odd_c, VOLUME/2 , 1);
    apply_inv_gtrafo_spinor_odd(Odd_c, g_trafo);
    dret2 = square_norm(Odd_c, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2); 
    
    
    // Even_new_s
    dret1 = square_norm(Even_new_s, VOLUME/2 , 1);
    apply_inv_gtrafo_spinor_even(Even_new_s, g_trafo);
    dret2 = square_norm(Even_new_s, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
    
    // Even_new_c
    dret1 = square_norm(Even_new_c, VOLUME/2 , 1);
    apply_inv_gtrafo_spinor_even(Even_new_c, g_trafo);
    dret2 = square_norm(Even_new_c, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
    
    // Odd_new_s
    dret1 = square_norm(Odd_new_s, VOLUME/2 , 1);
    apply_inv_gtrafo_spinor_odd(Odd_new_s, g_trafo);
    dret2 = square_norm(Odd_new_s, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
    
    // Odd_new_c
    dret1 = square_norm(Odd_new_c, VOLUME/2 , 1);
    apply_inv_gtrafo_spinor_odd(Odd_new_c, g_trafo);
    dret2 = square_norm(Odd_new_c, VOLUME/2, 1);
    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2); 
    
    finalize_temporalgauge();
  }
#    ifdef TM_USE_MPI
  xchange_gauge(g_gauge_field);
#    endif
}
예제 #28
0
int main(int argc,char *argv[])
{
  FILE *parameterfile = NULL;
  char datafilename[206];
  char parameterfilename[206];
  char conf_filename[50];
  char scalar_filename[50];
  char * input_filename = NULL;
  char * filename = NULL;
  double plaquette_energy;

#ifdef _USE_HALFSPINOR
	#undef _USE_HALFSPINOR
	printf("# WARNING: USE_HALFSPINOR will be ignored (not supported here).\n");
#endif

	if(even_odd_flag)
	{
		even_odd_flag=0;
		printf("# WARNING: even_odd_flag will be ignored (not supported here).\n");
	}
	int j,j_max,k,k_max = 2;
	_Complex double * drvsc;

#ifdef HAVE_LIBLEMON
	paramsXlfInfo *xlfInfo;
#endif
	int status = 0;

	static double t1,t2,dt,sdt,dts,qdt,sqdt;
	double antioptaway=0.0;

#ifdef MPI
	static double dt2;

	DUM_DERI = 6;
	DUM_SOLVER = DUM_DERI+2;
	DUM_MATRIX = DUM_SOLVER+6;
	NO_OF_SPINORFIELDS = DUM_MATRIX+2;

#ifdef OMP
	int mpi_thread_provided;
	MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_thread_provided);
#else
	MPI_Init(&argc, &argv);
#endif
	MPI_Comm_rank(MPI_COMM_WORLD, &g_proc_id);

#else
	g_proc_id = 0;
#endif

	g_rgi_C1 = 1.;

  process_args(argc,argv,&input_filename,&filename);
  set_default_filenames(&input_filename, &filename);

  /* Read the input file */
  if( (j = read_input(input_filename)) != 0) {
    fprintf(stderr, "Could not find input file: %s\nAborting...\n", input_filename);
    exit(-1);
  }

	if(g_proc_id==0) {
		printf("parameter rho_BSM set to %f\n", rho_BSM);
		printf("parameter eta_BSM set to %f\n", eta_BSM);
		printf("parameter  m0_BSM set to %f\n",  m0_BSM);
	}

#ifdef OMP
	init_openmp();
#endif

	tmlqcd_mpi_init(argc, argv);


	if(g_proc_id==0) {
#ifdef SSE
		printf("# The code was compiled with SSE instructions\n");
#endif
#ifdef SSE2
		printf("# The code was compiled with SSE2 instructions\n");
#endif
#ifdef SSE3
		printf("# The code was compiled with SSE3 instructions\n");
#endif
#ifdef P4
		printf("# The code was compiled for Pentium4\n");
#endif
#ifdef OPTERON
		printf("# The code was compiled for AMD Opteron\n");
#endif
#ifdef _GAUGE_COPY
		printf("# The code was compiled with -D_GAUGE_COPY\n");
#endif
#ifdef BGL
		printf("# The code was compiled for Blue Gene/L\n");
#endif
#ifdef BGP
		printf("# The code was compiled for Blue Gene/P\n");
#endif
#ifdef _USE_HALFSPINOR
		printf("# The code was compiled with -D_USE_HALFSPINOR\n");
#endif
#ifdef _USE_SHMEM
		printf("# The code was compiled with -D_USE_SHMEM\n");
#ifdef _PERSISTENT
		printf("# The code was compiled for persistent MPI calls (halfspinor only)\n");
#endif
#endif
#ifdef MPI
	#ifdef _NON_BLOCKING
		printf("# The code was compiled for non-blocking MPI calls (spinor and gauge)\n");
	#endif
#endif
		printf("\n");
		fflush(stdout);
	}


#ifdef _GAUGE_COPY
	init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1);
#else
	init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0);
#endif
	init_geometry_indices(VOLUMEPLUSRAND + g_dbw2rand);


	j = init_bispinor_field(VOLUMEPLUSRAND, 12);
	if ( j!= 0) {
		fprintf(stderr, "Not enough memory for bispinor fields! Aborting...\n");
		exit(0);
	}

	j = init_spinor_field(VOLUMEPLUSRAND, 12);
	if ( j!= 0) {
		fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n");
		exit(0);
	}

	int numbScalarFields = 4;
	j = init_scalar_field(VOLUMEPLUSRAND, numbScalarFields);
	if ( j!= 0) {
		fprintf(stderr, "Not enough memory for scalar fields! Aborting...\n");
		exit(0);
	}

	drvsc = malloc(18*VOLUMEPLUSRAND*sizeof(_Complex double));

	if(g_proc_id == 0) {
		fprintf(stdout,"# The number of processes is %d \n",g_nproc);
		printf("# The lattice size is %d x %d x %d x %d\n",
		 (int)(T*g_nproc_t), (int)(LX*g_nproc_x), (int)(LY*g_nproc_y), (int)(g_nproc_z*LZ));
		printf("# The local lattice size is %d x %d x %d x %d\n",
		 (int)(T), (int)(LX), (int)(LY),(int) LZ);

		fflush(stdout);
	}

	/* define the geometry */
	geometry();

  j = init_bsm_2hop_lookup(VOLUME);
	if ( j!= 0) {
    // this should not be reached since the init function calls fatal_error anyway
		fprintf(stderr, "Not enough memory for BSM2b 2hop lookup table! Aborting...\n");
		exit(0);
	}

	/* define the boundary conditions for the fermion fields */
  /* for the actual inversion, this is done in invert.c as the operators are iterated through */
  // 
  // For the BSM operator we don't use kappa normalisation,
  // as a result, when twisted boundary conditions are applied this needs to be unity.
  // In addition, unlike in the Wilson case, the hopping term comes with a plus sign.
  // However, in boundary(), the minus sign for the Wilson case is implicitly included.
  // We therefore use -1.0 here.
	boundary(-1.0);

	status = check_geometry();
	if (status != 0) {
		fprintf(stderr, "Checking of geometry failed. Unable to proceed.\nAborting....\n");
		exit(1);
	}
#if (defined MPI && !(defined _USE_SHMEM))
	// fails, we're not using spinor fields
//	check_xchange();
#endif

	start_ranlux(1, 123456);

	// read gauge field
	if( strcmp(gauge_input_filename, "create_random_gaugefield") == 0 ) {
		random_gauge_field(reproduce_randomnumber_flag, g_gauge_field);
	}
	else {
		sprintf(conf_filename, "%s.%.4d", gauge_input_filename, nstore);
		if (g_cart_id == 0) {
		  printf("#\n# Trying to read gauge field from file %s in %s precision.\n",
				conf_filename, (gauge_precision_read_flag == 32 ? "single" : "double"));
		  fflush(stdout);
		}

		int i;
		if( (i = read_gauge_field(conf_filename,g_gauge_field)) !=0) {
		  fprintf(stderr, "Error %d while reading gauge field from %s\n Aborting...\n", i, conf_filename);
		  exit(-2);
		}

		if (g_cart_id == 0) {
			printf("# Finished reading gauge field.\n");
			fflush(stdout);
		}
	}

	// read scalar field
	if( strcmp(scalar_input_filename, "create_random_scalarfield") == 0 ) {
		for( int s=0; s<numbScalarFields; s++ )
			ranlxd(g_scalar_field[s], VOLUME);
	}
	else {
		sprintf(scalar_filename, "%s.%d", scalar_input_filename, nscalar);
		if (g_cart_id == 0) {
		  printf("#\n# Trying to read scalar field from file %s in %s precision.\n",
				scalar_filename, (scalar_precision_read_flag == 32 ? "single" : "double"));
		  fflush(stdout);
		}

		int i;
		if( (i = read_scalar_field(scalar_filename,g_scalar_field)) !=0) {
		  fprintf(stderr, "Error %d while reading scalar field from %s\n Aborting...\n", i, scalar_filename);
		  exit(-2);
		}

		if (g_cart_id == 0) {
			printf("# Finished reading scalar field.\n");
			fflush(stdout);
		}
	}

#ifdef MPI
    xchange_gauge(g_gauge_field);
#endif

    /*compute the energy of the gauge field*/
    plaquette_energy = measure_plaquette( (const su3**) g_gauge_field);

    if (g_cart_id == 0) {
      printf("# The computed plaquette value is %e.\n", plaquette_energy / (6.*VOLUME*g_nproc));
      fflush(stdout);
    }

#ifdef MPI
	for( int s=0; s<numbScalarFields; s++ )
		generic_exchange(g_scalar_field[s], sizeof(scalar));
#endif

	/*initialize the bispinor fields*/
	j_max=1;
	sdt=0.;
  // w
	random_spinor_field_lexic( (spinor*)(g_bispinor_field[4]), reproduce_randomnumber_flag, RN_GAUSS);
	random_spinor_field_lexic( (spinor*)(g_bispinor_field[4])+VOLUME, reproduce_randomnumber_flag, RN_GAUSS);
	// for the D^\dagger test:
  // v
	random_spinor_field_lexic( (spinor*)(g_bispinor_field[5]), reproduce_randomnumber_flag, RN_GAUSS);
	random_spinor_field_lexic( (spinor*)(g_bispinor_field[5])+VOLUME, reproduce_randomnumber_flag, RN_GAUSS);
#if defined MPI
	generic_exchange(g_bispinor_field[4], sizeof(bispinor));
#endif

	// print L2-norm of source:
	double squarenorm = square_norm((spinor*)g_bispinor_field[4], 2*VOLUME, 1);
	if(g_proc_id==0) {
		printf("\n# square norm of the source: ||w||^2 = %e\n\n", squarenorm);
		fflush(stdout);
	}

  double t_MG, t_BK;
	/* inversion needs to be done first because it uses loads of the g_bispinor_fields internally */
#if TEST_INVERSION
  if(g_proc_id==1)
    printf("Testing inversion\n");
  // Bartek's operator
  t1 = gettime();
	cg_her_bi(g_bispinor_field[9], g_bispinor_field[4],
           25000, 1.0e-14, 0, VOLUME, &Q2_psi_BSM2b);
  t_BK = gettime() - t1;

  // Marco's operator
  t1 = gettime();
	cg_her_bi(g_bispinor_field[8], g_bispinor_field[4],
           25000, 1.0e-14, 0, VOLUME, &Q2_psi_BSM2m);
  t_MG = gettime() - t1;
  
  if(g_proc_id==0)
    printf("Operator inversion time: t_MG = %f sec \t t_BK = %f sec\n\n", t_MG, t_BK); 
#endif

  /* now apply the operators to the same bispinor field and do various comparisons */

  // Marco's operator
#ifdef MPI
  MPI_Barrier(MPI_COMM_WORLD);
#endif
  t_MG = 0.0;
  t1 = gettime();
  D_psi_BSM2m(g_bispinor_field[0], g_bispinor_field[4]);
  t1 = gettime() - t1;
#ifdef MPI
	MPI_Allreduce (&t1, &t_MG, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
#else
  t_MG = t1;
#endif

  // Bartek's operator
#ifdef MPI
  MPI_Barrier(MPI_COMM_WORLD);
#endif
  t_BK = 0.0;
  t1 = gettime();
  D_psi_BSM2b(g_bispinor_field[1], g_bispinor_field[4]);
  t1 = gettime() - t1;
#ifdef MPI
	MPI_Allreduce (&t1, &t_BK, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
#else
  t_BK = t1;
#endif
  
  if(g_proc_id==0)
    printf("Operator application time: t_MG = %f sec \t t_BK = %f sec\n\n", t_MG, t_BK); 

	squarenorm = square_norm((spinor*)g_bispinor_field[0], 2*VOLUME, 1);
	if(g_proc_id==0) {
		printf("# || D_MG w ||^2 = %.16e\n", squarenorm);
		fflush(stdout);
	}
	squarenorm = square_norm((spinor*)g_bispinor_field[1], 2*VOLUME, 1);
	if(g_proc_id==0) {
		printf("# || D_BK w ||^2 = %.16e\n\n\n", squarenorm);
		fflush(stdout);
	}

  diff( (spinor*)g_bispinor_field[3], (spinor*)g_bispinor_field[0], (spinor*)g_bispinor_field[1], 2*VOLUME);

  printf("element-wise difference between (D_BK w) and (D_MG w)\n");
  printf("( D_MG w - M_BK w )->sp_up.s0.c0= %.16e + I*(%.16e)\n\n", creal(g_bispinor_field[3][0].sp_up.s0.c0), cimag(g_bispinor_field[3][0].sp_up.s0.c0) );

  double diffnorm = square_norm( (spinor*) g_bispinor_field[3], 2*VOLUME, 1 );
  if(g_proc_id==0){
    printf("Square norm of the difference\n");
    printf("|| D_MG w - D_BK w ||^2     = %.16e \n\n\n", diffnorm); 
  }

	// < D w, v >
  printf("Check consistency of D and D^dagger\n");
  _Complex double prod1_MG = scalar_prod( (spinor*)g_bispinor_field[0], (spinor*)g_bispinor_field[5], 2*VOLUME, 1 );
	if(g_proc_id==0)
    printf("< D_MG w, v >        = %.16e + I*(%.16e)\n", creal(prod1_MG), cimag(prod1_MG));
	
  _Complex double prod1_BK = scalar_prod( (spinor*)g_bispinor_field[1], (spinor*)g_bispinor_field[5], 2*VOLUME, 1 );
  if(g_proc_id==0)
  	printf("< D_BK w, v >        = %.16e + I*(%.16e)\n\n", creal(prod1_BK), cimag(prod1_BK));
	
  // < w, D^\dagger v >
  t_MG = gettime();
	D_psi_dagger_BSM2m(g_bispinor_field[6], g_bispinor_field[5]);
  t_MG = gettime()-t_MG;

  t_BK = gettime();
	D_psi_dagger_BSM2b(g_bispinor_field[7], g_bispinor_field[5]);
  t_BK = gettime() - t_BK;

  if(g_proc_id==0)
    printf("Operator dagger application time: t_MG = %f sec \t t_BK = %f sec\n\n", t_MG, t_BK); 

	_Complex double prod2_MG = scalar_prod((spinor*)g_bispinor_field[4], (spinor*)g_bispinor_field[6], 2*VOLUME, 1);
	_Complex double prod2_BK = scalar_prod((spinor*)g_bispinor_field[4], (spinor*)g_bispinor_field[7], 2*VOLUME, 1);
  if( g_proc_id == 0 ){
	  printf("< w, D_MG^dagger v > = %.16e + I*(%.16e)\n", creal(prod2_MG), cimag(prod2_MG));
    printf("< w, D_BK^dagger v > = %.16e + I*(%.16e)\n", creal(prod2_BK), cimag(prod2_BK));
	  
    printf("\n| < D_MG w, v > - < w, D_MG^dagger v > | = %.16e\n",cabs(prod2_MG-prod1_MG));
	  printf("| < D_BK w, v > - < w, D_BK^dagger v > | = %.16e\n\n",cabs(prod2_BK-prod1_BK));
  }
	
#if TEST_INVERSION
	// check result of inversion
	Q2_psi_BSM2m(g_bispinor_field[10], g_bispinor_field[8]);
	Q2_psi_BSM2b(g_bispinor_field[11], g_bispinor_field[8]);
	assign_diff_mul((spinor*)g_bispinor_field[10], (spinor*)g_bispinor_field[4], 1.0, 2*VOLUME);
	assign_diff_mul((spinor*)g_bispinor_field[11], (spinor*)g_bispinor_field[4], 1.0, 2*VOLUME);
	double squarenorm_MGMG = square_norm((spinor*)g_bispinor_field[10], 2*VOLUME, 1);
	double squarenorm_BKMG = square_norm((spinor*)g_bispinor_field[11], 2*VOLUME, 1);
	if(g_proc_id==0) {
		printf("# ||Q2_MG*(Q2_MG)^-1*(b)-b||^2 = %.16e\n\n", squarenorm_MGMG);
		printf("# ||Q2_BK*(Q2_MG)^-1*(b)-b||^2 = %.16e\n\n", squarenorm_BKMG);
		fflush(stdout);
	}
	
  Q2_psi_BSM2b(g_bispinor_field[10], g_bispinor_field[9]);
  Q2_psi_BSM2m(g_bispinor_field[11], g_bispinor_field[9]);
	assign_diff_mul((spinor*)g_bispinor_field[10], (spinor*)g_bispinor_field[4], 1.0, 2*VOLUME);
	assign_diff_mul((spinor*)g_bispinor_field[11], (spinor*)g_bispinor_field[4], 1.0, 2*VOLUME);
	double squarenorm_BKBK = square_norm((spinor*)g_bispinor_field[10], 2*VOLUME, 1);
	double squarenorm_MGBK = square_norm((spinor*)g_bispinor_field[11], 2*VOLUME, 1);
	if(g_proc_id==0) {
		printf("# ||Q2_BK*(Q2_BK)^-1*(b)-b||^2 = %.16e\n\n", squarenorm_BKBK);
		printf("# ||Q2_MG*(Q2_BK)^-1*(b)-b||^2 = %.16e\n\n", squarenorm_MGBK);
		fflush(stdout);
	}
#endif

#ifdef OMP
	free_omp_accumulators();
#endif
	free_gauge_field();
	free_geometry_indices();
	free_bispinor_field();
	free_scalar_field();
#ifdef MPI
	MPI_Barrier(MPI_COMM_WORLD);
	MPI_Finalize();
#endif
	return(0);
}
예제 #29
0
파일: hmc.c 프로젝트: MrMage/schwinger
int update() //Basic HMC update step
{
  double squnrm;
  int i, acc;
  double exphdiff;
  
  /* the new impulses and the 'generator' of the arbitrary pseudofield */
  /* calculate the hamiltonian of this state: new impulses + action */
  /* g_X is ab-used a bit - here it is \xi = (gamma5 D)^{-1} \phi */
  
  ham_old = s_g_old;
  for(i=0; i<GRIDPOINTS; i++) {
    gp1[i] = gauss();
    gp2[i] = gauss();
    ham_old += 0.5*(gp1[i]*gp1[i] + gp2[i]*gp2[i]);
  }
  
  /* Now create the field and calculate its contributions to the action (end of the 'misuse') */
  /* squnrm is the fermion part of the action : */
  /*   S = R^dagger * R  =  g_fermion^dag * D^{-1 dag} * D^{-1} * g_fermion = g_fermion Q^-1 g_fermion */

  /* PF1 det(1/(Q^2 + mu^2)) */
  for(i=0; i<GRIDPOINTS; i++) {
    g_X[i].s1 = (gauss() + I*gauss())/sqrt(2); //Gaussian fields R
    g_X[i].s2 = (gauss() + I*gauss())/sqrt(2);
  }
  squnrm = square_norm(g_X);
  
  // step iv): g_fermion = \phi = K^dag * g_X = K^dag * \xi
  gam5D_wilson(g_fermion, g_X);
  assign_diff_mul(g_fermion, g_X, 0.+I*sqrt(g_musqr));
  ham_old += squnrm;

  /* PF2 det((Q^2 + mu^2)/Q^2) */
  if(no_timescales > 2) {
    for(i=0; i<GRIDPOINTS; i++) {
      g_X[i].s1 = (gauss() + I*gauss())/sqrt(2); //Gaussian fields R
      g_X[i].s2 = (gauss() + I*gauss())/sqrt(2);
    }
    squnrm = square_norm(g_X);

    cg(g_fermion2, g_X, ITER_MAX, DELTACG, &gam5D_SQR_musqr_wilson);    
    gam5D_wilson(g_gam5DX, g_fermion2);
    assign_add_mul(g_gam5DX, g_fermion2, 0.+I*sqrt(g_musqr));
    gam5D_wilson(g_fermion2, g_gam5DX);
    ham_old += squnrm;
  }
  // Add the part for the fermion fields

  // Do the molecular dynamic chain
  /* the simple LF scheme */

  /* the second order minimal norm multi-timescale integrator*/
  /* MN2_integrator(g_steps, 2, g_steps*g_stepsize, 0.2); */

  /* This is the recursive implementation */
  /* in can be found in rec_lf_integrator.c|h */
  if (no_timescales == 1)
    leapfrog(n_steps[0], tau/n_steps[0]);
  else
    integrate_leap_frog(tau/n_steps[no_timescales-1], no_timescales-1, no_timescales, n_steps, 1, up_momenta);
  
  // Calculate the new action and hamiltonian
  ham = 0;
  s_g = 0;
  for (i=0; i<GRIDPOINTS; i++) {
    s_g += S_G(i);
    ham += 0.5*(gp1[i]*gp1[i] + gp2[i]*gp2[i]);
  }
  /* Sum_ij [(g_fermion^*)_i (Q^-1)_ij (g_fermion)_j]  =  Sum_ij [(g_fermion^*)_i (g_X)_i] */
  ham += s_g;
  // add in the part for the fermion fields.
  cg(g_X, g_fermion, ITER_MAX, DELTACG, &gam5D_SQR_musqr_wilson);
  ham += scalar_prod_r(g_fermion, g_X);
  
  if(no_timescales > 2) {
    cg(g_gam5DX, g_fermion2, ITER_MAX, DELTACG, &gam5D_SQR_wilson);
    gam5D_SQR_musqr_wilson(g_X, g_temp, g_gam5DX);
    ham += scalar_prod_r(g_fermion2, g_X);
  }

  exphdiff = exp(ham_old-ham);
  acc = accept(exphdiff);
 
  for(i=0; i<GRIDPOINTS; i++) {
    gauge1_old[i]=gauge1[i];
    gauge2_old[i]=gauge2[i];
  }
 
  s_g_old = s_g;
  return(acc);
}
예제 #30
0
파일: mr.c 프로젝트: LorenzoRiggio/tmLQCD
int mrblk(spinor * const P, spinor * const Q,
	  const int max_iter, const double eps_sq,
	  const int rel_prec, const int N, 
	  matrix_mult_blk f, const int blk) {
  static int mr_init=0;
  int i = 0;
  double norm_r,beta;
  _Complex double alpha;
  spinor * r;
  const int parallel = 0;
  spinor * s[3];
  static spinor *s_=NULL;
  static int N_;

  if(mr_init == 0 || N != N_) {
    if(N!= N_ && mr_init != 0) {
      free(s_);
    }
    N_ = N;
    s_ = calloc(3*(N+1)+1, sizeof(spinor));
    mr_init = 1;
  }
#if (defined SSE || defined SSE2 || defined SSE3)
  s[0] = (spinor *)(((unsigned long int)(s_)+ALIGN_BASE)&~ALIGN_BASE); 
#else
  s[0] = s_;
#endif
  s[1] = s[0] + N + 1;
  s[2] = s[1] + N + 1;

  r = s[0];
  norm_r = square_norm(Q, N, parallel);
  
  zero_spinor_field(P, N);
  f(s[2], P, blk);
  diff(r, Q, s[2], N);
  norm_r = square_norm(r, N, parallel);
  if(g_proc_id == g_stdio_proc && g_debug_level > 2 && blk == 0) {
    printf("MRblk iteration= %d  |res|^2= %e\n", i, norm_r);
    fflush( stdout );
  }
  
  while((norm_r > eps_sq) && (i < max_iter)){
    i++;
    f(s[1], r, blk);
    alpha = scalar_prod(s[1], r, N, parallel);
    beta = square_norm(s[1], N, parallel);
    alpha /= beta;
    assign_add_mul(P, r, alpha, N);
    if(i%50 == 0) {
      f(s[2], P,blk);
    }
    else{
      assign_add_mul(s[2], s[1], alpha, N);
    }
    
    diff(r, Q, s[2], N);
    norm_r = square_norm(r, N, parallel);
    if(g_proc_id == g_stdio_proc && g_debug_level > 2 && blk == 0) {
      printf("MRblk iteration= %d  |res|^2= %g\n", i, norm_r);
      fflush(stdout);
    }
  }
  /* free(s_); */
  if(norm_r > eps_sq){
    return(-1);
  }
  return(i);
}