Esempio n. 1
0
int write_gauge_field(char * filename, const int prec, paramsXlfInfo const *xlfInfo)
{
  /* Size announced in the header of the binary record. */
  const uint64_t payload_bytes = (uint64_t)L * L * L * T_global * sizeof(su3) * prec / 16;
  WRITER *out = NULL;
  paramsIldgFormat *format = NULL;
  DML_Checksum sums;
  int rc = 0;

  /* Every call below handles its own errors, with the single exception of
     write_binary_gauge_data, whose result is handed back to the caller. */

  /* The trailing 0 means: do not append. */
  construct_writer(&out, filename, 0);

  write_xlf_info(out, xlfInfo);

  /* The format parameters are only needed while the format record is written. */
  format = construct_paramsIldgFormat(prec);
  write_ildg_format(out, format);
  free(format);

  /* Begin and end bits are both 0: the message was opened by the format
     record and is going to be closed by the checksum record. */
  write_header(out, 0, 0, "ildg-binary-data", payload_bytes);
  rc = write_binary_gauge_data(out, prec, &sums);
  write_checksum(out, &sums, NULL);

  /* Only the process with cartesian id 0 reports, and only when debugging is on. */
  if (g_cart_id == 0)
  {
    if (g_debug_level > 0)
    {
      fprintf(stdout, "# Scidac checksums for gaugefield %s:\n", filename);
      fprintf(stdout, "#   Calculated            : A = %#010x B = %#010x.\n", sums.suma, sums.sumb);
      fflush(stdout);
    }
  }
#ifdef TM_USE_MPI
  MPI_Barrier(MPI_COMM_WORLD);
#endif /* MPI */

  destruct_writer(out);
  return rc;
}
Esempio n. 2
0
/*
 * Multi-mass-shift CG (CG-MMS) solver.
 *
 * P        output: solution for the base mass (it is zeroed on entry, only a
 *          zero starting guess is implemented)
 * Q        input: source
 * max_iter maximal number of CG iterations
 * eps_sq   squared target residue
 * rel_prec 0: the squared residue is compared to eps_sq,
 *          1: it is compared to eps_sq * |Q|^2
 * N        length handed to all spinor-field linear algebra routines
 * f        operator applied once per iteration
 *
 * On convergence the base solution and the solutions of all
 * g_no_extra_masses shifted systems are written to disk and iteration+1 is
 * returned.  If max_iter is exhausted the current base iterate is copied to
 * P and -1 is returned.
 */
int cg_mms_tm(spinor * const P, spinor * const Q, const int max_iter, 
              double eps_sq, const int rel_prec, const int N, matrix_mult f) {

  /* These scalars are deliberately NOT static: alpha_cg and beta_cg have to
     start from 1 and 0 on every call, not only on the very first one. */
  double normsq, pro, err, alpha_cg = 1., beta_cg = 0., squarenorm;
  int iteration, im, append = 0, len;
  char filename[100];
  double gamma, alpham1;
  int const cg_mms_default_precision = 32;
  double tmp_mu = g_mu;
  WRITER * writer = NULL;
  paramsInverterInfo *inverterInfo = NULL;
  paramsPropagatorFormat *propagatorFormat = NULL;
  spinor * temp_save; /* points to the field that is currently being saved */
  spinor ** solver_field = NULL;
  const int nr_sf = 5;

  init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  init_mms_tm(g_no_extra_masses);

  /* currently only implemented for P=0 */
  zero_spinor_field(P, N);
  /*  Value of the bare MMS-masses (\mu^2 - \mu_0^2) */
  for(im = 0; im < g_no_extra_masses; im++) {
    sigma[im] = g_extra_masses[im]*g_extra_masses[im] - g_mu*g_mu;
    assign(xs_mms_solver[im], P, N);
    assign(ps_mms_solver[im], Q, N);
    zitam1[im] = 1.0;
    zita[im] = 1.0;
    alphas[im] = 1.0;
    betas[im] = 0.0;
  }

  squarenorm = square_norm(Q, N, 1);
  assign(solver_field[0], P, N);
/*   normsp = square_norm(P, N, 1); */

  /* initialize residue r and search vector p */
/*   if(normsp == 0){ */
  /* currently only implemented for P=0 */
  if(1) {
    /* if a starting solution vector equal to zero is chosen */
    assign(solver_field[1], Q, N);
    assign(solver_field[2], Q, N);
    normsq = square_norm(Q, N, 1);
  }
  else{
    /* if a starting solution vector different from zero is chosen */
    f(solver_field[3], solver_field[0]);

    diff(solver_field[1], Q, solver_field[3], N);
    assign(solver_field[2], solver_field[1], N);
    normsq = square_norm(solver_field[2], N, 1);
  }

  /* main loop */
  for(iteration = 0; iteration < max_iter; iteration++) {

    /*   Q^2*p and then (p,Q^2*p)  */
    f(solver_field[4], solver_field[2]);
    pro = scalar_prod_r(solver_field[2], solver_field[4], N, 1);

    /* For the update of the coeff. of the shifted pol. we need alpha_cg(i-1) and alpha_cg(i).
       This is the reason why we need this double definition of alpha */
    alpham1 = alpha_cg;

    /* Compute alpha_cg(i+1) */
    alpha_cg = normsq/pro;
    for(im = 0; im < g_no_extra_masses; im++) {

      /* Now gamma is a temp variable that corresponds to zita(i+1) */ 
      gamma = zita[im]*alpham1/(alpha_cg*beta_cg*(1.-zita[im]/zitam1[im]) 
                                + alpham1*(1.+sigma[im]*alpha_cg));

      /* Now zita(i-1) is put equal to the old zita(i) */
      zitam1[im] = zita[im];
      /* Now zita(i+1) is updated */
      zita[im] = gamma;
      /* Update of alphas(i) = alpha_cg(i)*zita(i+1)/zita(i) */ 
      alphas[im] = alpha_cg*zita[im]/zitam1[im];
      /* Compute xs(i+1) = xs(i) + alphas(i)*ps(i) */
      assign_add_mul_r(xs_mms_solver[im], ps_mms_solver[im], alphas[im], N); 
    }

    /*  Compute x_(i+1) = x_i + alpha_cg(i+1) p_i    */
    assign_add_mul_r(solver_field[0], solver_field[2],  alpha_cg, N);
    /*  Compute r_(i+1) = r_i - alpha_cg(i+1) Qp_i   */
    assign_add_mul_r(solver_field[1], solver_field[4], -alpha_cg, N);

    /* Check whether the precision eps_sq is reached */

    err = square_norm(solver_field[1], N, 1);
    if(g_debug_level > 2 && g_proc_id == g_stdio_proc) {
      printf("CGMMS iteration: %d residue: %g\n", iteration, err); fflush( stdout );
    }

    if( ((err <= eps_sq) && (rel_prec == 0)) ||
      ((err <= eps_sq*squarenorm) && (rel_prec == 1)) ) {

      /* Converged: recompute the residue of the base system explicitly. */
      assign(P, solver_field[0], N);
      f(solver_field[2], P);
      diff(solver_field[3], solver_field[2], Q, N);
      err = square_norm(solver_field[3], N, 1);
      if(g_debug_level > 0 && g_proc_id == g_stdio_proc) {
        printf("# CG MMS true residue at final iteration (%d) was %g.\n", iteration, err); 
        fflush( stdout);
      }
      g_sloppy_precision = 0;
      g_mu = tmp_mu;

      /* save all the results of (Q^dagger Q)^(-1) \gamma_5 \phi */
      /* here ... */
      /* when im == -1 save the base mass*/
      for(im = -1; im < g_no_extra_masses; im++) {
        if(im==-1) {
          temp_save=solver_field[0];
        } else {
          temp_save=xs_mms_solver[im];
        }

        /* Bounded formatting: the basename has arbitrary length, so an
           unchecked sprintf could run past the end of filename. */
        if(SourceInfo.type != 1) {
          if (PropInfo.splitted) {
            len = snprintf(filename, sizeof(filename), "%s.%.4d.%.2d.%.2d.cgmms.%.2d.inverted", SourceInfo.basename, SourceInfo.nstore, SourceInfo.t, SourceInfo.ix, im+1);
          } else {
            len = snprintf(filename, sizeof(filename), "%s.%.4d.%.2d.cgmms.%.2d.inverted", SourceInfo.basename, SourceInfo.nstore, SourceInfo.t, im+1);
          }
        }
        else {
          len = snprintf(filename, sizeof(filename), "%s.%.4d.%.5d.cgmms.%.2d.0", SourceInfo.basename, SourceInfo.nstore, SourceInfo.sample, im+1);
        }
        if((len < 0 || (size_t)len >= sizeof(filename)) && g_proc_id == g_stdio_proc) {
          fprintf(stderr, "# CG MMS warning: file name for mass %d does not fit into %d characters and was truncated\n",
                  im+1, (int)sizeof(filename) - 1);
        }
        if(g_kappa != 0) {
          mul_r(temp_save, (2*g_kappa)*(2*g_kappa), temp_save, N);
        }

        append = !PropInfo.splitted;

        construct_writer(&writer, filename, append);

        if (PropInfo.splitted || SourceInfo.ix == index_start) {
          /* Create the inverter info NOTE: always set to TWILSON=12 and 1 flavour (to be adjusted) */
          inverterInfo = construct_paramsInverterInfo(err, iteration+1, 12, 1);
          if (im == -1) {
            inverterInfo->cgmms_mass = inverterInfo->mu;
          } else {
            inverterInfo->cgmms_mass = g_extra_masses[im]/(2 * inverterInfo->kappa);
          }
          write_spinor_info(writer, PropInfo.format, inverterInfo, append);
          /* Create the propagatorFormat NOTE: always set to 1 flavour (to be adjusted) */
          propagatorFormat = construct_paramsPropagatorFormat(cg_mms_default_precision, 1);
          write_propagator_format(writer, propagatorFormat);
          free(inverterInfo);
          free(propagatorFormat);
        }
        convert_lexic_to_eo(solver_field[2], solver_field[1], temp_save);
        /* Same precision as announced in the propagator format record. */
        write_spinor(writer, &solver_field[2], &solver_field[1], 1, cg_mms_default_precision);
        destruct_writer(writer);
      }
      finalize_solver(solver_field, nr_sf);
      return(iteration+1);
    }

    /* Compute beta_cg(i+1) = (r(i+1),r(i+1))/(r(i),r(i))
       Compute p(i+1) = r(i+1) + beta(i+1)*p(i)  */
    beta_cg = err/normsq;
    assign_mul_add_r(solver_field[2], beta_cg, solver_field[1], N);
    normsq = err;

    /* Compute betas(i+1) = beta_cg(i)*(zita(i+1)*alphas(i))/(zita(i)*alpha_cg(i))
       Compute ps(i+1) = zita(i+1)*r(i+1) + betas(i+1)*ps(i)  */
    for(im = 0; im < g_no_extra_masses; im++) {
      betas[im] = beta_cg*zita[im]*alphas[im]/(zitam1[im]*alpha_cg);
      assign_mul_add_mul_r(ps_mms_solver[im], solver_field[1], betas[im], zita[im], N);
    }
  }
  /* No convergence within max_iter iterations: hand back the last iterate. */
  assign(P, solver_field[0], N);
  g_sloppy_precision = 0;
  finalize_solver(solver_field, nr_sf);
  return(-1);
}
Esempio n. 3
0
/*
 * Compute (or read back) eigenvalues and eigenvectors with the
 * Jacobi-Davidson routine jdher.
 *
 * nr_of_eigenvalues  in: number of eigenvalues requested,
 *                    out: number of eigenvalues actually available
 * max_iterations     iteration limit handed to jdher
 * precision          requested precision, values below 1.e-14 are raised to 1.e-14
 * maxmin             non-zero: maximal eigenvalues, zero: minimal eigenvalues
 * readwrite          non-zero: eigenvectors are first read from disk,
 *                    1: eigenvectors are written after the computation,
 *                    2: eigenvalues are read from file instead of being computed
 * nstore             number that enters the file names
 * even_odd_flag      non-zero: Qtm_pm_psi on half the volume,
 *                    zero: Q_pm_psi on the full volume
 *
 * Returns the first entry of the eigenvalue array, or 0 if no eigenvalue is
 * available or the code was built without HAVE_LAPACK.
 *
 * NOTE(review): the eigenvector and eigenvalue buffers are allocated on the
 * first call only, sized by that call's *nr_of_eigenvalues -- later calls
 * presumably must not ask for more; verify against the callers.
 */
double eigenvalues(int * nr_of_eigenvalues, const int max_iterations, 
                   const double precision, const int maxmin,
                   const int readwrite, const int nstore, 
                   const int even_odd_flag) {
  /* Defined value also for builds without LAPACK and for runs without any
     converged eigenvalue. */
  double returnvalue = 0.;
  complex norm2;
#ifdef HAVE_LAPACK
  static spinor * eigenvectors_ = NULL;
  static int allocated = 0;
  char filename[200];
  FILE * ofs;
#ifdef MPI
  double atime, etime;
#endif

  /**********************
   * For Jacobi-Davidson 
   **********************/
  int verbosity = g_debug_level, converged = 0, blocksize = 1, blockwise = 0;
  int solver_it_max = 50, j_max, j_min, ii, jj;
  /*int it_max = 10000;*/
  /* complex *eigv_ = NULL, *eigv; */
  double decay_min = 1.7, decay_max = 1.5, prec,
    threshold_min = 1.e-3, threshold_max = 5.e-2;

  /* static int v0dim = 0; */
  int v0dim = 0;
  matrix_mult f;
  int N = (VOLUME)/2, N2 = (VOLUMEPLUSRAND)/2;
  spinor * max_eigenvector_ = NULL, * max_eigenvector;

  /**********************
   * General variables
   **********************/
  int returncode=0;
  int returncode2=0;

  char eigenvector_prefix[512];
  char eigenvalue_prefix[512];


  no_eigenvalues = *nr_of_eigenvalues;

  /* The prefixes are themselves format strings for the file names below. */
  sprintf(eigenvector_prefix,"eigenvector.%%s.%%.2d.%%.4d");
  sprintf(eigenvalue_prefix,"eigenvalues.%%s.%%.4d");

  if(!even_odd_flag) {
    N = (VOLUME);
    N2 = (VOLUMEPLUSRAND);
    f = &Q_pm_psi;
  }
  else {
    f = &Qtm_pm_psi;
  }
  evlength = N2;
  if(g_proc_id == g_stdio_proc && g_debug_level >0) {
    printf("Number of %s eigenvalues to compute = %d\n",
           maxmin ? "maximal" : "minimal",(*nr_of_eigenvalues));
    printf("Using Jacobi-Davidson method! \n");
  }

  if((*nr_of_eigenvalues) < 8){
    j_max = 15;
    j_min = 8;
  }
  else{
    j_max = 2*(*nr_of_eigenvalues);
    j_min = (*nr_of_eigenvalues);
  }
  if(precision < 1.e-14){
    prec = 1.e-14;
  }
  else{
    prec = precision;
  }
#if (defined SSE || defined SSE2 || defined SSE3)
  max_eigenvector_ = calloc(N2+1, sizeof(spinor));
  max_eigenvector = (spinor *)(((unsigned long int)(max_eigenvector_)+ALIGN_BASE)&~ALIGN_BASE);
#else
  max_eigenvector_= calloc(N2, sizeof(spinor));
  max_eigenvector = max_eigenvector_;
#endif  

  if(allocated == 0) {
    allocated = 1;
#if (defined SSE || defined SSE2 || defined SSE3)
    eigenvectors_ = calloc(N2*(*nr_of_eigenvalues)+1, sizeof(spinor)); 
    eigenvectors = (spinor *)(((unsigned long int)(eigenvectors_)+ALIGN_BASE)&~ALIGN_BASE);
#else
    eigenvectors_= calloc(N2*(*nr_of_eigenvalues), sizeof(spinor));
    eigenvectors = eigenvectors_;
#endif
    eigenvls = (double*)malloc((*nr_of_eigenvalues)*sizeof(double));
    inv_eigenvls = (double*)malloc((*nr_of_eigenvalues)*sizeof(double));
  }

  solver_it_max = 50;
  /* compute the maximal one first */
  jdher(N*sizeof(spinor)/sizeof(complex), N2*sizeof(spinor)/sizeof(complex),
        50., 1.e-12, 
        1, 15, 8, max_iterations, 1, 0, 0, NULL,
        CG, solver_it_max,
        threshold_max, decay_max, verbosity,
        &converged, (complex*) max_eigenvector, (double*) &max_eigenvalue,
        &returncode2, JD_MAXIMAL, 1,
        f);

  /* Read as many eigenvectors as are found on disk; v0dim counts them. */
  if(readwrite) {
    if(even_odd_flag){
      for(v0dim = 0; v0dim < (*nr_of_eigenvalues); v0dim++) {
        sprintf(filename, eigenvector_prefix , maxmin ? "max" : "min", v0dim, nstore);
        if((read_eospinor(&eigenvectors[v0dim*N2], filename)) != 0) {
          break;
        }
      }
    } else {
      FILE *testfile;
      spinor *s;
      double sqnorm;
      for(v0dim = 0; v0dim < (*nr_of_eigenvalues); v0dim++) {
        sprintf(filename, eigenvector_prefix, maxmin ? "max" : "min", v0dim, nstore);

        printf("reading eigenvectors ... ");
        testfile=fopen(filename,"r");
        if( testfile != NULL){
          fclose(testfile);
          s=(spinor*)&eigenvectors[v0dim*N2];
          read_spinor(s,NULL, filename,0);
          sqnorm=square_norm(s,VOLUME,1);
          printf(" has | |^2 = %e \n",sqnorm);

        } else {
          printf(" no more eigenvectors \n");
          break;
        }
      }
    }
  }

  if(readwrite != 2) {
#ifdef MPI
    atime = MPI_Wtime();
#endif
    /* (re-) compute minimal eigenvalues */
    converged = 0;
    solver_it_max = 200;

    if(maxmin)
      jdher(N*sizeof(spinor)/sizeof(complex), N2*sizeof(spinor)/sizeof(complex),
          50., prec, 
          (*nr_of_eigenvalues), j_max, j_min, 
          max_iterations, blocksize, blockwise, v0dim, (complex*) eigenvectors,
          CG, solver_it_max,
          threshold_max, decay_max, verbosity,
          &converged, (complex*) eigenvectors, eigenvls,
          &returncode, JD_MAXIMAL, 1,
          f);
    else
      jdher(N*sizeof(spinor)/sizeof(complex), N2*sizeof(spinor)/sizeof(complex),
          0., prec, 
          (*nr_of_eigenvalues), j_max, j_min, 
          max_iterations, blocksize, blockwise, v0dim, (complex*) eigenvectors,
          CG, solver_it_max,
          threshold_min, decay_min, verbosity,
          &converged, (complex*) eigenvectors, eigenvls,
          &returncode, JD_MINIMAL, 1,
          f);
    
#ifdef MPI
    etime = MPI_Wtime();
    if(g_proc_id == 0) {
      printf("Eigenvalues computed in %e sec. (MPI_Wtime)\n", etime-atime);
    }
#endif
  }
  else {
    double file_value;

    /* converged still holds the result of the maximal-eigenvalue run above;
       from here on it counts the eigenvalues read from file. */
    converged = 0;
    sprintf(filename, eigenvalue_prefix, maxmin ? "max" : "min", nstore); 
    if((ofs = fopen(filename, "r")) != (FILE*) NULL) {
      for(v0dim = 0; v0dim < (*nr_of_eigenvalues); v0dim++) {
        /* The index column is skipped, values are stored in file order.
           Reading the index into the loop counter and testing feof()
           after the fact lost entries at the end of the file. */
        if(fscanf(ofs, "%*d %lf\n", &file_value) != 1) break;
        eigenvls[v0dim] = file_value;
        converged = v0dim + 1;
      }
      /* Only a stream that was actually opened may be closed. */
      fclose(ofs);
    }
  }

  (*nr_of_eigenvalues) = converged;
  no_eigenvalues = converged;
  /* Without any eigenvalue there is no last entry to pick up. */
  if(converged > 0) {
    ev_minev = eigenvls[(*nr_of_eigenvalues)-1];
  }
  eigenvalues_for_cg_computed = converged;

  /* Print all scalar products < ii | jj > of the eigenvectors; the trailing
     "|| 1" switches the tolerance test off, so everything is printed. */
  for (ii = 0; ii < (*nr_of_eigenvalues); ii++){
    for (jj = 0; jj <= ii; jj++){
      norm2 = scalar_prod(&(eigenvectors[ii*N2]),&(eigenvectors[jj*N2]), VOLUME, 1);
      if(ii==jj){
        if((fabs(1.-norm2.re)>1e-12) || (fabs(norm2.im)>1e-12) || 1) {
          if(g_proc_id == g_stdio_proc){
            printf("< %d | %d>  =\t   %e  +i * %e \n", ii+1, jj+1, norm2.re, norm2.im);
            fflush(stdout);
          }
        }
      }
      else{
        if((fabs(norm2.re)>1e-12) || (fabs(norm2.im)>1e-12) || 1) {
          if(g_proc_id == g_stdio_proc){
            printf("< %d | %d>  =\t   %e  +i * %e \n", ii+1, jj+1, norm2.re, norm2.im);
            fflush(stdout);
          }
        }
      }
    }
  }


  if(readwrite == 1 ) {
    if(even_odd_flag)
      for(v0dim = 0; v0dim < (*nr_of_eigenvalues); v0dim++) {
        sprintf(filename, eigenvector_prefix, maxmin ? "max" : "min", v0dim, nstore);
        if((write_eospinor(&eigenvectors[v0dim*N2], filename, eigenvls[v0dim], prec, nstore)) != 0) {
          break;
        }
      }
    else{
      WRITER *writer=NULL;
      spinor *s;
      double sqnorm;
      paramsPropagatorFormat *propagatorFormat = NULL;

      for(v0dim = 0; v0dim < (*nr_of_eigenvalues); v0dim++) {
        sprintf(filename, eigenvector_prefix, maxmin ? "max" : "min", v0dim, nstore);

        construct_writer(&writer, filename, 0);
        /* todo write propagator format */
        propagatorFormat = construct_paramsPropagatorFormat(64, 1);
        write_propagator_format(writer, propagatorFormat);
        free(propagatorFormat);


        s=(spinor*)&eigenvectors[v0dim*N2];
        write_spinor(writer, &s,NULL, 1, 64);
        destruct_writer(writer);
        writer=NULL;
        sqnorm=square_norm(s,VOLUME,1);
        printf(" wrote eigenvector | |^2 = %e \n",sqnorm);


      }
    }
  }
  if(g_proc_id == 0 && readwrite != 2) {
    sprintf(filename, eigenvalue_prefix , maxmin ? "max" : "min", nstore); 
    ofs = fopen(filename, "w");
    if(ofs != (FILE*) NULL) {
      for(v0dim = 0; v0dim < (*nr_of_eigenvalues); v0dim++) {
        fprintf(ofs, "%d %e\n", v0dim, eigenvls[v0dim]);
      }
      fclose(ofs);
    }
    else {
      fprintf(stderr, "could not open file %s for writing the eigenvalues\n", filename);
    }
  }
  for(v0dim = 0; v0dim < converged; v0dim++) {
    inv_eigenvls[v0dim] = 1./eigenvls[v0dim];
  }

  ev_qnorm=1.0/(sqrt(max_eigenvalue)+0.1);
  /* ov_n_cheby is initialized in Dov_psi.c */
  if(converged > 0) {
    ev_minev*=ev_qnorm*ev_qnorm;
    returnvalue=eigenvls[0];
  }
  free(max_eigenvector_);
#else
  fprintf(stderr, "lapack not available, so JD method for EV computation not available \n");
#endif
  return(returnvalue);
}
Esempio n. 4
0
/*
 * Write the propagator(s) of operator op_id to disk.
 *
 * op_id        index into operator_list
 * index_start  source index for which the inverter info record is written
 *              when all propagators go into one (non-splitted) file
 * append_      non-zero: append to an already existing file
 *
 * The file name is assembled from SourceInfo and a suffix that depends on
 * the operator type.  If PropInfo.format is 1 the source is written in front
 * of the propagator.
 */
void op_write_prop(const int op_id, const int index_start, const int append_) {
  operator * optr = &operator_list[op_id];
  char filename[100];
  const char *ending;
  WRITER *writer = NULL;
  int append = 0;
  int status = 0;
  int len;

  paramsSourceFormat *sourceFormat = NULL;
  paramsPropagatorFormat *propagatorFormat = NULL;
  paramsInverterInfo *inverterInfo = NULL;

  /* The suffix is a fixed literal, so no copy into a local buffer is needed. */
  if(optr->type == DBTMWILSON || optr->type == DBCLOVER) {
    ending = "hinverted";
  }
  else if(optr->type == OVERLAP) {
    ending = "ovinverted";
  }
  else {
    ending = "inverted";
  }

  /* Bounded formatting: the basename has arbitrary length, so an unchecked
     sprintf could run past the end of filename. */
  if(SourceInfo.type != 1) {
    if (PropInfo.splitted) {
      len = snprintf(filename, sizeof(filename), "%s.%.4d.%.2d.%.2d.%s", SourceInfo.basename, SourceInfo.nstore, SourceInfo.t, SourceInfo.ix, ending);
    }
    else {
      len = snprintf(filename, sizeof(filename), "%s.%.4d.%.2d.%s", SourceInfo.basename, SourceInfo.nstore, SourceInfo.t, ending);
    }
  }
  else {
    len = snprintf(filename, sizeof(filename), "%s.%.4d.%.5d.%s", SourceInfo.basename, SourceInfo.nstore, SourceInfo.sample, ending);
  }
  if(len < 0 || (size_t)len >= sizeof(filename)) {
    fprintf(stderr, "op_write_prop: propagator file name does not fit into %d characters and was truncated\n",
            (int)sizeof(filename) - 1);
  }

  if(!PropInfo.splitted || append_)
    append = 1;
  /* the 1 is for appending */
  construct_writer(&writer, filename, append);
  if (PropInfo.splitted || SourceInfo.ix == index_start) {
    inverterInfo = construct_paramsInverterInfo(optr->reached_prec, optr->iterations, 
                                                optr->solver, optr->no_flavours);
    write_spinor_info(writer, PropInfo.format, inverterInfo, append);
    free(inverterInfo);
  }
  /* write the source depending on format */
  /* to be fixed for 2 fl tmwilson        */
  if (PropInfo.format == 1) {
    sourceFormat = construct_paramsSourceFormat(SourceInfo.precision, optr->no_flavours, 4, 3);
    write_source_format(writer, sourceFormat);
    status = write_spinor(writer, &operator_list[op_id].sr0, &operator_list[op_id].sr1, 
                          1, SourceInfo.precision);
    if(optr->no_flavours == 2) {
      status = write_spinor(writer, &operator_list[op_id].sr2, &operator_list[op_id].sr3, 
                            1, SourceInfo.precision);
    }
    free(sourceFormat);
  }
  propagatorFormat = construct_paramsPropagatorFormat(optr->prop_precision, optr->no_flavours);
  write_propagator_format(writer, propagatorFormat);
  free(propagatorFormat);

  /* For two flavours the second pair of fields is written first.
     NOTE(review): status is collected from write_spinor but never acted
     upon -- confirm whether failures should be reported here. */
  if(optr->no_flavours == 2) {
    status = write_spinor(writer, &operator_list[op_id].prop2, &operator_list[op_id].prop3, 1, optr->prop_precision);
  }
  status = write_spinor(writer, &operator_list[op_id].prop0, &operator_list[op_id].prop1, 1, optr->prop_precision);
  destruct_writer(writer);
  return;
}
Esempio n. 5
0
int arpack_cg(
  /* solver params */
  const int N,                   /* (IN) Number of lattice sites for this process*/
  solver_params_t solver_params, /* (IN) parameters for solver */
  spinor * const x,              /* (IN/OUT) initial guess on input, solution on output for this RHS*/
  spinor * const b,              /* (IN) right-hand side*/
  matrix_mult f,                 /* (IN) f(s,r) computes s=A*r, i.e. matrix-vector multiply in double precision */
  matrix_mult f32,               /* (IN) f(s,r) computes s=A*r, i.e. matrix-vector multiply in single precision */
  const double eps_sq,           /* (IN) squared tolerance of convergence of the linear system for systems nrhs1+1 till nrhs*/
  const int rel_prec,            /* (IN) 0 for using absoute error for convergence
                                         1 for using relative error for convergence*/
  const int maxit,               /* (IN) Maximum allowed number of iterations to solution for the linear system*/
  matrix_mult f_final,           /* (IN) final operator application during projection of type 1 */
  matrix_mult f_initial          /* (IN) initial operator application during projection of type 1 */
) {

  /* Static variables and arrays. */
  static int ncurRHS=0;                  /* current number of the system being solved */                   
  static void *_ax,*_r,*_tmps1,*_tmps2;                  
  static spinor *ax,*r,*tmps1,*tmps2;                  
  static _Complex double *evecs,*evals,*H,*HU,*Hinv,*initwork,*tmpv1;
  static _Complex double *zheev_work;
  static double *hevals,*zheev_rwork;
  static int *IPIV; 
  static int info_arpack=0;
  static int nconv=0; /* number of converged eigenvectors as returned by arpack */
  int i,j,tmpsize;
  char cV='V',cN='N', cU='U';   
  int ONE=1;
  int zheev_lwork,zheev_info;
  _Complex double c1, c2, c3, tpone=1.0,tzero=0.0;
  double d1,d2,d3;
  double et1,et2;  /* timing variables */
  char evecs_filename[500];
  char howmny = 'P';
  FILE *evecs_fs=NULL;
  size_t evecs_count;
  WRITER *evecs_writer=NULL;
  spinor *evecs_ptr0 = NULL, *evecs_ptr1 = NULL;
  paramsPropagatorFormat *evecs_propagatorFormat = NULL;
  void *evecs_io_buffer = NULL;

  int parallel;        /* for parallel processing of the scalar products */
#ifdef TM_USE_MPI
    parallel=1;
#else
    parallel=0;
#endif

  /* leading dimension for spinor vectors */
  int LDN;
  if(N==VOLUME)
     LDN = VOLUMEPLUSRAND;
  else
     LDN = VOLUMEPLUSRAND/2; 

  /*(IN) Number of right-hand sides to be solved*/ 
  const int nrhs =   solver_params.arpackcg_nrhs; 
  /*(IN) First number of right-hand sides to be solved using tolerance eps_sq1*/ 
  const int nrhs1 =   solver_params.arpackcg_nrhs1;
  /*(IN) squared tolerance of convergence of the linear system for systems 1 till nrhs1*/
  const double eps_sq1 = solver_params.arpackcg_eps_sq1;
  /*(IN) suqared tolerance for restarting cg */
  const double res_eps_sq =   solver_params.arpackcg_res_eps_sq;

  /* parameters for arpack */

  /*(IN) number of eigenvectors to be computed by arpack*/
  const int nev = solver_params.arpackcg_nev;
   /*(IN) size of the subspace used by arpack with the condition (nev+1) =< ncv*/
  const int ncv = solver_params.arpackcg_ncv;
  /*(IN) tolerance for computing eigenvalues with arpack */
  double arpack_eig_tol =   solver_params.arpackcg_eig_tol;
  /*(IN) maximum number of iterations to be used by arpack*/
  int arpack_eig_maxiter =   solver_params.arpackcg_eig_maxiter;
  /*(IN) 0 for eigenvalues with smallest real part "SR"
         1 for eigenvalues with largest real part "LR"
         2 for eigenvalues with smallest absolute value "SM"
         3 for eigenvalues with largest absolute value "LM"
         4 for eigenvalues with smallest imaginary part "SI"
         5 for eigenvalues with largest imaginary part  "LI"*/
  int kind =   solver_params.arpackcg_evals_kind;
  /*(IN) 0 don't compute the eiegnvalues and their residuals of the original system 
         1 compute the eigenvalues and the residuals for the original system (the orthonormal basis
           still be used in deflation and they are not overwritten).*/
  int comp_evecs =   solver_params.arpackcg_comp_evecs;
  /*(IN) 0 no polynomial acceleration; 1 use polynomial acceleration*/
  int acc =   solver_params.use_acc;
  /*(IN) degree of the Chebyshev polynomial (irrelevant if acc=0)*/
  int cheb_k = solver_params.cheb_k;
  /*(IN) lower end of the interval where the acceleration will be used (irrelevant if acc=0)*/
  double emin = solver_params.op_evmin;
  /*(IN) upper end of the interval where the acceleration will be used (irrelevant if acc=0)*/
  double emax = solver_params.op_evmax;
  /*(IN) file name to be used for printing out debugging information from arpack*/
  char *arpack_logfile = solver_params.arpack_logfile;
  /*(IN) read eigenvectors in Schur basis from file */
  int  arpack_read_ev = solver_params.arpackcg_read_ev;
  /*(IN) write eigenvectors in Schur basis to file */
  int  arpack_write_ev = solver_params.arpackcg_write_ev;
  /*(IN) file name to be used for reading and writing evecs from and to disc */
  char *arpack_evecs_filename = solver_params.arpack_evecs_filename;
   /*(IN) precision used for writing eigenvectors */
  int arpack_evecs_writeprec = solver_params.arpack_evecs_writeprec;
  /* how to project with approximate eigenvectors */
  int projection_type = solver_params.projection_type;
  /* file format for evecs used by arpack */
  char *arpack_evecs_fileformat = solver_params.arpack_evecs_fileformat; 

  /*-------------------------------------------------------------
    if this is the first right hand side, allocate memory, 
    call arpack, and compute resiudals of eigenvectors if needed
    -------------------------------------------------------------*/ 
  if(ncurRHS==0){ 
#if (defined SSE || defined SSE2 || defined SSE3)
    _ax = malloc((LDN+ALIGN_BASE)*sizeof(spinor));
    if(_ax==NULL)
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for _ax inside arpack_cg.\n");
       exit(1);
    }
    else
       {ax  = (spinor *) ( ((unsigned long int)(_ax)+ALIGN_BASE)&~ALIGN_BASE);}

    _r = malloc((LDN+ALIGN_BASE)*sizeof(spinor));
    if(_r==NULL)
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for _r inside arpack_cg.\n");
       exit(1);
    }
    else
       {r  = (spinor *) ( ((unsigned long int)(_r)+ALIGN_BASE)&~ALIGN_BASE);}

    _tmps1 = malloc((LDN+ALIGN_BASE)*sizeof(spinor));
    if(_tmps1==NULL)
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for _tmps1 inside arpack_cg.\n");
       exit(1);
    }
    else
       {tmps1  = (spinor *) ( ((unsigned long int)(_tmps1)+ALIGN_BASE)&~ALIGN_BASE);}

    _tmps2 = malloc((LDN+ALIGN_BASE)*sizeof(spinor));
    if(_tmps2==NULL)
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for _tmps2 inside arpack_cg.\n");
       exit(1);
    }
    else
       {tmps2  = (spinor *) ( ((unsigned long int)(_tmps2)+ALIGN_BASE)&~ALIGN_BASE);}

#else
    ax = (spinor *) malloc(LDN*sizeof(spinor));
    r  = (spinor *) malloc(LDN*sizeof(spinor));
    tmps1 = (spinor *) malloc(LDN*sizeof(spinor));
    tmps2 = (spinor *) malloc(LDN*sizeof(spinor));
    
    if( (ax == NULL)  || (r==NULL) || (tmps1==NULL) || (tmps2==NULL) )
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for ax,r,tmps1,tmps2 inside arpack_cg.\n");
       exit(1);
    }
#endif


    evecs = (_Complex double *) malloc(ncv*12*N*sizeof(_Complex double)); /* note: no extra buffer  */
    evals = (_Complex double *) malloc(ncv*sizeof(_Complex double)); 
    tmpv1 = (_Complex double *) malloc(12*N*sizeof(_Complex double));

    if((evecs == NULL)  || (evals==NULL) || (tmpv1==NULL))
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for evecs and evals inside arpack_cg.\n");
       exit(1);
    }

    if ( arpack_read_ev == 1) {

      if (strcmp(arpack_evecs_fileformat, "partfile") == 0) {
        /* set evec filenmae */
        sprintf(evecs_filename, "%s.%.5d.pt%.2dpx%.2dpy%.2dpz%.2d", arpack_evecs_filename, nev, g_proc_coords[0], g_proc_coords[1], g_proc_coords[2], g_proc_coords[3]);
        evecs_fs = fopen(evecs_filename, "r");
        if (evecs_fs == NULL) {
          fprintf(stderr, "[arpack_cg] (%.4d) Error, could not open file %s for reading\n", g_cart_id, evecs_filename);
          return(-2);
        }
        fprintf(stdout, "# [arpack_cg] reading eigenvectors from file %s\n", evecs_filename);

        if(arpack_evecs_writeprec == 64) {
 
          evecs_io_buffer = (void*)evecs;
   
          et1=gettime();
          evecs_count = fread( evecs_io_buffer, sizeof(_Complex double), (size_t)nev*12*N, evecs_fs);
          et2=gettime();
        
        } else {
          evecs_io_buffer = malloc(sizeof(_Complex double) * (size_t)nev*12*N );
          if( evecs_io_buffer == NULL) {
            fprintf(stderr, "[arpack_cg] (%.4d) Error, could not allocate memory for evecs_io_buffer\n", g_cart_id);
            return(-42);
          }
  
          et1=gettime();
          evecs_count = fread( evecs_io_buffer, sizeof(_Complex double)/2, (size_t)nev*12*N, evecs_fs);
          et2=gettime();

          single2double(evecs, evecs_io_buffer, nev*24*N);

          free( evecs_io_buffer );
          evecs_io_buffer = NULL;
        }
       
        if( evecs_count != ((size_t)nev*12*N) ) {
          fprintf(stderr, "[arpack_cg] (%.4d) Error, could not proper amount of data from file %s\n", g_cart_id, evecs_filename);
          return(-3);
        }
        fclose(evecs_fs);
        evecs_fs = NULL;
        if(g_proc_id == g_stdio_proc) {
          fprintf(stdout,"# [arpack_cg] ARPACK time for reading %d eigenvectors: %+e seconds\n", nev, et2-et1);
        }
      } else if(strcmp(arpack_evecs_fileformat, "single") == 0) {

        if(N==VOLUME) {
          for(i=0; i<nev; i++) {
            sprintf(evecs_filename, "%s.ev%.5d", arpack_evecs_filename, i);
            evecs_ptr0 = (spinor*)&(evecs[i*12*N]);
            evecs_ptr1 = NULL;
            read_spinor(evecs_ptr0,  evecs_ptr1, evecs_filename, 0);
          } /* end of loop on eigenvectors */
        } else if(N==VOLUME/2) {
          for(i=0; i<nev/2; i++) {
            sprintf(evecs_filename, "%s.ev%.5d", arpack_evecs_filename, 2*i);
            evecs_ptr0 = (spinor*)&(evecs[(2*i  )*12*N]);
            evecs_ptr1 = (spinor*)&(evecs[(2*i+1)*12*N]);
            read_spinor(evecs_ptr0,  evecs_ptr1, evecs_filename, 0);
          } /* end of loop on eigenvectors */
        }
      }   /* of if arpack_evecs_fileformat */

      /* set info_arpack pro forma to SUCCESS */
      nconv = nev;
      info_arpack = 0;
    } else {
      et1=gettime();
      evals_arpack(N,nev,ncv,kind,howmny,acc,cheb_k,emin,emax,evals,evecs,arpack_eig_tol,arpack_eig_maxiter,f,&info_arpack,&nconv,arpack_logfile);
      et2=gettime();

      if(info_arpack != 0){ /* arpack didn't converge */
      if(g_proc_id == g_stdio_proc)
        fprintf(stderr,"[arpack_cg] WARNING: ARPACK didn't converge. exiting..\n");
        return -1;
      }
    
      if(g_proc_id == g_stdio_proc)
      {
         fprintf(stdout,"# [arpack_cg] ARPACK has computed %d eigenvectors\n",nconv);
         fprintf(stdout,"# [arpack_cg] ARPACK time: %+e\n",et2-et1);
      }

      if ( arpack_write_ev == 1) {

        if(strcmp(arpack_evecs_fileformat, "partfile") == 0 ) {

          if( g_cart_id == 0 ) fprintf(stdout, "# [arpack_cg] writing evecs in partfile format\n");
          /* set evec filenmae */
          sprintf(evecs_filename, "%s.%.5d.pt%.2dpx%.2dpy%.2dpz%.2d", arpack_evecs_filename, nconv, g_proc_coords[0], g_proc_coords[1], g_proc_coords[2], g_proc_coords[3]);

          evecs_fs = fopen(evecs_filename, "w");
          if (evecs_fs == NULL) {
            fprintf(stderr, "[arpack_cg] (%.4d) Error, could not open file %s for writing\n", g_cart_id, evecs_filename);
            return(-4);
          }
        
          if(arpack_evecs_writeprec == 64) {

            evecs_io_buffer = (void*)evecs;
 
            et1=gettime();
            evecs_count = fwrite( evecs_io_buffer, sizeof(_Complex double), (size_t)nconv*12*N, evecs_fs);
            et2=gettime();

          } else {
            evecs_io_buffer = malloc(sizeof(_Complex double) * (size_t)nconv*12*N );
            if( evecs_io_buffer == NULL) {
              fprintf(stderr, "[arpack_cg] (%.4d) Error, could not allocate memory for evecs_io_buffer\n", g_cart_id);
              return(-41);
            }
            double2single(evecs_io_buffer, evecs, nconv*24*N);
 
            et1=gettime();
            evecs_count = fwrite( evecs_io_buffer, sizeof(_Complex double)/2, (size_t)nconv*12*N, evecs_fs);
            et2=gettime();
            free(evecs_io_buffer);
            evecs_io_buffer = NULL;
          }
 
          if( evecs_count != ((size_t)nconv*12*N) ) {
            fprintf(stderr, "[arpack_cg] (%.4d) Error, could not write proper amount of data to file %s\n", g_cart_id, evecs_filename);
            return(-5);
          }
          fclose(evecs_fs);
          evecs_fs = NULL;

          if(g_proc_id == g_stdio_proc) {
            fprintf(stdout,"[arpack_cg] (%.4d) ARPACK time for writing %d eigenvectors: %+e seconds\n", g_cart_id, nconv, et2-et1);
          }

        } else if (strcmp(arpack_evecs_fileformat, "single") == 0) {

          if(N==VOLUME) {
            for(i=0; i<nconv; i++) {
              sprintf(evecs_filename, "%s.ev%.5d", arpack_evecs_filename, i);
              construct_writer(&evecs_writer, evecs_filename, 0);
              evecs_propagatorFormat = construct_paramsPropagatorFormat(arpack_evecs_writeprec, 1);
              write_propagator_format(evecs_writer, evecs_propagatorFormat);
              free(evecs_propagatorFormat);
              evecs_ptr0 = (spinor*)&(evecs[i*12*N]);
              evecs_ptr1 = NULL;
              write_spinor(evecs_writer, &evecs_ptr0, &evecs_ptr1, 1, arpack_evecs_writeprec);
              destruct_writer(evecs_writer);
              evecs_writer=NULL;
            } /* end of loop on converged eigenvectors */
          } else if(N==VOLUME/2) {
            for(i=0; i<nconv/2; i++) {
              sprintf(evecs_filename, "%s.ev%.5d", arpack_evecs_filename, 2*i);
              construct_writer(&evecs_writer, evecs_filename, 0);
              evecs_propagatorFormat = construct_paramsPropagatorFormat(arpack_evecs_writeprec, 1);
              write_propagator_format(evecs_writer, evecs_propagatorFormat);
              free(evecs_propagatorFormat);
              evecs_ptr0 = (spinor*)&(evecs[(2*i  )*12*N]);
              evecs_ptr1 = (spinor*)&(evecs[(2*i+1)*12*N]);
              write_spinor(evecs_writer, &evecs_ptr0, &evecs_ptr1,1, arpack_evecs_writeprec);
              destruct_writer(evecs_writer);
              evecs_writer=NULL;
            }  /* end of loop on converged eigenvectors */
          }    /* end of if N == VOLUME */

        }      /* of if arpack_evecs_fileformat */

      }        /* end of if arpack_write_ev == 1 */

    }          /* end of if arpack_read_ev == 1 */

    H        = (_Complex double *) malloc(nconv*nconv*sizeof(_Complex double)); 
    Hinv     = (_Complex double *) malloc(nconv*nconv*sizeof(_Complex double)); 
    initwork = (_Complex double *) malloc(nconv*sizeof(_Complex double)); 
    IPIV     = (int *) malloc(nconv*sizeof(int));
    zheev_lwork = 3*nconv;
    zheev_work  = (_Complex double *) malloc(zheev_lwork*sizeof(_Complex double));
    zheev_rwork = (double *) malloc(3*nconv*sizeof(double));
    hevals      = (double *) malloc(nconv*sizeof(double));

    if((H==NULL) || (Hinv==NULL) || (initwork==NULL) || (IPIV==NULL) || (zheev_lwork==NULL) || (zheev_rwork==NULL) || (hevals==NULL))
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for H, Hinv, initwork, IPIV, zheev_lwork, zheev_rwork, hevals inside arpack_cg.\n");
       exit(1);
    }

    et1=gettime();
    /* compute the elements of the hermitian matrix H 
       leading dimension is nconv and active dimension is nconv */
    
    if( projection_type == 0) {
    
      for(i=0; i<nconv; i++)
      {
        assign_complex_to_spinor(r,&evecs[i*12*N],12*N);
        f(ax,r);
        c1 = scalar_prod(r,ax,N,parallel);
        H[i+nconv*i] = creal(c1);  /* diagonal should be real */
        for(j=i+1; j<nconv; j++)
        {
          assign_complex_to_spinor(r,&evecs[j*12*N],12*N);
          c1 = scalar_prod(r,ax,N,parallel);
          H[j+nconv*i] = c1;
          H[i+nconv*j] = conj(c1); /* enforce hermiticity */
        }
      }

    } else if ( projection_type == 1 )  {

      for(i=0; i<nconv; i++)
      {
        assign_complex_to_spinor(tmps1, &evecs[i*12*N], 12*N);
        f_final(r, tmps1);
        f(ax,r);
        c1 = scalar_prod(r,ax,N,parallel);
        c2 = scalar_prod(r,r,N,parallel);
        H[i+nconv*i] = creal(c1) / creal(c2);   /* diagonal should be real */
        for(j=i+1; j<nconv; j++)
        {
          assign_complex_to_spinor(tmps1, &evecs[j*12*N], 12*N);
          f_final(r, tmps1);
          c1 = scalar_prod(r,ax,N,parallel);
          c3 = scalar_prod(r, r, N, parallel);

          H[j+nconv*i] = c1 / sqrt( creal(c2) * creal(c3) );
          H[i+nconv*j] = conj(c1) / sqrt( creal(c2) * creal(c3) ); /* enforce hermiticity */
        }
      }
    }


    et2=gettime();
    if(g_proc_id == g_stdio_proc) {
      fprintf(stdout,"[arpack_cg] time to compute H: %+e\n",et2-et1);
    }

/*
    if(g_cart_id == 0) {
      for(i=0; i<nconv; i++) {
      for(j=0; j<nconv; j++) {
        fprintf(stdout, "# [arpack_cg] H[%d, %d] = %25.16e %25.16e\n", i, j, creal(H[i*nconv+j]), cimag(H[i*nconv+j]));
      }}
    }
*/



     et1=gettime();
     /* compute Ritz values and Ritz vectors if needed */
     if( (nconv>0) && (comp_evecs !=0))
     {
         HU = (_Complex double *) malloc(nconv*nconv*sizeof(_Complex double)); 
         if( HU==NULL ) {
           if(g_proc_id == g_stdio_proc)
             fprintf(stderr,"[arpack_cg] insufficient memory for HU inside arpack_cg\n");
             exit(2);
         }
         /* copy H into HU */
         tmpsize=nconv*nconv;
         _FT(zcopy)(&tmpsize,H,&ONE,HU,&ONE);

         /* compute eigenvalues and eigenvectors of HU*/
         /* SUBROUTINE ZHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK,INFO ) */
         _FT(zheev)(&cV,&cU,&nconv,HU,&nconv,hevals,zheev_work,&zheev_lwork,zheev_rwork,&zheev_info,1,1);

         if(zheev_info != 0)
         {
	    if(g_proc_id == g_stdio_proc) 
	    {
	        fprintf(stderr,"[arpack_cg] Error in ZHEEV:, info =  %d\n",zheev_info); 
                fflush(stderr);
	    }
	    exit(1);
         }

         /* If you want to replace the schur (orthonormal) basis by eigen basis
            use something like this. It is better to use the schur basis because
            they are better conditioned. Use this part only to get the eigenvalues
            and their resduals for the operator (D^\daggerD)
            esize=(ncv-nconv)*12*N;
            Zrestart_X(evecs,12*N,HU,12*N,nconv,nconv,&evecs[nconv*N],esize); */

         /* compute residuals and print out results */

	 if(g_proc_id == g_stdio_proc)
	 {fprintf(stdout,"# [arpack_cg] Ritz values of A and their residulas (||A*x-lambda*x||/||x||\n"); 
          fprintf(stdout,"# [arpack_cg] =============================================================\n");
          fflush(stdout);}

         for(i=0; i<nconv; i++)
         {
	    tmpsize=12*N;
            _FT(zgemv)(&cN,&tmpsize,&nconv,&tpone,evecs,&tmpsize,
		       &HU[i*nconv],&ONE,&tzero,tmpv1,&ONE,1);

            assign_complex_to_spinor(r,tmpv1,12*N);

            d1=square_norm(r,N,parallel);
            
            f(ax,r);

            mul_r(tmps1,hevals[i],r,N);

            diff(tmps2,ax,tmps1,N);
	    
	    d2= square_norm(tmps2,N,parallel);

            d3= sqrt(d2/d1);
	    
	    if(g_proc_id == g_stdio_proc)
	    {fprintf(stdout,"Eval[%06d]: %22.15E rnorm: %22.15E\n", i, hevals[i], d3); fflush(stdout);}
        } 
        free( HU ); HU = NULL;
     }  /* if( (nconv_arpack>0) && (comp_evecs !=0)) */
     et2=gettime();
     if(g_proc_id == g_stdio_proc) {
       fprintf(stdout,"[arpack_cg] time to compute eigenvectors: %+e\n",et2-et1);
     }

  }  /* if(ncurRHS==0) */
    
  double eps_sq_used,restart_eps_sq_used;  /* tolerance squared for the linear system */

  double cur_res; /* current residual squared */

  /*increment the RHS counter*/
  ncurRHS = ncurRHS +1; 

  /* set the tolerance to be used for this right-hand side  */
  if(ncurRHS > nrhs1){
    eps_sq_used = eps_sq;
  }
  else{
    eps_sq_used = eps_sq1;
  }
  
  if(g_proc_id == g_stdio_proc && g_debug_level > 0) {
    fprintf(stdout, "# [arpack_cg] System %d, eps_sq %e, projection type %d\n",ncurRHS,eps_sq_used, projection_type); 
    fflush(stdout);
  } 
  
  /*---------------------------------------------------------------*/
  /* Call init-CG until this right-hand side converges             */
  /*---------------------------------------------------------------*/
  double wt1,wt2,wE,wI;
  double normsq,tol_sq;
  int flag,maxit_remain,numIts,its;
  int info_lapack;

  wE = 0.0; wI = 0.0;     /* Start accumulator timers */
  flag = -1;    	  /* System has not converged yet */
  maxit_remain = maxit;   /* Initialize Max and current # of iters   */
  numIts = 0;  
  restart_eps_sq_used=res_eps_sq;

  while( flag == -1 )
  {
    
    if(nconv > 0)
    {


      /* --------------------------------------------------------- */
      /* Perform init-CG with evecs vectors                        */
      /* xinit = xinit + evecs*Hinv*evec'*(b-Ax0) 		   */
      /* --------------------------------------------------------- */
      wt1 = gettime();

      /*r0=b-Ax0*/
      f(ax,x); /*ax = A*x */
      diff(r,b,ax,N);  /* r=b-A*x */

      if( projection_type == 0) {

        /* x = x + evecs*inv(H)*evecs'*r */
        for(int i=0; i < nconv; i++)
        {
           assign_complex_to_spinor(tmps1,&evecs[i*12*N],12*N);
           initwork[i]= scalar_prod(tmps1,r,N,parallel);
        }

        /* solve the linear system H y = c */
        tmpsize=nconv*nconv;
        _FT(zcopy) (&tmpsize,H,&ONE,Hinv,&ONE); /* copy H into Hinv */
        /* SUBROUTINE ZGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) */
        _FT(zgesv) (&nconv,&ONE,Hinv,&nconv,IPIV,initwork,&nconv,&info_lapack);

        if(info_lapack != 0)
        {
           if(g_proc_id == g_stdio_proc) {
              fprintf(stderr, "[arpack_cg] Error in ZGESV:, info =  %d\n",info_lapack); 
              fflush(stderr);
           }
           exit(1);
        }

        /* x = x + evecs*inv(H)*evecs'*r */
        for(i=0; i<nconv; i++)
        {
          assign_complex_to_spinor(tmps1,&evecs[i*12*N],12*N);
          assign_add_mul(x,tmps1,initwork[i],N);
        }

      } else if ( projection_type == 1 ) {
        /* x = x + evecs*inv(H)*evecs'*r */

        /* tmps2 = Q^+ r */
        f_initial(tmps2, r);

        for(int i=0; i < nconv; i++) {
          /* tmps1 = v_i */
          assign_complex_to_spinor(tmps1,&evecs[i*12*N],12*N);

          /* initwork_i = v_i^+ Q^+ r / lambda_i^2 */
          initwork[i]= scalar_prod(tmps1, tmps2, N, parallel) / ( H[i*nconv+i] * H[i*nconv+i] );
        }

        memset(tmps2, 0, N*sizeof(spinor) );
        for(i=0; i<nconv; i++) {
          assign_complex_to_spinor(tmps1, &evecs[i*12*N], 12*N);
          assign_add_mul(tmps2, tmps1, initwork[i], N);
        }

        /* apply final operator */
        f_final(tmps1, tmps2);
        assign_add_mul(x, tmps1, 1., N);

      }  /* end of if projection type */

      /* compute elapsed time and add to accumulator */

      wt2 = gettime();
      wI = wI + wt2-wt1;
      
    }/* if(nconv > 0) */


    /* which tolerance to use */
    if(eps_sq_used > restart_eps_sq_used)
    {
       tol_sq = eps_sq_used;
       flag   = 1; /* shouldn't restart again */
    }
    else
    {
       tol_sq = restart_eps_sq_used;
    }

    wt1 = gettime();
    its = cg_her(x,b,maxit_remain,tol_sq,rel_prec,N,f); 
          
    wt2 = gettime();

    wE = wE + wt2-wt1;

    /* check convergence */
    if(its == -1)
    {
       /* cg didn't converge */
       if(g_proc_id == g_stdio_proc) {
         fprintf(stderr, "[arpack_cg] CG didn't converge within the maximum number of iterations in arpack_cg. Exiting...\n"); 
         fflush(stderr);
         exit(1);
         
       }
    } 
    else
    {
       numIts += its;   
       maxit_remain = maxit - numIts; /* remaining number of iterations */
       restart_eps_sq_used = restart_eps_sq_used*res_eps_sq; /* prepare for the next restart */
    }
    
  }
  /* end while (flag ==-1)               */
  
  /* ---------- */
  /* Reporting  */
  /* ---------- */
  /* compute the exact residual */
  f(ax,x); /* ax= A*x */
  diff(r,b,ax,N);  /* r=b-A*x */	
  normsq=square_norm(r,N,parallel);
  if(g_debug_level > 0 && g_proc_id == g_stdio_proc)
  {
    fprintf(stdout, "# [arpack_cg] For this rhs:\n");
    fprintf(stdout, "# [arpack_cg] Total initCG Wallclock : %+e\n", wI);
    fprintf(stdout, "# [arpack_cg] Total cg Wallclock : %+e\n", wE);
    fprintf(stdout, "# [arpack_cg] Iterations: %-d\n", numIts); 
    fprintf(stdout, "# [arpack_cg] Actual Resid of LinSys  : %+e\n",normsq);
  }


  /* free memory if this was your last system to solve */
  if(ncurRHS == nrhs){
#if ( (defined SSE) || (defined SSE2) || (defined SSE3)) 
    free(_ax);  free(_r);  free(_tmps1); free(_tmps2);
#else
    free(ax); free(r); free(tmps1); free(tmps2);
#endif
    free(evecs); free(evals); free(H); free(Hinv);
    free(initwork); free(tmpv1); free(zheev_work);
    free(hevals); free(zheev_rwork); free(IPIV);
  }


  return numIts;
}
Esempio n. 6
0
void index_jd(int * nr_of_eigenvalues_ov, 
	      const int max_iterations, const double precision_ov, char *conf_filename, 
	      const int nstore, const int method){
  
  complex *eval;
  spinor  *eigenvectors_ov, *eigenvectors_ov_;
  spinor  *lowvectors, *lowvectors_;
  int i=0 , k=0, returncode=0, index = 0, determined = 0, signed_index = 0;
  char filename[120];
  FILE * ifs = NULL;
  matrix_mult Operator[2];
  double absdifference;
  const int N2 = VOLUMEPLUSRAND;

#ifdef MPI
  double atime, etime;
#endif
  double lowestmodes[20];
  int intsign, max_iter, first_blocksize = 1;
  int * idx = NULL;

  /**********************
   * For Jacobi-Davidson 
   **********************/
  int verbosity = 3, converged = 0, blocksize = 1, blockwise = 0;
  int solver_it_max = 50, j_max, j_min, v0dim = 0;
  double * eigenvalues_ov = NULL;
  double decay_min = 1.7, threshold_min = 1.e-3, prec;

  WRITER *writer=NULL;
  spinor *s;
  double sqnorm;
  paramsPropagatorFormat *propagatorFormat = NULL;
  
  double ap_eps_sq;
  int switch_on_adaptive_precision = 0;
  double ov_s = 0;

  /**********************                                                 
   * General variables                                                    
   **********************/

  eval= calloc((*nr_of_eigenvalues_ov),sizeof(complex));
  shift = 0.0;

  //  ov_s = 0.5*(1./g_kappa - 8.) - 1.;
  ap_eps_sq = precision_ov*precision_ov; 

#if (defined SSE || defined SSE2 )
  eigenvectors_ov_= calloc(VOLUMEPLUSRAND*(*nr_of_eigenvalues_ov)+1, sizeof(spinor)); 
  eigenvectors_ov = (spinor *)(((unsigned long int)(eigenvectors_ov_)+ALIGN_BASE)&~ALIGN_BASE);
  lowvectors_ = calloc(2*first_blocksize*VOLUMEPLUSRAND+1, sizeof(spinor));
  lowvectors = (spinor *)(((unsigned long int)(lowvectors_)+ALIGN_BASE)&~ALIGN_BASE);
#else
  //  eigenvectors_ov_ = calloc(VOLUMEPLUSRAND*(*nr_of_eigenvalues_ov), sizeof(spinor));
  eigenvectors_ov_ = calloc(VOLUMEPLUSRAND*(*nr_of_eigenvalues_ov), sizeof(spinor));
  lowvectors_ = calloc(2*first_blocksize*VOLUMEPLUSRAND, sizeof(spinor));
  eigenvectors_ov = eigenvectors_ov_;
  lowvectors = lowvectors_;
#endif

  //  idx = malloc((*nr_of_eigenvalues_ov)*sizeof(int));
  idx = malloc((*nr_of_eigenvalues_ov)*sizeof(int));
  Operator[0]=&Dov_proj_plus;
  Operator[1]=&Dov_proj_minus;
  
  if(g_proc_id == g_stdio_proc){
    printf("Computing first the two lowest modes in the positive and negative chirality sector, respectively\n");
    if(switch_on_adaptive_precision == 1) {
      printf("We have switched on adaptive precision with ap_eps_sq = %e!\n", ap_eps_sq);
    }
    printf("We have set the mass to zero within this computation!\n");
    fflush(stdout);
  }

  prec = precision_ov; 
  j_min = 8; j_max = 16;
  max_iter = 70;

#ifdef MPI
  atime = MPI_Wtime();
#endif

  v0dim = first_blocksize;
  blocksize = v0dim;
  for(intsign = 0; intsign < 2; intsign++){
    converged = 0;
    if(g_proc_id == g_stdio_proc){
      printf("%s chirality sector: \n", intsign ? "negative" : "positive");
      fflush(stdout);
    }
    if(max_iter == 70){
      /********************************************************************
       *
       * We need random start spinor fields, but they must be half zero,
       * that's why we apply the Projektor once
       *
       ********************************************************************/
      for(i = 0; i < first_blocksize; i++) {
	random_spinor_field(&lowvectors[(first_blocksize*intsign+i)*VOLUMEPLUSRAND],N2,0);
	Proj(&lowvectors[(first_blocksize*intsign+i)*VOLUMEPLUSRAND], 
	     &lowvectors[(first_blocksize*intsign+i)*VOLUMEPLUSRAND],N2, intsign);
      }
    }

    jdher(VOLUME*sizeof(spinor)/sizeof(complex),
	  VOLUMEPLUSRAND*sizeof(spinor)/sizeof(complex),
	  shift, prec, blocksize, j_max, j_min, 
	  max_iter, blocksize, blockwise, v0dim, (complex*) &lowvectors[first_blocksize*intsign*VOLUMEPLUSRAND],
	  CG, solver_it_max,
	  threshold_min, decay_min, verbosity,
	  &converged, (complex*) &lowvectors[first_blocksize*intsign*VOLUMEPLUSRAND], 
	  &lowestmodes[first_blocksize*intsign],
	  &returncode, JD_MINIMAL, 1,
	  Operator[intsign]);

    if(converged != blocksize && max_iter == 70){
      if(g_proc_id == g_stdio_proc){
	printf("Restarting %s chirality sector with more iterations!\n", intsign ? "negative" : "positive");
	fflush(stdout);
      }
      max_iter = 140;
      intsign-=1;
    }
    else {
      max_iter = 70;
      /* Save the allready computed eigenvectors_ov */
      for(i = 0; i< first_blocksize; i++) {
	sprintf(filename, "eigenvector_of_D%s.%.2d.%s.%.4d",((intsign==0)?"plus":"minus"),i , conf_filename, nstore);

	  construct_writer(&writer, filename, 0);
	  /* todo write propagator format */
	  propagatorFormat = construct_paramsPropagatorFormat(64, 1);
	  write_propagator_format(writer, propagatorFormat);
	  free(propagatorFormat);


	  s=(spinor*)&lowvectors[first_blocksize*intsign*VOLUMEPLUSRAND];
	  write_spinor(writer, &s,NULL, 1, 64);
	  destruct_writer(writer);
	  writer=NULL;
	  sqnorm=square_norm(s,VOLUME,1);
	  printf(" wrote eigenvector of overlap operator !!! | |^2 = %e \n",sqnorm);


      }
    }
  }

#ifdef MPI
  etime = MPI_Wtime();
  if(g_proc_id == g_stdio_proc){
    printf("It took %f sec to determine the sector with zero modes, if any!\n", etime-atime);
  }
#endif

  /*Compare the two lowest modes */
  absdifference = fabs(lowestmodes[0]-lowestmodes[first_blocksize]);
  if(absdifference < 0.1*max(lowestmodes[0],lowestmodes[first_blocksize])){
    /* They are equal within the errors */
    if(g_proc_id == g_stdio_proc){
      printf("Index is 0!\n");
      fflush(stdout);
      sprintf(filename, "eigenvalues_of_overlap_proj.%s.%.4d", conf_filename, nstore);
      ifs = fopen(filename, "w");  
      printf("\nThe following lowest modes have been computed:\n");
      fprintf(ifs, "Index is 0\n\n");
      fprintf(ifs, "Sector with positive chirality:\n");
      for(i = 0; i < first_blocksize; i++) {
	lowestmodes[i] = 2.*(1.+ov_s)*lowestmodes[i];
	fprintf(ifs, "%d %e positive\n", i, lowestmodes[i]);
	printf("%d %e positive\n", i, lowestmodes[i]);
      }
      fprintf(ifs, "Sector with negative chirality:\n");
      for(i = 0; i < first_blocksize; i++) {
	lowestmodes[i+first_blocksize] = 2.*(1.+ov_s)*lowestmodes[i+first_blocksize];
	fprintf(ifs, "%d %e negative\n", i, lowestmodes[i+first_blocksize]);
	printf("%d %e negative\n", i, lowestmodes[i+first_blocksize]);
      }
      fclose(ifs);
      for(k = 0; k < 2; k++) {
	sprintf(filename, "eigenvalues_of_D%s.%s.%.4d", 
		k ? "minus" : "plus", conf_filename, nstore);
	ifs = fopen(filename, "w");
	fwrite(&first_blocksize, sizeof(int), 1, ifs);
	index = 0;
	fwrite(&index, sizeof(int), 1, ifs);
	for(i = 0; i < first_blocksize; i++) {
	  fwrite(&lowestmodes[((intsign+1)%2)*first_blocksize+i], sizeof(double), 1, ifs);
	}
	fclose(ifs);
      }
    }
  }
  else{ 
    /* they are not equal */
    /* determine the sector with not trivial topology */
    if(lowestmodes[0] < lowestmodes[first_blocksize]){
      intsign = 0;
    }
    else{
      intsign = 1;
    }
    
    if(g_proc_id == g_stdio_proc){
      printf("Computing now up to %d modes in the sector with %s chirality\n", 
	     (*nr_of_eigenvalues_ov), intsign ? "negative" : "positive");
      fflush(stdout);
    }

    /* Here we set the (absolute) precision to be  */
    /* such that we can compare to the lowest mode */
    /* in the other sector                         */

    prec = (lowestmodes[first_blocksize*((intsign+1)%2)])*1.e-1;

    eigenvalues_ov = (double*)malloc((*nr_of_eigenvalues_ov)*sizeof(double));

    /* Copy the allready computed eigenvectors_ov */
    for(i = 0; i < first_blocksize; i++) { 
      assign(&eigenvectors_ov[i], &lowvectors[(first_blocksize*intsign+i)*VOLUMEPLUSRAND],N2);
      eigenvalues_ov[i] = lowestmodes[first_blocksize*intsign+i];
    }

#ifdef MPI
    atime = MPI_Wtime();
#endif

    blocksize = 3;
    j_min = 8; j_max = 16;
    converged = first_blocksize;
    for(i = first_blocksize; i < (*nr_of_eigenvalues_ov); i+=3) { 

      if((i + blocksize) > (*nr_of_eigenvalues_ov) ) {
	blocksize = (*nr_of_eigenvalues_ov) - i;
      }

      /* Fill up the rest with random spinor fields  */
      /* and project it to the corresponding sector  */
      for(v0dim = i; v0dim < i+blocksize; v0dim++){
	random_spinor_field(&eigenvectors_ov[v0dim*VOLUMEPLUSRAND],N2,0);
	Proj(&eigenvectors_ov[v0dim*VOLUMEPLUSRAND], &eigenvectors_ov[v0dim*VOLUMEPLUSRAND],N2, intsign);
      }
      v0dim = blocksize;
      returncode = 0;

      /* compute minimal eigenvalues */
#ifdef MPI
      /*      pjdher(VOLUME*sizeof(spinor)/sizeof(complex), VOLUMEPLUSRAND*sizeof(spinor)/sizeof(complex),
	     shift, prec, omega, n_omega, ev_tr,
	     i+blocksize, j_max, j_min, 
	     max_iterations, blocksize, blockwise, v0dim, (complex*)(&eigenvectors_ov[i*VOLUMEPLUSRAND]),
	     CG, solver_it_max,
	     threshold_min, decay_min, verbosity,
	     &converged, (complex*) eigenvectors_ov, eigenvalues_ov,
	     &returncode, JD_MINIMAL, 1, use_AV,
	     Operator[intsign]);*/
#else
      jdher(VOLUME*sizeof(spinor)/sizeof(complex),
	    VOLUMEPLUSRAND*sizeof(spinor)/sizeof(complex),
	    shift, prec, blocksize, j_max, j_min,
	    max_iter, blocksize, blockwise, v0dim, (complex*) &eigenvectors_ov[i*VOLUMEPLUSRAND],
	    CG, solver_it_max,
	    threshold_min, decay_min, verbosity,
	    &converged, (complex*) eigenvectors_ov,
	    eigenvalues_ov,
	    &returncode, JD_MINIMAL, 1,
	    Operator[intsign]);
#endif
      /* Save eigenvectors_ov temporary    */
      /* in order to be able to restart */
      for (k=i; k < converged; k++){
	if(intsign == 0){
	  sprintf(filename, "eigenvector_of_Dplus.%.2d.%s.%.4d", k, conf_filename, nstore);
	}
	else{
	  sprintf(filename, "eigenvector_of_Dminus.%.2d.%s.%.4d", k, conf_filename, nstore);
	}
	/*	write_spinorfield(&eigenvectors_ov[k*VOLUMEPLUSRAND], filename);*/
      }

      /* order the eigenvalues_ov and vectors */
      for(k = 0; k < converged; k++) {
	idx[k] = k;
      }
      /*      quicksort(converged, eigenvalues_ov, idx);*/

      /* Check whether the index is detemined */
      index = 0;
      for(k = 0; k < converged; k++) { 
	absdifference = fabs(lowestmodes[first_blocksize*((intsign+1)%2)] - eigenvalues_ov[k]);
	if(absdifference < 0.1*lowestmodes[first_blocksize*((intsign+1)%2)]) {
	  /* We have found the first non zero */
	  if(k < converged-1) {
	    determined = 1;
	    break;
	  }
	  else {
	    blocksize = 1;
	    shift = eigenvalues_ov[converged-1];
	  }
	}
	else {
	  index++;
	}
      }
      /* If we have determined the index or */
      /* hit the maximal number of ev       */
      if(determined == 1 || converged == (*nr_of_eigenvalues_ov)) {
	break;
      }
      else if(g_proc_id == g_stdio_proc) {
	if(blocksize != 1) {
	  printf("Index %s (or equal) than %s%d, continuing!\n\n", 
		 intsign ? "lower" : "bigger", 
		 intsign ? "-" : "+", index);
	  fflush( stdout );
	}
	else {
	  printf("Index is %s%d, one non zero is missing, continuing!\n\n", 
		 intsign ? "-" : "+", index);
	  fflush( stdout );
	}
      }
    }

#ifdef MPI
    etime = MPI_Wtime();
#endif

    /* Save the eigenvectors_ov */
    for(i = 0; i < converged; i++){
      eval[i].re = 2.*(1.+ov_s)*eigenvalues_ov[i];
      eval[i].im = 0.;
      if(intsign == 0){
	sprintf(filename, "eigenvector_of_Dplus.%.2d.%s.%.4d", i, conf_filename, nstore);
      }
      else{
	sprintf(filename, "eigenvector_of_Dminus.%.2d.%s.%.4d", i, conf_filename, nstore);
      }
      /*      write_spinorfield(&eigenvectors_ov[idx[i]*VOLUMEPLUSRAND], filename);*/
    }

    /* Some Output */
    if(g_proc_id == g_stdio_proc) {
      printf("Index is %s%d!\n", intsign ? "-" : "+", index);
#ifdef MPI
      printf("Zero modes determined in %f sec!\n", etime-atime);
#endif
    }
    if(g_proc_id == 0) {
      sprintf(filename, "eigenvalues_of_overlap_proj.%s.%.4d", conf_filename, nstore);
      ifs = fopen(filename, "w");
      printf("\nThe following lowest modes have been computed:\n");
      fprintf(ifs, "Index is %s%d!\n\n", intsign ? "-" : "+", index);
      for(k = 0; k < 2; k++) {
	if(k == intsign) {
	  for (i=0; i < converged; i++) {
	    fprintf(ifs, "%d %e %s\n", i, eval[i].re, intsign ? "negative" : "positive");
	    printf("%d %e %s\n", i, eval[i].re, intsign ? "negative" : "positive");
	  }
	}
	else {
	  for(i = 0; i < first_blocksize; i++) {
	    lowestmodes[((intsign+1)%2)*first_blocksize+i] = 2.*(1.+ov_s)*lowestmodes[((intsign+1)%2)*first_blocksize+i];
	    fprintf(ifs, "%d %e %s\n", i, lowestmodes[((intsign+1)%2)*first_blocksize+i], intsign ? "positive" : "negative");
	    printf("%d %e %s\n", i, lowestmodes[((intsign+1)%2)*first_blocksize+i], intsign ? "positive" : "negative");
	  }
	}
      }
      fclose(ifs);
      if(intsign != 0) signed_index = -index;
      else signed_index = index;
      for(k = 0; k < 2; k++) {
	sprintf(filename, "eigenvalues_of_D%s.%s.%.4d", 
		k ? "minus" : "plus", conf_filename, nstore);
	ifs = fopen(filename, "w");
	if(k == intsign) {
	  fwrite(&converged, sizeof(int), 1, ifs);
	  fwrite(&signed_index, sizeof(int), 1, ifs);
	  for (i=index; i < converged; i++) {
	    fwrite(&eval[i].re, sizeof(double), 1, ifs);
	  }
	}
	else {
	  fwrite(&first_blocksize, sizeof(int), 1, ifs);
	  fwrite(&signed_index, sizeof(int), 1, ifs);
	  for(i = 0; i < first_blocksize; i++) {
	    fwrite(&lowestmodes[((intsign+1)%2)*first_blocksize+i], sizeof(double), 1, ifs);
	  }
	}
	fclose(ifs);
      }
    }
  }

  switch_on_adaptive_precision = 0; 
  /* Free memory */
  free(eigenvectors_ov_);
  free(lowvectors_);
  free(eval);
  free(eigenvalues_ov);
  free(idx);
}