示例#1
0
文件: Msap.c 项目: Finkenrath/tmLQCD
void CGeoSmoother(spinor * const P, spinor * const Q, const int Ncy, const int dummy) {
  spinor ** solver_field = NULL;
  const int nr_sf = 5;
  double musave = g_mu;
  g_mu = g_mu1;
  init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  
  convert_lexic_to_eo(solver_field[0], solver_field[1], Q);
  if(g_c_sw > 0)
    assign_mul_one_sw_pm_imu_inv(EE,solver_field[2], solver_field[0], g_mu);
  else
    assign_mul_one_pm_imu_inv(solver_field[2], solver_field[0], +1., VOLUME/2);
  
  Hopping_Matrix(OE, solver_field[4], solver_field[2]); 
  /* The sign is plus, since in Hopping_Matrix */
  /* the minus is missing                      */
  assign_mul_add_r(solver_field[4], +1., solver_field[1], VOLUME/2);
  /* Do the inversion with the preconditioned  */
  /* matrix to get the odd sites               */
  gamma5(solver_field[4], solver_field[4], VOLUME/2);
  if(g_c_sw > 0) {
    cg_her(solver_field[3], solver_field[4], Ncy, 1.e-8, 1, 
	   VOLUME/2, &Qsw_pm_psi);
    Qsw_minus_psi(solver_field[3], solver_field[3]);
    
    /* Reconstruct the even sites                */
    Hopping_Matrix(EO, solver_field[2], solver_field[3]);
    assign_mul_one_sw_pm_imu_inv(EE,solver_field[4],solver_field[2], g_mu);
  }
  else {
    cg_her(solver_field[3], solver_field[4], Ncy, 1.e-8, 1, 
	   VOLUME/2, &Qtm_pm_psi);
    Qtm_minus_psi(solver_field[3], solver_field[3]);
    
    /* Reconstruct the even sites                */
    Hopping_Matrix(EO, solver_field[4], solver_field[3]);
    mul_one_pm_imu_inv(solver_field[4], +1., VOLUME/2);
  }
  
  /* The sign is plus, since in Hopping_Matrix */
  /* the minus is missing                      */
  assign_add_mul_r(solver_field[2], solver_field[4], +1., VOLUME/2);
  
  convert_eo_to_lexic(P, solver_field[2], solver_field[3]); 
  g_mu = musave;
  finalize_solver(solver_field, nr_sf);
  return;  
}
double cloverdetratio_acc(const int id, hamiltonian_field_t * const hf) {
  monomial * mnl = &monomial_list[id];
  int save_sloppy = g_sloppy_precision_flag;

  g_mu = mnl->mu;
  boundary(mnl->kappa);
  
  g_mu3 = mnl->rho2;
  mnl->Qp(g_spinor_field[DUM_DERI+5], mnl->pf);
  g_mu3 = mnl->rho;

  chrono_guess(g_spinor_field[3], g_spinor_field[DUM_DERI+5], mnl->csg_field, mnl->csg_index_array, 
	       mnl->csg_N, mnl->csg_n, VOLUME/2, &Qtm_plus_psi);
  g_sloppy_precision_flag = 0;    
  mnl->iter0 += cg_her(g_spinor_field[3], g_spinor_field[DUM_DERI+5], mnl->maxiter, mnl->accprec,  
		      g_relative_precision_flag, VOLUME/2, mnl->Qsq);
  mnl->Qm(g_spinor_field[3], g_spinor_field[3]);

  g_sloppy_precision_flag = save_sloppy;

  /* Compute the energy contr. from second field */
  mnl->energy1 = square_norm(g_spinor_field[3], VOLUME/2, 1);

  g_mu = g_mu1;
  g_mu3 = 0.;
  boundary(g_kappa);
  if(g_proc_id == 0 && g_debug_level > 3) {
    printf("called cloverdetratio_acc for id %d dH = %1.4e\n", 
	   id, mnl->energy1 - mnl->energy0);
  }
  return(mnl->energy1 - mnl->energy0);
}
示例#3
0
double cloverdet_acc(const int id, hamiltonian_field_t * const hf) {
  monomial * mnl = &monomial_list[id];
  int save_sloppy = g_sloppy_precision_flag;

  g_mu = mnl->mu;
  g_mu3 = mnl->rho;
  g_c_sw = mnl->c_sw;
  boundary(mnl->kappa);

  sw_term(hf->gaugefield, mnl->kappa, mnl->c_sw); 
  sw_invert(EE, mnl->mu);

  chrono_guess(g_spinor_field[2], mnl->pf, mnl->csg_field, mnl->csg_index_array,
	       mnl->csg_N, mnl->csg_n, VOLUME/2, mnl->Qsq);
  g_sloppy_precision_flag = 0;
  mnl->iter0 = cg_her(g_spinor_field[2], mnl->pf, mnl->maxiter, mnl->accprec,  
		      g_relative_precision_flag, VOLUME/2, mnl->Qsq); 
  mnl->Qm(g_spinor_field[2], g_spinor_field[2]);
  
  g_sloppy_precision_flag = save_sloppy;
  /* Compute the energy contr. from first field */
  mnl->energy1 = square_norm(g_spinor_field[2], VOLUME/2, 1);

  g_mu = g_mu1;
  g_mu3 = 0.;
  boundary(g_kappa);
  if(g_proc_id == 0 && g_debug_level > 3) {
    printf("called cloverdet_acc for id %d %d dH = %1.4e\n", 
	   id, mnl->even_odd_flag, mnl->energy1 - mnl->energy0);
  }
  return(mnl->energy1 - mnl->energy0);
}
示例#4
0
double det_acc(const int id, hamiltonian_field_t * const hf) {
  monomial * mnl = &monomial_list[id];
  int save_iter = ITER_MAX_BCG;
  int save_sloppy = g_sloppy_precision_flag;

  g_mu = mnl->mu;
  boundary(mnl->kappa);
  if(mnl->even_odd_flag) {

    if(mnl->solver == CG) {
      ITER_MAX_BCG = 0;
    }
    chrono_guess(g_spinor_field[2], mnl->pf, mnl->csg_field, mnl->csg_index_array,
		 mnl->csg_N, mnl->csg_n, VOLUME/2, &Qtm_plus_psi);
    g_sloppy_precision_flag = 0;
    mnl->iter0 = bicg(g_spinor_field[2], mnl->pf, mnl->accprec, g_relative_precision_flag);
    g_sloppy_precision_flag = save_sloppy;
    /* Compute the energy contr. from first field */
    mnl->energy1 = square_norm(g_spinor_field[2], VOLUME/2, 1);
  }
  else {
    if(mnl->solver == CG) {
      chrono_guess(g_spinor_field[DUM_DERI+5], mnl->pf, mnl->csg_field, mnl->csg_index_array,
		   mnl->csg_N, mnl->csg_n, VOLUME/2, &Q_pm_psi);
      mnl->iter0 = cg_her(g_spinor_field[DUM_DERI+5], mnl->pf, 
			  mnl->maxiter, mnl->accprec, g_relative_precision_flag, 
			  VOLUME, Q_pm_psi);
      Q_minus_psi(g_spinor_field[2], g_spinor_field[DUM_DERI+5]);
      /* Compute the energy contr. from first field */
      mnl->energy1 = square_norm(g_spinor_field[2], VOLUME, 1);
    }
    else {
      chrono_guess(g_spinor_field[2], mnl->pf, mnl->csg_field, mnl->csg_index_array,
		   mnl->csg_N, mnl->csg_n, VOLUME/2, &Q_plus_psi);
      mnl->iter0 += bicgstab_complex(g_spinor_field[2], mnl->pf, 
				     mnl->maxiter, mnl->forceprec, g_relative_precision_flag, 
				     VOLUME,  Q_plus_psi);
      mnl->energy1 = square_norm(g_spinor_field[2], VOLUME, 1);
    }
  }
  g_mu = g_mu1;
  boundary(g_kappa);
  if(g_proc_id == 0 && g_debug_level > 3) {
    printf("called det_acc for id %d %d dH = %1.4e\n", 
	   id, mnl->even_odd_flag, mnl->energy1 - mnl->energy0);
  }
  ITER_MAX_BCG = save_iter;
  return(mnl->energy1 - mnl->energy0);
}
示例#5
0
double cloverdet_acc(const int id, hamiltonian_field_t * const hf) {
  monomial * mnl = &monomial_list[id];
  int save_sloppy = g_sloppy_precision_flag;
  double atime, etime;
  atime = gettime();

  g_mu = mnl->mu;
  g_mu3 = mnl->rho;
  g_c_sw = mnl->c_sw;
  boundary(mnl->kappa);

  sw_term( (const su3**) hf->gaugefield, mnl->kappa, mnl->c_sw); 
  sw_invert(EE, mnl->mu);

  chrono_guess(mnl->w_fields[0], mnl->pf, mnl->csg_field, mnl->csg_index_array,
	       mnl->csg_N, mnl->csg_n, VOLUME/2, mnl->Qsq);
  g_sloppy_precision_flag = 0;
  mnl->iter0 = cg_her(mnl->w_fields[0], mnl->pf, mnl->maxiter, mnl->accprec,  
		      g_relative_precision_flag, VOLUME/2, mnl->Qsq); 
  mnl->Qm(mnl->w_fields[0], mnl->w_fields[0]);
  
  g_sloppy_precision_flag = save_sloppy;
  /* Compute the energy contr. from first field */
  mnl->energy1 = square_norm(mnl->w_fields[0], VOLUME/2, 1);

  g_mu = g_mu1;
  g_mu3 = 0.;
  boundary(g_kappa);
  etime = gettime();
  if(g_proc_id == 0) {
    if(g_debug_level > 1) {
      printf("# Time for %s monomial acc step: %e s\n", mnl->name, etime-atime);
    }
    if(g_debug_level > 3) {
      printf("called cloverdet_acc for id %d dH = %1.10e\n", 
	     id, mnl->energy1 - mnl->energy0);
    }
  }
  return(mnl->energy1 - mnl->energy0);
}
void cloverdetratio_heatbath(const int id, hamiltonian_field_t * const hf) {
  monomial * mnl = &monomial_list[id];

  g_mu = mnl->mu;
  g_c_sw = mnl->c_sw;
  boundary(mnl->kappa);
  mnl->csg_n = 0;
  mnl->csg_n2 = 0;
  mnl->iter0 = 0;
  mnl->iter1 = 0;
  
  init_sw_fields();
  sw_term( (const su3**) hf->gaugefield, mnl->kappa, mnl->c_sw); 
  sw_invert(EE, mnl->mu);

  random_spinor_field(g_spinor_field[4], VOLUME/2, mnl->rngrepro);
  mnl->energy0  = square_norm(g_spinor_field[4], VOLUME/2, 1);
  
  g_mu3 = mnl->rho;
  mnl->Qp(g_spinor_field[3], g_spinor_field[4]);
  g_mu3 = mnl->rho2;
  zero_spinor_field(mnl->pf,VOLUME/2);

  mnl->iter0 = cg_her(mnl->pf, g_spinor_field[3], mnl->maxiter, mnl->accprec,  
		      g_relative_precision_flag, VOLUME/2, mnl->Qsq); 

  chrono_add_solution(mnl->pf, mnl->csg_field, mnl->csg_index_array,
		      mnl->csg_N, &mnl->csg_n, VOLUME/2);
  mnl->Qm(mnl->pf, mnl->pf);

  if(g_proc_id == 0 && g_debug_level > 3) {
    printf("called cloverdetratio_heatbath for id %d \n", id);
  }
  g_mu3 = 0.;
  g_mu = g_mu1;
  boundary(g_kappa);
  return;
}
示例#7
0
void det_derivative(const int id, hamiltonian_field_t * const hf) {
  monomial * mnl = &monomial_list[id];

  /* This factor 2 a missing factor 2 in trace_lambda */
  (*mnl).forcefactor = 2.;

  if(mnl->even_odd_flag) {
    /*********************************************************************
     * 
     * even/odd version 
     *
     * This a term is det(\hat Q^2(\mu))
     *
     *********************************************************************/
    
    g_mu = mnl->mu;
    boundary(mnl->kappa);

    if(mnl->solver != CG) {
      fprintf(stderr, "Bicgstab currently not implemented, using CG instead! (det_monomial.c)\n");
    }
    
    /* Invert Q_{+} Q_{-} */
    /* X_o -> DUM_DERI+1 */
    chrono_guess(g_spinor_field[DUM_DERI+1], mnl->pf, mnl->csg_field, mnl->csg_index_array,
		 mnl->csg_N, mnl->csg_n, VOLUME/2, &Qtm_pm_psi);
    mnl->iter1 += cg_her(g_spinor_field[DUM_DERI+1], mnl->pf, mnl->maxiter, mnl->forceprec, 
			 g_relative_precision_flag, VOLUME/2, &Qtm_pm_psi);
    chrono_add_solution(g_spinor_field[DUM_DERI+1], mnl->csg_field, mnl->csg_index_array,
			mnl->csg_N, &mnl->csg_n, VOLUME/2);
    
    /* Y_o -> DUM_DERI  */
    Qtm_minus_psi(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]);
    
    /* apply Hopping Matrix M_{eo} */
    /* to get the even sites of X_e */
    H_eo_tm_inv_psi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+1], EO, -1.);
    /* \delta Q sandwitched by Y_o^\dagger and X_e */
    deriv_Sb(OE, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+2], hf); 
    
    /* to get the even sites of Y_e */
    H_eo_tm_inv_psi(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI], EO, +1);
    /* \delta Q sandwitched by Y_e^\dagger and X_o */
    deriv_Sb(EO, g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+1], hf);

  } 
  else {
    /*********************************************************************
     * non even/odd version
     * 
     * This term is det(Q^2 + \mu_1^2)
     *
     *********************************************************************/
    g_mu = mnl->mu;
    boundary(mnl->kappa);
    if(mnl->solver == CG) {
      /* Invert Q_{+} Q_{-} */
      /* X -> DUM_DERI+1 */
      chrono_guess(g_spinor_field[DUM_DERI+1], mnl->pf, mnl->csg_field, mnl->csg_index_array,
		   mnl->csg_N, mnl->csg_n, VOLUME/2, &Q_pm_psi);
      mnl->iter1 += cg_her(g_spinor_field[DUM_DERI+1], mnl->pf, 
			mnl->maxiter, mnl->forceprec, g_relative_precision_flag, 
			VOLUME, &Q_pm_psi);
      chrono_add_solution(g_spinor_field[DUM_DERI+1], mnl->csg_field, mnl->csg_index_array,
			  mnl->csg_N, &mnl->csg_n, VOLUME/2);

      /* Y -> DUM_DERI  */
      Q_minus_psi(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]);
      
    }
    else {
      /* Invert first Q_+ */
      /* Y -> DUM_DERI  */
      chrono_guess(g_spinor_field[DUM_DERI], mnl->pf, mnl->csg_field, mnl->csg_index_array,
		   mnl->csg_N, mnl->csg_n, VOLUME/2, &Q_plus_psi);
      mnl->iter1 += bicgstab_complex(g_spinor_field[DUM_DERI], mnl->pf, 
				     mnl->maxiter, mnl->forceprec, g_relative_precision_flag, 
				     VOLUME,  Q_plus_psi);
      chrono_add_solution(g_spinor_field[DUM_DERI], mnl->csg_field, mnl->csg_index_array,
			  mnl->csg_N, &mnl->csg_n, VOLUME/2);
      
      /* Now Q_- */
      /* X -> DUM_DERI+1 */
      g_mu = -g_mu;
      chrono_guess(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI], mnl->csg_field2, 
		   mnl->csg_index_array2, mnl->csg_N2, mnl->csg_n2, VOLUME/2, &Q_minus_psi);
      mnl->iter1 += bicgstab_complex(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI], 
				     mnl->maxiter, mnl->forceprec, g_relative_precision_flag, 
				     VOLUME, Q_minus_psi);
      chrono_add_solution(g_spinor_field[DUM_DERI+1], mnl->csg_field2, mnl->csg_index_array2,
			  mnl->csg_N2, &mnl->csg_n2, VOLUME/2);
      g_mu = -g_mu;   
    }
    
    /* \delta Q sandwitched by Y^\dagger and X */
    deriv_Sb_D_psi(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1], hf);
  }
  g_mu = g_mu1;
  boundary(g_kappa);

  return;
}
示例#8
0
int arpack_cg(
  /* solver params */
  const int N,                   /* (IN) Number of lattice sites for this process*/
  solver_params_t solver_params, /* (IN) parameters for solver */
  spinor * const x,              /* (IN/OUT) initial guess on input, solution on output for this RHS*/
  spinor * const b,              /* (IN) right-hand side*/
  matrix_mult f,                 /* (IN) f(s,r) computes s=A*r, i.e. matrix-vector multiply in double precision */
  matrix_mult f32,               /* (IN) f(s,r) computes s=A*r, i.e. matrix-vector multiply in single precision */
  const double eps_sq,           /* (IN) squared tolerance of convergence of the linear system for systems nrhs1+1 till nrhs*/
  const int rel_prec,            /* (IN) 0 for using absoute error for convergence
                                         1 for using relative error for convergence*/
  const int maxit,               /* (IN) Maximum allowed number of iterations to solution for the linear system*/
  matrix_mult f_final,           /* (IN) final operator application during projection of type 1 */
  matrix_mult f_initial          /* (IN) initial operator application during projection of type 1 */
) {

  /* Static variables and arrays. */
  static int ncurRHS=0;                  /* current number of the system being solved */                   
  static void *_ax,*_r,*_tmps1,*_tmps2;                  
  static spinor *ax,*r,*tmps1,*tmps2;                  
  static _Complex double *evecs,*evals,*H,*HU,*Hinv,*initwork,*tmpv1;
  static _Complex double *zheev_work;
  static double *hevals,*zheev_rwork;
  static int *IPIV; 
  static int info_arpack=0;
  static int nconv=0; /* number of converged eigenvectors as returned by arpack */
  int i,j,tmpsize;
  char cV='V',cN='N', cU='U';   
  int ONE=1;
  int zheev_lwork,zheev_info;
  _Complex double c1, c2, c3, tpone=1.0,tzero=0.0;
  double d1,d2,d3;
  double et1,et2;  /* timing variables */
  char evecs_filename[500];
  char howmny = 'P';
  FILE *evecs_fs=NULL;
  size_t evecs_count;
  WRITER *evecs_writer=NULL;
  spinor *evecs_ptr0 = NULL, *evecs_ptr1 = NULL;
  paramsPropagatorFormat *evecs_propagatorFormat = NULL;
  void *evecs_io_buffer = NULL;

  int parallel;        /* for parallel processing of the scalar products */
#ifdef TM_USE_MPI
    parallel=1;
#else
    parallel=0;
#endif

  /* leading dimension for spinor vectors */
  int LDN;
  if(N==VOLUME)
     LDN = VOLUMEPLUSRAND;
  else
     LDN = VOLUMEPLUSRAND/2; 

  /*(IN) Number of right-hand sides to be solved*/ 
  const int nrhs =   solver_params.arpackcg_nrhs; 
  /*(IN) First number of right-hand sides to be solved using tolerance eps_sq1*/ 
  const int nrhs1 =   solver_params.arpackcg_nrhs1;
  /*(IN) squared tolerance of convergence of the linear system for systems 1 till nrhs1*/
  const double eps_sq1 = solver_params.arpackcg_eps_sq1;
  /*(IN) suqared tolerance for restarting cg */
  const double res_eps_sq =   solver_params.arpackcg_res_eps_sq;

  /* parameters for arpack */

  /*(IN) number of eigenvectors to be computed by arpack*/
  const int nev = solver_params.arpackcg_nev;
   /*(IN) size of the subspace used by arpack with the condition (nev+1) =< ncv*/
  const int ncv = solver_params.arpackcg_ncv;
  /*(IN) tolerance for computing eigenvalues with arpack */
  double arpack_eig_tol =   solver_params.arpackcg_eig_tol;
  /*(IN) maximum number of iterations to be used by arpack*/
  int arpack_eig_maxiter =   solver_params.arpackcg_eig_maxiter;
  /*(IN) 0 for eigenvalues with smallest real part "SR"
         1 for eigenvalues with largest real part "LR"
         2 for eigenvalues with smallest absolute value "SM"
         3 for eigenvalues with largest absolute value "LM"
         4 for eigenvalues with smallest imaginary part "SI"
         5 for eigenvalues with largest imaginary part  "LI"*/
  int kind =   solver_params.arpackcg_evals_kind;
  /*(IN) 0 don't compute the eiegnvalues and their residuals of the original system 
         1 compute the eigenvalues and the residuals for the original system (the orthonormal basis
           still be used in deflation and they are not overwritten).*/
  int comp_evecs =   solver_params.arpackcg_comp_evecs;
  /*(IN) 0 no polynomial acceleration; 1 use polynomial acceleration*/
  int acc =   solver_params.use_acc;
  /*(IN) degree of the Chebyshev polynomial (irrelevant if acc=0)*/
  int cheb_k = solver_params.cheb_k;
  /*(IN) lower end of the interval where the acceleration will be used (irrelevant if acc=0)*/
  double emin = solver_params.op_evmin;
  /*(IN) upper end of the interval where the acceleration will be used (irrelevant if acc=0)*/
  double emax = solver_params.op_evmax;
  /*(IN) file name to be used for printing out debugging information from arpack*/
  char *arpack_logfile = solver_params.arpack_logfile;
  /*(IN) read eigenvectors in Schur basis from file */
  int  arpack_read_ev = solver_params.arpackcg_read_ev;
  /*(IN) write eigenvectors in Schur basis to file */
  int  arpack_write_ev = solver_params.arpackcg_write_ev;
  /*(IN) file name to be used for reading and writing evecs from and to disc */
  char *arpack_evecs_filename = solver_params.arpack_evecs_filename;
   /*(IN) precision used for writing eigenvectors */
  int arpack_evecs_writeprec = solver_params.arpack_evecs_writeprec;
  /* how to project with approximate eigenvectors */
  int projection_type = solver_params.projection_type;
  /* file format for evecs used by arpack */
  char *arpack_evecs_fileformat = solver_params.arpack_evecs_fileformat; 

  /*-------------------------------------------------------------
    if this is the first right hand side, allocate memory, 
    call arpack, and compute resiudals of eigenvectors if needed
    -------------------------------------------------------------*/ 
  if(ncurRHS==0){ 
#if (defined SSE || defined SSE2 || defined SSE3)
    _ax = malloc((LDN+ALIGN_BASE)*sizeof(spinor));
    if(_ax==NULL)
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for _ax inside arpack_cg.\n");
       exit(1);
    }
    else
       {ax  = (spinor *) ( ((unsigned long int)(_ax)+ALIGN_BASE)&~ALIGN_BASE);}

    _r = malloc((LDN+ALIGN_BASE)*sizeof(spinor));
    if(_r==NULL)
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for _r inside arpack_cg.\n");
       exit(1);
    }
    else
       {r  = (spinor *) ( ((unsigned long int)(_r)+ALIGN_BASE)&~ALIGN_BASE);}

    _tmps1 = malloc((LDN+ALIGN_BASE)*sizeof(spinor));
    if(_tmps1==NULL)
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for _tmps1 inside arpack_cg.\n");
       exit(1);
    }
    else
       {tmps1  = (spinor *) ( ((unsigned long int)(_tmps1)+ALIGN_BASE)&~ALIGN_BASE);}

    _tmps2 = malloc((LDN+ALIGN_BASE)*sizeof(spinor));
    if(_tmps2==NULL)
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for _tmps2 inside arpack_cg.\n");
       exit(1);
    }
    else
       {tmps2  = (spinor *) ( ((unsigned long int)(_tmps2)+ALIGN_BASE)&~ALIGN_BASE);}

#else
    ax = (spinor *) malloc(LDN*sizeof(spinor));
    r  = (spinor *) malloc(LDN*sizeof(spinor));
    tmps1 = (spinor *) malloc(LDN*sizeof(spinor));
    tmps2 = (spinor *) malloc(LDN*sizeof(spinor));
    
    if( (ax == NULL)  || (r==NULL) || (tmps1==NULL) || (tmps2==NULL) )
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for ax,r,tmps1,tmps2 inside arpack_cg.\n");
       exit(1);
    }
#endif


    evecs = (_Complex double *) malloc(ncv*12*N*sizeof(_Complex double)); /* note: no extra buffer  */
    evals = (_Complex double *) malloc(ncv*sizeof(_Complex double)); 
    tmpv1 = (_Complex double *) malloc(12*N*sizeof(_Complex double));

    if((evecs == NULL)  || (evals==NULL) || (tmpv1==NULL))
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for evecs and evals inside arpack_cg.\n");
       exit(1);
    }

    if ( arpack_read_ev == 1) {

      if (strcmp(arpack_evecs_fileformat, "partfile") == 0) {
        /* set evec filenmae */
        sprintf(evecs_filename, "%s.%.5d.pt%.2dpx%.2dpy%.2dpz%.2d", arpack_evecs_filename, nev, g_proc_coords[0], g_proc_coords[1], g_proc_coords[2], g_proc_coords[3]);
        evecs_fs = fopen(evecs_filename, "r");
        if (evecs_fs == NULL) {
          fprintf(stderr, "[arpack_cg] (%.4d) Error, could not open file %s for reading\n", g_cart_id, evecs_filename);
          return(-2);
        }
        fprintf(stdout, "# [arpack_cg] reading eigenvectors from file %s\n", evecs_filename);

        if(arpack_evecs_writeprec == 64) {
 
          evecs_io_buffer = (void*)evecs;
   
          et1=gettime();
          evecs_count = fread( evecs_io_buffer, sizeof(_Complex double), (size_t)nev*12*N, evecs_fs);
          et2=gettime();
        
        } else {
          evecs_io_buffer = malloc(sizeof(_Complex double) * (size_t)nev*12*N );
          if( evecs_io_buffer == NULL) {
            fprintf(stderr, "[arpack_cg] (%.4d) Error, could not allocate memory for evecs_io_buffer\n", g_cart_id);
            return(-42);
          }
  
          et1=gettime();
          evecs_count = fread( evecs_io_buffer, sizeof(_Complex double)/2, (size_t)nev*12*N, evecs_fs);
          et2=gettime();

          single2double(evecs, evecs_io_buffer, nev*24*N);

          free( evecs_io_buffer );
          evecs_io_buffer = NULL;
        }
       
        if( evecs_count != ((size_t)nev*12*N) ) {
          fprintf(stderr, "[arpack_cg] (%.4d) Error, could not proper amount of data from file %s\n", g_cart_id, evecs_filename);
          return(-3);
        }
        fclose(evecs_fs);
        evecs_fs = NULL;
        if(g_proc_id == g_stdio_proc) {
          fprintf(stdout,"# [arpack_cg] ARPACK time for reading %d eigenvectors: %+e seconds\n", nev, et2-et1);
        }
      } else if(strcmp(arpack_evecs_fileformat, "single") == 0) {

        if(N==VOLUME) {
          for(i=0; i<nev; i++) {
            sprintf(evecs_filename, "%s.ev%.5d", arpack_evecs_filename, i);
            evecs_ptr0 = (spinor*)&(evecs[i*12*N]);
            evecs_ptr1 = NULL;
            read_spinor(evecs_ptr0,  evecs_ptr1, evecs_filename, 0);
          } /* end of loop on eigenvectors */
        } else if(N==VOLUME/2) {
          for(i=0; i<nev/2; i++) {
            sprintf(evecs_filename, "%s.ev%.5d", arpack_evecs_filename, 2*i);
            evecs_ptr0 = (spinor*)&(evecs[(2*i  )*12*N]);
            evecs_ptr1 = (spinor*)&(evecs[(2*i+1)*12*N]);
            read_spinor(evecs_ptr0,  evecs_ptr1, evecs_filename, 0);
          } /* end of loop on eigenvectors */
        }
      }   /* of if arpack_evecs_fileformat */

      /* set info_arpack pro forma to SUCCESS */
      nconv = nev;
      info_arpack = 0;
    } else {
      et1=gettime();
      evals_arpack(N,nev,ncv,kind,howmny,acc,cheb_k,emin,emax,evals,evecs,arpack_eig_tol,arpack_eig_maxiter,f,&info_arpack,&nconv,arpack_logfile);
      et2=gettime();

      if(info_arpack != 0){ /* arpack didn't converge */
      if(g_proc_id == g_stdio_proc)
        fprintf(stderr,"[arpack_cg] WARNING: ARPACK didn't converge. exiting..\n");
        return -1;
      }
    
      if(g_proc_id == g_stdio_proc)
      {
         fprintf(stdout,"# [arpack_cg] ARPACK has computed %d eigenvectors\n",nconv);
         fprintf(stdout,"# [arpack_cg] ARPACK time: %+e\n",et2-et1);
      }

      if ( arpack_write_ev == 1) {

        if(strcmp(arpack_evecs_fileformat, "partfile") == 0 ) {

          if( g_cart_id == 0 ) fprintf(stdout, "# [arpack_cg] writing evecs in partfile format\n");
          /* set evec filenmae */
          sprintf(evecs_filename, "%s.%.5d.pt%.2dpx%.2dpy%.2dpz%.2d", arpack_evecs_filename, nconv, g_proc_coords[0], g_proc_coords[1], g_proc_coords[2], g_proc_coords[3]);

          evecs_fs = fopen(evecs_filename, "w");
          if (evecs_fs == NULL) {
            fprintf(stderr, "[arpack_cg] (%.4d) Error, could not open file %s for writing\n", g_cart_id, evecs_filename);
            return(-4);
          }
        
          if(arpack_evecs_writeprec == 64) {

            evecs_io_buffer = (void*)evecs;
 
            et1=gettime();
            evecs_count = fwrite( evecs_io_buffer, sizeof(_Complex double), (size_t)nconv*12*N, evecs_fs);
            et2=gettime();

          } else {
            evecs_io_buffer = malloc(sizeof(_Complex double) * (size_t)nconv*12*N );
            if( evecs_io_buffer == NULL) {
              fprintf(stderr, "[arpack_cg] (%.4d) Error, could not allocate memory for evecs_io_buffer\n", g_cart_id);
              return(-41);
            }
            double2single(evecs_io_buffer, evecs, nconv*24*N);
 
            et1=gettime();
            evecs_count = fwrite( evecs_io_buffer, sizeof(_Complex double)/2, (size_t)nconv*12*N, evecs_fs);
            et2=gettime();
            free(evecs_io_buffer);
            evecs_io_buffer = NULL;
          }
 
          if( evecs_count != ((size_t)nconv*12*N) ) {
            fprintf(stderr, "[arpack_cg] (%.4d) Error, could not write proper amount of data to file %s\n", g_cart_id, evecs_filename);
            return(-5);
          }
          fclose(evecs_fs);
          evecs_fs = NULL;

          if(g_proc_id == g_stdio_proc) {
            fprintf(stdout,"[arpack_cg] (%.4d) ARPACK time for writing %d eigenvectors: %+e seconds\n", g_cart_id, nconv, et2-et1);
          }

        } else if (strcmp(arpack_evecs_fileformat, "single") == 0) {

          if(N==VOLUME) {
            for(i=0; i<nconv; i++) {
              sprintf(evecs_filename, "%s.ev%.5d", arpack_evecs_filename, i);
              construct_writer(&evecs_writer, evecs_filename, 0);
              evecs_propagatorFormat = construct_paramsPropagatorFormat(arpack_evecs_writeprec, 1);
              write_propagator_format(evecs_writer, evecs_propagatorFormat);
              free(evecs_propagatorFormat);
              evecs_ptr0 = (spinor*)&(evecs[i*12*N]);
              evecs_ptr1 = NULL;
              write_spinor(evecs_writer, &evecs_ptr0, &evecs_ptr1, 1, arpack_evecs_writeprec);
              destruct_writer(evecs_writer);
              evecs_writer=NULL;
            } /* end of loop on converged eigenvectors */
          } else if(N==VOLUME/2) {
            for(i=0; i<nconv/2; i++) {
              sprintf(evecs_filename, "%s.ev%.5d", arpack_evecs_filename, 2*i);
              construct_writer(&evecs_writer, evecs_filename, 0);
              evecs_propagatorFormat = construct_paramsPropagatorFormat(arpack_evecs_writeprec, 1);
              write_propagator_format(evecs_writer, evecs_propagatorFormat);
              free(evecs_propagatorFormat);
              evecs_ptr0 = (spinor*)&(evecs[(2*i  )*12*N]);
              evecs_ptr1 = (spinor*)&(evecs[(2*i+1)*12*N]);
              write_spinor(evecs_writer, &evecs_ptr0, &evecs_ptr1,1, arpack_evecs_writeprec);
              destruct_writer(evecs_writer);
              evecs_writer=NULL;
            }  /* end of loop on converged eigenvectors */
          }    /* end of if N == VOLUME */

        }      /* of if arpack_evecs_fileformat */

      }        /* end of if arpack_write_ev == 1 */

    }          /* end of if arpack_read_ev == 1 */

    H        = (_Complex double *) malloc(nconv*nconv*sizeof(_Complex double)); 
    Hinv     = (_Complex double *) malloc(nconv*nconv*sizeof(_Complex double)); 
    initwork = (_Complex double *) malloc(nconv*sizeof(_Complex double)); 
    IPIV     = (int *) malloc(nconv*sizeof(int));
    zheev_lwork = 3*nconv;
    zheev_work  = (_Complex double *) malloc(zheev_lwork*sizeof(_Complex double));
    zheev_rwork = (double *) malloc(3*nconv*sizeof(double));
    hevals      = (double *) malloc(nconv*sizeof(double));

    if((H==NULL) || (Hinv==NULL) || (initwork==NULL) || (IPIV==NULL) || (zheev_lwork==NULL) || (zheev_rwork==NULL) || (hevals==NULL))
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for H, Hinv, initwork, IPIV, zheev_lwork, zheev_rwork, hevals inside arpack_cg.\n");
       exit(1);
    }

    et1=gettime();
    /* compute the elements of the hermitian matrix H 
       leading dimension is nconv and active dimension is nconv */
    
    if( projection_type == 0) {
    
      for(i=0; i<nconv; i++)
      {
        assign_complex_to_spinor(r,&evecs[i*12*N],12*N);
        f(ax,r);
        c1 = scalar_prod(r,ax,N,parallel);
        H[i+nconv*i] = creal(c1);  /* diagonal should be real */
        for(j=i+1; j<nconv; j++)
        {
          assign_complex_to_spinor(r,&evecs[j*12*N],12*N);
          c1 = scalar_prod(r,ax,N,parallel);
          H[j+nconv*i] = c1;
          H[i+nconv*j] = conj(c1); /* enforce hermiticity */
        }
      }

    } else if ( projection_type == 1 )  {

      for(i=0; i<nconv; i++)
      {
        assign_complex_to_spinor(tmps1, &evecs[i*12*N], 12*N);
        f_final(r, tmps1);
        f(ax,r);
        c1 = scalar_prod(r,ax,N,parallel);
        c2 = scalar_prod(r,r,N,parallel);
        H[i+nconv*i] = creal(c1) / creal(c2);   /* diagonal should be real */
        for(j=i+1; j<nconv; j++)
        {
          assign_complex_to_spinor(tmps1, &evecs[j*12*N], 12*N);
          f_final(r, tmps1);
          c1 = scalar_prod(r,ax,N,parallel);
          c3 = scalar_prod(r, r, N, parallel);

          H[j+nconv*i] = c1 / sqrt( creal(c2) * creal(c3) );
          H[i+nconv*j] = conj(c1) / sqrt( creal(c2) * creal(c3) ); /* enforce hermiticity */
        }
      }
    }


    et2=gettime();
    if(g_proc_id == g_stdio_proc) {
      fprintf(stdout,"[arpack_cg] time to compute H: %+e\n",et2-et1);
    }

/*
    if(g_cart_id == 0) {
      for(i=0; i<nconv; i++) {
      for(j=0; j<nconv; j++) {
        fprintf(stdout, "# [arpack_cg] H[%d, %d] = %25.16e %25.16e\n", i, j, creal(H[i*nconv+j]), cimag(H[i*nconv+j]));
      }}
    }
*/



     et1=gettime();
     /* compute Ritz values and Ritz vectors if needed */
     if( (nconv>0) && (comp_evecs !=0))
     {
         HU = (_Complex double *) malloc(nconv*nconv*sizeof(_Complex double)); 
         if( HU==NULL ) {
           if(g_proc_id == g_stdio_proc)
             fprintf(stderr,"[arpack_cg] insufficient memory for HU inside arpack_cg\n");
             exit(2);
         }
         /* copy H into HU */
         tmpsize=nconv*nconv;
         _FT(zcopy)(&tmpsize,H,&ONE,HU,&ONE);

         /* compute eigenvalues and eigenvectors of HU*/
         /* SUBROUTINE ZHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK,INFO ) */
         _FT(zheev)(&cV,&cU,&nconv,HU,&nconv,hevals,zheev_work,&zheev_lwork,zheev_rwork,&zheev_info,1,1);

         if(zheev_info != 0)
         {
	    if(g_proc_id == g_stdio_proc) 
	    {
	        fprintf(stderr,"[arpack_cg] Error in ZHEEV:, info =  %d\n",zheev_info); 
                fflush(stderr);
	    }
	    exit(1);
         }

         /* If you want to replace the schur (orthonormal) basis by eigen basis
            use something like this. It is better to use the schur basis because
            they are better conditioned. Use this part only to get the eigenvalues
            and their resduals for the operator (D^\daggerD)
            esize=(ncv-nconv)*12*N;
            Zrestart_X(evecs,12*N,HU,12*N,nconv,nconv,&evecs[nconv*N],esize); */

         /* compute residuals and print out results */

	 if(g_proc_id == g_stdio_proc)
	 {fprintf(stdout,"# [arpack_cg] Ritz values of A and their residulas (||A*x-lambda*x||/||x||\n"); 
          fprintf(stdout,"# [arpack_cg] =============================================================\n");
          fflush(stdout);}

         for(i=0; i<nconv; i++)
         {
	    tmpsize=12*N;
            _FT(zgemv)(&cN,&tmpsize,&nconv,&tpone,evecs,&tmpsize,
		       &HU[i*nconv],&ONE,&tzero,tmpv1,&ONE,1);

            assign_complex_to_spinor(r,tmpv1,12*N);

            d1=square_norm(r,N,parallel);
            
            f(ax,r);

            mul_r(tmps1,hevals[i],r,N);

            diff(tmps2,ax,tmps1,N);
	    
	    d2= square_norm(tmps2,N,parallel);

            d3= sqrt(d2/d1);
	    
	    if(g_proc_id == g_stdio_proc)
	    {fprintf(stdout,"Eval[%06d]: %22.15E rnorm: %22.15E\n", i, hevals[i], d3); fflush(stdout);}
        } 
        free( HU ); HU = NULL;
     }  /* if( (nconv_arpack>0) && (comp_evecs !=0)) */
     et2=gettime();
     if(g_proc_id == g_stdio_proc) {
       fprintf(stdout,"[arpack_cg] time to compute eigenvectors: %+e\n",et2-et1);
     }

  }  /* if(ncurRHS==0) */
    
  double eps_sq_used,restart_eps_sq_used;  /* tolerance squared for the linear system */

  double cur_res; /* current residual squared */

  /*increment the RHS counter*/
  ncurRHS = ncurRHS +1; 

  /* set the tolerance to be used for this right-hand side  */
  if(ncurRHS > nrhs1){
    eps_sq_used = eps_sq;
  }
  else{
    eps_sq_used = eps_sq1;
  }
  
  if(g_proc_id == g_stdio_proc && g_debug_level > 0) {
    fprintf(stdout, "# [arpack_cg] System %d, eps_sq %e, projection type %d\n",ncurRHS,eps_sq_used, projection_type); 
    fflush(stdout);
  } 
  
  /*---------------------------------------------------------------*/
  /* Call init-CG until this right-hand side converges             */
  /*---------------------------------------------------------------*/
  double wt1,wt2,wE,wI;
  double normsq,tol_sq;
  int flag,maxit_remain,numIts,its;
  int info_lapack;

  wE = 0.0; wI = 0.0;     /* Start accumulator timers */
  flag = -1;    	  /* System has not converged yet */
  maxit_remain = maxit;   /* Initialize Max and current # of iters   */
  numIts = 0;  
  restart_eps_sq_used=res_eps_sq;

  while( flag == -1 )
  {
    
    if(nconv > 0)
    {


      /* --------------------------------------------------------- */
      /* Perform init-CG with evecs vectors                        */
      /* xinit = xinit + evecs*Hinv*evec'*(b-Ax0) 		   */
      /* --------------------------------------------------------- */
      wt1 = gettime();

      /*r0=b-Ax0*/
      f(ax,x); /*ax = A*x */
      diff(r,b,ax,N);  /* r=b-A*x */

      if( projection_type == 0) {

        /* x = x + evecs*inv(H)*evecs'*r */
        for(int i=0; i < nconv; i++)
        {
           assign_complex_to_spinor(tmps1,&evecs[i*12*N],12*N);
           initwork[i]= scalar_prod(tmps1,r,N,parallel);
        }

        /* solve the linear system H y = c */
        tmpsize=nconv*nconv;
        _FT(zcopy) (&tmpsize,H,&ONE,Hinv,&ONE); /* copy H into Hinv */
        /* SUBROUTINE ZGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) */
        _FT(zgesv) (&nconv,&ONE,Hinv,&nconv,IPIV,initwork,&nconv,&info_lapack);

        if(info_lapack != 0)
        {
           if(g_proc_id == g_stdio_proc) {
              fprintf(stderr, "[arpack_cg] Error in ZGESV:, info =  %d\n",info_lapack); 
              fflush(stderr);
           }
           exit(1);
        }

        /* x = x + evecs*inv(H)*evecs'*r */
        for(i=0; i<nconv; i++)
        {
          assign_complex_to_spinor(tmps1,&evecs[i*12*N],12*N);
          assign_add_mul(x,tmps1,initwork[i],N);
        }

      } else if ( projection_type == 1 ) {
        /* x = x + evecs*inv(H)*evecs'*r */

        /* tmps2 = Q^+ r */
        f_initial(tmps2, r);

        for(int i=0; i < nconv; i++) {
          /* tmps1 = v_i */
          assign_complex_to_spinor(tmps1,&evecs[i*12*N],12*N);

          /* initwork_i = v_i^+ Q^+ r / lambda_i^2 */
          initwork[i]= scalar_prod(tmps1, tmps2, N, parallel) / ( H[i*nconv+i] * H[i*nconv+i] );
        }

        memset(tmps2, 0, N*sizeof(spinor) );
        for(i=0; i<nconv; i++) {
          assign_complex_to_spinor(tmps1, &evecs[i*12*N], 12*N);
          assign_add_mul(tmps2, tmps1, initwork[i], N);
        }

        /* apply final operator */
        f_final(tmps1, tmps2);
        assign_add_mul(x, tmps1, 1., N);

      }  /* end of if projection type */

      /* compute elapsed time and add to accumulator */

      wt2 = gettime();
      wI = wI + wt2-wt1;
      
    }/* if(nconv > 0) */


    /* which tolerance to use */
    if(eps_sq_used > restart_eps_sq_used)
    {
       tol_sq = eps_sq_used;
       flag   = 1; /* shouldn't restart again */
    }
    else
    {
       tol_sq = restart_eps_sq_used;
    }

    wt1 = gettime();
    its = cg_her(x,b,maxit_remain,tol_sq,rel_prec,N,f); 
          
    wt2 = gettime();

    wE = wE + wt2-wt1;

    /* check convergence */
    if(its == -1)
    {
       /* cg didn't converge */
       if(g_proc_id == g_stdio_proc) {
         fprintf(stderr, "[arpack_cg] CG didn't converge within the maximum number of iterations in arpack_cg. Exiting...\n"); 
         fflush(stderr);
         exit(1);
         
       }
    } 
    else
    {
       numIts += its;   
       maxit_remain = maxit - numIts; /* remaining number of iterations */
       restart_eps_sq_used = restart_eps_sq_used*res_eps_sq; /* prepare for the next restart */
    }
    
  }
  /* end while (flag ==-1)               */
  
  /* ---------- */
  /* Reporting  */
  /* ---------- */
  /* compute the exact residual */
  f(ax,x); /* ax= A*x */
  diff(r,b,ax,N);  /* r=b-A*x */	
  normsq=square_norm(r,N,parallel);
  if(g_debug_level > 0 && g_proc_id == g_stdio_proc)
  {
    fprintf(stdout, "# [arpack_cg] For this rhs:\n");
    fprintf(stdout, "# [arpack_cg] Total initCG Wallclock : %+e\n", wI);
    fprintf(stdout, "# [arpack_cg] Total cg Wallclock : %+e\n", wE);
    fprintf(stdout, "# [arpack_cg] Iterations: %-d\n", numIts); 
    fprintf(stdout, "# [arpack_cg] Actual Resid of LinSys  : %+e\n",normsq);
  }


  /* free memory if this was your last system to solve */
  if(ncurRHS == nrhs){
#if ( (defined SSE) || (defined SSE2) || (defined SSE3)) 
    free(_ax);  free(_r);  free(_tmps1); free(_tmps2);
#else
    free(ax); free(r); free(tmps1); free(tmps2);
#endif
    free(evecs); free(evals); free(H); free(Hinv);
    free(initwork); free(tmpv1); free(zheev_work);
    free(hevals); free(zheev_rwork); free(IPIV);
  }


  return numIts;
}
示例#9
0
void invert_overlap(const int op_id, const int index_start) {
  operator * optr;
  void (*op)(spinor*,spinor*);
  static complex alpha={0,0};
  spinorPrecWS *ws;
  optr = &operator_list[op_id];
  op=&Dov_psi;

  /* here we need to (re)compute the kernel eigenvectors */
  /* for new gauge fields                                */

  if(g_proc_id == 0) {printf("# Not using even/odd preconditioning!\n"); fflush(stdout);}
  convert_eo_to_lexic(g_spinor_field[DUM_DERI], optr->sr0, optr->sr1);
  convert_eo_to_lexic(g_spinor_field[DUM_DERI+1], optr->prop0, optr->prop1);

  if(optr->solver == 13 ){
    optr->iterations = sumr(g_spinor_field[DUM_DERI+1],g_spinor_field[DUM_DERI] , optr->maxiter, optr->eps_sq);
  } 
  else if(optr->solver == 1 /* CG */) {

    gamma5(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI], VOLUME);
  
    if(use_preconditioning==1 && g_precWS!=NULL){
      ws=(spinorPrecWS*)g_precWS;
      printf("# Using preconditioning (which one?)!\n");
    
      alpha.re=ws->precExpo[2];
      spinorPrecondition(g_spinor_field[DUM_DERI+1],g_spinor_field[DUM_DERI+1],ws,T,L,alpha,0,1);

      /* 	iter = cg_her(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1], max_iter, precision,  */
      /* 		    rel_prec, VOLUME, &Q_pm_psi_prec); */
      optr->iterations = cg_her(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1], optr->maxiter, optr->eps_sq,
				optr->rel_prec, VOLUME, &Qov_sq_psi_prec);
    
      alpha.re=ws->precExpo[0];
      spinorPrecondition(g_spinor_field[DUM_DERI],g_spinor_field[DUM_DERI],ws,T,L,alpha,0,1);
    
    } 
    else {
      printf("# Not using preconditioning (which one?)!\n");
      /* 	iter = cg_her(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1], max_iter, precision,  */
      /* 		      rel_prec, VOLUME, &Q_pm_psi); */
      optr->iterations = cg_her(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1], optr->maxiter, optr->eps_sq,
				optr->rel_prec, VOLUME, &Qov_sq_psi);
    }
  
  
    Qov_psi(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI]);
  
    if(use_preconditioning == 1 && g_precWS!=NULL){
      ws=(spinorPrecWS*)g_precWS;
      alpha.re=ws->precExpo[1];
      spinorPrecondition(g_spinor_field[DUM_DERI+1],g_spinor_field[DUM_DERI+1],ws,T,L,alpha,0,1);
    }
  
  }
  
  op(g_spinor_field[4],g_spinor_field[DUM_DERI+1]);

  convert_eo_to_lexic(g_spinor_field[DUM_DERI], optr->sr0, optr->sr1);

  optr->reached_prec=diff_and_square_norm(g_spinor_field[4],g_spinor_field[DUM_DERI],VOLUME);
  
  convert_lexic_to_eo(optr->prop0, optr->prop1 , g_spinor_field[DUM_DERI+1]);

  return;
}
void cloverdetratio_derivative_orig(const int no, hamiltonian_field_t * const hf) {
  monomial * mnl = &monomial_list[no];

  /* This factor 2* a missing factor 2 in trace_lambda */
  mnl->forcefactor = 1.;

  /*********************************************************************
   *
   *  this is being run in case there is even/odd preconditioning
   * 
   * This term is det((Q^2 + \mu_1^2)/(Q^2 + \mu_2^2))
   * mu1 and mu2 are set according to the monomial
   *
   *********************************************************************/
  /* First term coming from the second field */
  /* Multiply with W_+ */
  g_mu = mnl->mu;
  g_mu3 = mnl->rho2; //rho2
  boundary(mnl->kappa);

  // we compute the clover term (1 + T_ee(oo)) for all sites x
  sw_term( (const su3**) hf->gaugefield, mnl->kappa, mnl->c_sw); 
  // we invert it for the even sites only including mu
  sw_invert(EE, mnl->mu);
  
  if(mnl->solver != CG) {
    fprintf(stderr, "Bicgstab currently not implemented, using CG instead! (detratio_monomial.c)\n");
  }
  
  mnl->Qp(g_spinor_field[DUM_DERI+2], mnl->pf);
  g_mu3 = mnl->rho; // rho1

  /* Invert Q_{+} Q_{-} */
  /* X_W -> DUM_DERI+1 */
  chrono_guess(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+2], mnl->csg_field, 
	       mnl->csg_index_array, mnl->csg_N, mnl->csg_n, VOLUME/2, mnl->Qsq);
  mnl->iter1 += cg_her(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+2], mnl->maxiter, 
		       mnl->forceprec, g_relative_precision_flag, VOLUME/2, mnl->Qsq);
  chrono_add_solution(g_spinor_field[DUM_DERI+1], mnl->csg_field, mnl->csg_index_array,
		      mnl->csg_N, &mnl->csg_n, VOLUME/2);
  /* Y_W -> DUM_DERI  */
  mnl->Qm(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]);
  
  /* apply Hopping Matrix M_{eo} */
  /* to get the even sites of X */
  H_eo_sw_inv_psi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+1], EE, -mnl->mu);
  /* \delta Q sandwitched by Y_o^\dagger and X_e */
  deriv_Sb(OE, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+2], hf, mnl->forcefactor); 
  
  /* to get the even sites of Y */
  H_eo_sw_inv_psi(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI], EE, mnl->mu);
  /* \delta Q sandwitched by Y_e^\dagger and X_o */
  deriv_Sb(EO, g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+1], hf, mnl->forcefactor); 

  // here comes the clover term...
  // computes the insertion matrices for S_eff
  // result is written to swp and swm
  // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e  
  gamma5(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+2], VOLUME/2);
  sw_spinor(EO, g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3]);
  
  // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
  gamma5(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI], VOLUME/2);
  sw_spinor(OE, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]);

  g_mu3 = mnl->rho2; // rho2
  
  /* Second term coming from the second field */
  /* The sign is opposite!! */
  mul_r(g_spinor_field[DUM_DERI], -1., mnl->pf, VOLUME/2);
  
  /* apply Hopping Matrix M_{eo} */
  /* to get the even sites of X */
  H_eo_sw_inv_psi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+1], EE, -mnl->mu);
  /* \delta Q sandwitched by Y_o^\dagger and X_e */
  deriv_Sb(OE, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+2], hf, mnl->forcefactor); 
  
  /* to get the even sites of Y */
  H_eo_sw_inv_psi(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI], EE, mnl->mu);
  /* \delta Q sandwitched by Y_e^\dagger and X_o */
  deriv_Sb(EO, g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+1], hf, mnl->forcefactor);

  // here comes the clover term...
  // computes the insertion matrices for S_eff
  // result is written to swp and swm
  // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e
  gamma5(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+2], VOLUME/2);
  sw_spinor(EO, g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3]);
  
  // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
  gamma5(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI], VOLUME/2);
  sw_spinor(OE, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]);

  sw_all(hf, mnl->kappa*mnl->forcefactor, mnl->c_sw);
  
  g_mu = g_mu1;
  g_mu3 = 0.;
  boundary(g_kappa);

  return;
}
示例#11
0
void ndpoly_heatbath(const int id) {
  int j;
  double temp;
  monomial * mnl = &monomial_list[id];

  (*mnl).energy0 = 0.;
  random_spinor_field(g_chi_up_spinor_field[0], VOLUME/2, (*mnl).rngrepro);
  (*mnl).energy0 = square_norm(g_chi_up_spinor_field[0], VOLUME/2, 1);

  if(g_epsbar!=0.0 || phmc_exact_poly == 0){
    random_spinor_field(g_chi_dn_spinor_field[0], VOLUME/2, (*mnl).rngrepro);
     (*mnl).energy0 += square_norm(g_chi_dn_spinor_field[0], VOLUME/2, 1);
  } 
  else {
    zero_spinor_field(g_chi_dn_spinor_field[0], VOLUME/2);
  }

  if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) {
    printf("PHMC: Here comes the computation of H_old with \n \n");
    printf("PHMC: First: random spinors and their norm  \n ");
    printf("PHMC: OLD Ennergy UP %e \n", (*mnl).energy0);
    printf("PHMC: OLD Energy  DN + UP %e \n\n", (*mnl).energy0);
  }

  if(phmc_exact_poly==0){
    QNon_degenerate(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], 
		    g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0]);
 
    for(j = 1; j < (phmc_dop_n_cheby); j++){
      assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2);
      assign(g_chi_dn_spinor_field[0], g_chi_dn_spinor_field[1], VOLUME/2);

      Q_tau1_min_cconst_ND(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], 
			g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0], 
			phmc_root[phmc_dop_n_cheby-2+j]);
    }
    Poly_tilde_ND(g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0], phmc_ptilde_cheby_coef, 
		  phmc_ptilde_n_cheby, g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1]);
  } 
  else if( phmc_exact_poly==1 && g_epsbar!=0.0) {
    /* Attention this is Q * tau1, up/dn are exchanged in the input spinor  */
    /* this is used as an preconditioner */
    QNon_degenerate(g_chi_up_spinor_field[1],g_chi_dn_spinor_field[1],
		    g_chi_dn_spinor_field[0],g_chi_up_spinor_field[0]);

    assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2);
    assign(g_chi_dn_spinor_field[0], g_chi_dn_spinor_field[1], VOLUME/2);

    /* solve Q*tau1*P(Q^2) *x=y */
    cg_her_nd(g_chi_up_spinor_field[1],g_chi_dn_spinor_field[1],
	      g_chi_up_spinor_field[0],g_chi_dn_spinor_field[0],
	      1000,1.e-16,0,VOLUME/2, Qtau1_P_ND);

    /*  phi= Bdagger phi  */
    for(j = 1; j < (phmc_dop_n_cheby); j++){
      assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2);
      assign(g_chi_dn_spinor_field[0], g_chi_dn_spinor_field[1], VOLUME/2);
      Q_tau1_min_cconst_ND(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1],
			g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0],
			phmc_root[phmc_dop_n_cheby-2+j]);
    }

    assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2);
    assign(g_chi_dn_spinor_field[0], g_chi_dn_spinor_field[1], VOLUME/2);
  } 
  else if(phmc_exact_poly==1 && g_epsbar==0.0) {
    Qtm_pm_psi(g_chi_up_spinor_field[1], g_chi_up_spinor_field[0]);

    assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2);

    /* solve (Q+)*(Q-)*P((Q+)*(Q-)) *x=y */
    cg_her(g_chi_up_spinor_field[1], g_chi_up_spinor_field[0],
             1000,1.e-16,0,VOLUME/2, Qtm_pm_Ptm_pm_psi);

    /*  phi= Bdagger phi  */
    for(j = 1; j < (phmc_dop_n_cheby); j++){
      assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2);
      Qtm_pm_min_cconst_nrm(g_chi_up_spinor_field[1],
			    g_chi_up_spinor_field[0],
			    phmc_root[phmc_dop_n_cheby-2+j]);
    }
    assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2);
  }

  assign(mnl->pf, g_chi_up_spinor_field[0], VOLUME/2);
  assign(mnl->pf2, g_chi_dn_spinor_field[0], VOLUME/2);

  temp = square_norm(g_chi_up_spinor_field[0], VOLUME/2, 1);
  if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) {
    printf("PHMC: Then: evaluate Norm of pseudofermion heatbath BHB \n ");
    printf("PHMC: Norm of BHB up squared %e \n", temp);
  }

  if(g_epsbar!=0.0 || phmc_exact_poly==0) 
    temp += square_norm(g_chi_dn_spinor_field[0], VOLUME/2, 1);

  if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)){
    printf("PHMC: Norm of BHB up + BHB dn squared %e \n\n", temp);
  }
  if(g_proc_id == 0 && g_debug_level > 3) {
    printf("called ndpoly_heatbath for id %d with g_running_phmc = %d\n", id, g_running_phmc);
  }
  return;
}
示例#12
0
void cloverdet_derivative(const int id, hamiltonian_field_t * const hf) {
  monomial * mnl = &monomial_list[id];
  double atime, etime;
  atime = gettime();
  for(int i = 0; i < VOLUME; i++) { 
    for(int mu = 0; mu < 4; mu++) { 
      _su3_zero(swm[i][mu]);
      _su3_zero(swp[i][mu]);
    }
  }

  (*mnl).forcefactor = 1.;
  /*********************************************************************
   * 
   * even/odd version 
   *
   * This a term is det(\hat Q^2(\mu))
   *
   *********************************************************************/
  
  g_mu = mnl->mu;
  g_mu3 = mnl->rho;
  boundary(mnl->kappa);
  
  // we compute the clover term (1 + T_ee(oo)) for all sites x
  sw_term( (const su3**) hf->gaugefield, mnl->kappa, mnl->c_sw); 
  // we invert it for the even sites only
  sw_invert(EE, mnl->mu);
  
  if(mnl->solver != CG && g_proc_id == 0) {
    fprintf(stderr, "Bicgstab currently not implemented, using CG instead! (cloverdet_monomial.c)\n");
  }
  
  // Invert Q_{+} Q_{-}
  // X_o -> w_fields[1]
  chrono_guess(mnl->w_fields[1], mnl->pf, mnl->csg_field, mnl->csg_index_array,
	       mnl->csg_N, mnl->csg_n, VOLUME/2, mnl->Qsq);
  mnl->iter1 += cg_her(mnl->w_fields[1], mnl->pf, mnl->maxiter, mnl->forceprec, 
		       g_relative_precision_flag, VOLUME/2, mnl->Qsq);
  chrono_add_solution(mnl->w_fields[1], mnl->csg_field, mnl->csg_index_array,
		      mnl->csg_N, &mnl->csg_n, VOLUME/2);
  
  // Y_o -> w_fields[0]
  mnl->Qm(mnl->w_fields[0], mnl->w_fields[1]);
  
  // apply Hopping Matrix M_{eo}
  // to get the even sites of X_e
  H_eo_sw_inv_psi(mnl->w_fields[2], mnl->w_fields[1], EO, -mnl->mu);
  // \delta Q sandwitched by Y_o^\dagger and X_e
  deriv_Sb(OE, mnl->w_fields[0], mnl->w_fields[2], hf, mnl->forcefactor); 
  
  // to get the even sites of Y_e
  H_eo_sw_inv_psi(mnl->w_fields[3], mnl->w_fields[0], EO, mnl->mu);
  // \delta Q sandwitched by Y_e^\dagger and X_o
  // uses the gauge field in hf and changes the derivative fields in hf
  deriv_Sb(EO, mnl->w_fields[3], mnl->w_fields[1], hf, mnl->forcefactor);
  
  // here comes the clover term...
  // computes the insertion matrices for S_eff
  // result is written to swp and swm
  // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e
  sw_spinor(EE, mnl->w_fields[2], mnl->w_fields[3], mnl->forcefactor);
  
  // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
  sw_spinor(OO, mnl->w_fields[0], mnl->w_fields[1], mnl->forcefactor);
  
  // compute the contribution for the det-part
  // we again compute only the insertion matrices for S_det
  // the result is added to swp and swm
  // even sites only!
  sw_deriv(EE, mnl->mu);
  
  // now we compute
  // finally, using the insertion matrices stored in swm and swp
  // we compute the terms F^{det} and F^{sw} at once
  // uses the gaugefields in hf and changes the derivative field in hf
  sw_all(hf, mnl->kappa, mnl->c_sw);

  g_mu = g_mu1;
  g_mu3 = 0.;
  boundary(g_kappa);
  etime = gettime();
  if(g_debug_level > 1 && g_proc_id == 0) {
    printf("# Time for %s monomial derivative: %e s\n", mnl->name, etime-atime);
  }
  return;
}
示例#13
0
文件: jdher.c 项目: Finkenrath/tmLQCD
void jdher(int n, int lda, double tau, double tol, 
	   int kmax, int jmax, int jmin, int itmax,
	   int blksize, int blkwise, 
	   int V0dim, _Complex double *V0, 
	   int solver_flag, 
	   int linitmax, double eps_tr, double toldecay,
	   int verbosity,
	   int *k_conv, _Complex double *Q, double *lambda, int *it,
	   int maxmin, int shift_mode,
	   matrix_mult A_psi) {

  /****************************************************************************
   *                                                                          *
   * Local variables                                                          *
   *                                                                          *
   ****************************************************************************/
  
  /* constants */

  /* allocatables: 
   * initialize with NULL, so we can free even unallocated ptrs */
  double *s = NULL, *resnrm = NULL, *resnrm_old = NULL, *dtemp = NULL, *rwork = NULL;

  _Complex double *V_ = NULL, *V, *Vtmp = NULL, *U = NULL, *M = NULL, *Z = NULL,
    *Res_ = NULL, *Res,
    *eigwork = NULL, *temp1_ = NULL, *temp1;

  int *idx1 = NULL, *idx2 = NULL, 
    *convind = NULL, *keepind = NULL, *solvestep = NULL, 
    *actcorrits = NULL;

  /* non-allocated ptrs */
  _Complex double *q, *v, *u, *r = NULL;  
/*   _Complex double *matdummy, *vecdummy; */

  /* scalar vars */
  double theta, alpha, it_tol;

  int i, k, j, actblksize, eigworklen, found, conv, keep, n2;
  int act, cnt, idummy, info, CntCorrIts=0, endflag=0;
  int N = n*sizeof(_Complex double)/sizeof(spinor);

  /* variables for random number generator */
  int IDIST = 1;
  int ISEED[4] = {2, 3, 5, 7};
  ISEED[0] = g_proc_id+2;

  /****************************************************************************
   *                                                                          *
   * Execution starts here...                                                 *
   *                                                                          *
   ****************************************************************************/


  /* print info header */
  if ((verbosity > 2) && (g_proc_id == 0)){
    printf("Jacobi-Davidson method for hermitian Matrices\n");
    printf("Solving  A*x = lambda*x \n\n");
    printf("  N=      %10d  ITMAX=%4d\n", n, itmax);
    printf("  KMAX=%3d  JMIN=%3d  JMAX=%3d  V0DIM=%3d\n", 
	   kmax, jmin, jmax, V0dim);
    printf("  BLKSIZE=        %2d  BLKWISE=      %5s\n", 
	   blksize, blkwise ? "TRUE" : "FALSE");
    printf("  TOL=  %11.4e TAU=  %11.4e\n", 
	   tol, tau);
    printf("  LINITMAX=    %5d  EPS_TR=  %10.3e  TOLDECAY=%9.2e\n", 
	   linitmax, eps_tr, toldecay);
    printf("\n Computing %s eigenvalues\n",
	   maxmin ? "maximal" : "minimal");
    printf("\n");
    fflush( stdout );
  }

  /* validate input parameters */
  if(tol <= 0) jderrorhandler(401,"");
  if(kmax <= 0 || kmax > n) jderrorhandler(402,"");
  if(jmax <= 0 || jmax > n) jderrorhandler(403,"");
  if(jmin <= 0 || jmin > jmax) jderrorhandler(404,"");
  if(itmax < 0) jderrorhandler(405,"");
  if(blksize > jmin || blksize > (jmax - jmin)) jderrorhandler(406,"");
  if(blksize <= 0 || blksize > kmax) jderrorhandler(406,"");
  if(blkwise < 0 || blkwise > 1) jderrorhandler(407,"");
  if(V0dim < 0 || V0dim >= jmax) jderrorhandler(408,"");
  if(linitmax < 0) jderrorhandler(409,"");
  if(eps_tr < 0.) jderrorhandler(500,"");
  if(toldecay <= 1.0) jderrorhandler(501,"");
  
  CONE = 1.;
  CZERO = 0.;
  CMONE = -1.;

  /* Get hardware-dependent values:
   * Opt size of workspace for ZHEEV is (NB+1)*j, where NB is the opt.
   * block size... */
  eigworklen = (2 + _FT(ilaenv)(&ONE, filaenv, fvu, &jmax, &MONE, &MONE, &MONE, 6, 2)) * jmax;

  /* Allocating memory for matrices & vectors */ 

  if((void*)(V_ = (_Complex double *)malloc((lda * jmax + 4) * sizeof(_Complex double))) == NULL) {
    errno = 0;
    jderrorhandler(300,"V in jdher");
  }
#if (defined SSE || defined SSE2 || defined SSE3)
  V = (_Complex double*)(((unsigned long int)(V_)+ALIGN_BASE)&~ALIGN_BASE);
#else
  V = V_;
#endif
  if((void*)(U = (_Complex double *)malloc(jmax * jmax * sizeof(_Complex double))) == NULL) {
    jderrorhandler(300,"U in jdher");
  }
  if((void*)(s = (double *)malloc(jmax * sizeof(double))) == NULL) {
    jderrorhandler(300,"s in jdher");
  }
  if((void*)(Res_ = (_Complex double *)malloc((lda * blksize+4) * sizeof(_Complex double))) == NULL) {
    jderrorhandler(300,"Res in jdher");
  }
#if (defined SSE || defined SSE2 || defined SSE3)
  Res = (_Complex double*)(((unsigned long int)(Res_)+ALIGN_BASE)&~ALIGN_BASE);
#else
  Res = Res_;
#endif
  if((void*)(resnrm = (double *)malloc(blksize * sizeof(double))) == NULL) {
    jderrorhandler(300,"resnrm in jdher");
  }
  if((void*)(resnrm_old = (double *)calloc(blksize,sizeof(double))) == NULL) {
    jderrorhandler(300,"resnrm_old in jdher");
  }
  if((void*)(M = (_Complex double *)malloc(jmax * jmax * sizeof(_Complex double))) == NULL) {
    jderrorhandler(300,"M in jdher");
  }
  if((void*)(Vtmp = (_Complex double *)malloc(jmax * jmax * sizeof(_Complex double))) == NULL) {
    jderrorhandler(300,"Vtmp in jdher");
  }
  if((void*)(p_work = (_Complex double *)malloc(lda * sizeof(_Complex double))) == NULL) {
    jderrorhandler(300,"p_work in jdher");
  }

  /* ... */
  if((void*)(idx1 = (int *)malloc(jmax * sizeof(int))) == NULL) {
    jderrorhandler(300,"idx1 in jdher");
  }
  if((void*)(idx2 = (int *)malloc(jmax * sizeof(int))) == NULL) {
    jderrorhandler(300,"idx2 in jdher");
  }

  /* Indices for (non-)converged approximations */
  if((void*)(convind = (int *)malloc(blksize * sizeof(int))) == NULL) {
    jderrorhandler(300,"convind in jdher");
  }
  if((void*)(keepind = (int *)malloc(blksize * sizeof(int))) == NULL) {
    jderrorhandler(300,"keepind in jdher");
  }
  if((void*)(solvestep = (int *)malloc(blksize * sizeof(int))) == NULL) {
    jderrorhandler(300,"solvestep in jdher");
  }
  if((void*)(actcorrits = (int *)malloc(blksize * sizeof(int))) == NULL) {
    jderrorhandler(300,"actcorrits in jdher");
  }

  if((void*)(eigwork = (_Complex double *)malloc(eigworklen * sizeof(_Complex double))) == NULL) {
    jderrorhandler(300,"eigwork in jdher");
  }
  if((void*)(rwork = (double *)malloc(3*jmax * sizeof(double))) == NULL) {
    jderrorhandler(300,"rwork in jdher");
  }
  if((void*)(temp1_ = (_Complex double *)malloc((lda+4) * sizeof(_Complex double))) == NULL) {
    jderrorhandler(300,"temp1 in jdher");
  }
#if (defined SSE || defined SSE2 || defined SSE3)
  temp1 = (_Complex double*)(((unsigned long int)(temp1_)+ALIGN_BASE)&~ALIGN_BASE);
#else
  temp1 = temp1_;
#endif
  if((void*)(dtemp = (double *)malloc(lda * sizeof(_Complex double))) == NULL) {
    jderrorhandler(300,"dtemp in jdher");
  }

  /* Set variables for Projection routines */
  n2 = 2*n;
  p_n = n;
  p_n2 = n2;
  p_Q = Q;
  p_A_psi = A_psi;
  p_lda = lda;

  /**************************************************************************
   *                                                                        *
   * Generate initial search subspace V. Vectors are taken from V0 and if   *
   * necessary randomly generated.                                          *
   *                                                                        *
   **************************************************************************/

  /* copy V0 to V */
  _FT(zlacpy)(fupl_a, &n, &V0dim, V0, &lda, V, &lda, 1);
  j = V0dim;
  /* if V0dim < blksize: generate additional random vectors */
  if (V0dim < blksize) {
    idummy = (blksize - V0dim)*n; /* nof random numbers */
    _FT(zlarnv)(&IDIST, ISEED, &idummy, V + V0dim*lda);
    j = blksize;
  }
  for (cnt = 0; cnt < j; cnt ++) {
    ModifiedGS(V + cnt*lda, n, cnt, V, lda);
    alpha = sqrt(square_norm((spinor*)(V+cnt*lda), N, 1));
    alpha = 1.0 / alpha;
    _FT(dscal)(&n2, &alpha, (double *)(V + cnt*lda), &ONE);
  }

  /* Generate interaction matrix M = V^dagger*A*V. Only the upper triangle
     is computed. */
  for (cnt = 0; cnt < j; cnt++){
    A_psi((spinor*) temp1, (spinor*)(V+cnt*lda));
    idummy = cnt+1;
    for(i = 0; i < idummy; i++){
      M[cnt*jmax+i] = scalar_prod((spinor*)(V+i*lda), (spinor*) temp1, N, 1);
    }
  }

  /* Other initializations */
  k = 0; (*it) = 0; 
  if((*k_conv) > 0) {
    k = *k_conv;
  }

  actblksize = blksize; 
  for(act = 0; act < blksize; act ++){
    solvestep[act] = 1;
  }


  /****************************************************************************
   *                                                                          *
   * Main JD-iteration loop                                                   *
   *                                                                          *
   ****************************************************************************/

  while((*it) < itmax) {

    /****************************************************************************
     *                                                                          *
     * Solving the projected eigenproblem                                       *
     *                                                                          *
     * M*u = V^dagger*A*V*u = s*u                                                     *
     * M is hermitian, only the upper triangle is stored                        *
     *                                                                          *
     ****************************************************************************/
    _FT(zlacpy)(fupl_u, &j, &j, M, &jmax, U, &jmax, 1);
    _FT(zheev)(fupl_v, fupl_u, &j, U, &jmax, s, eigwork, &eigworklen, rwork, &info, 1, 1); 

    if (info != 0) {
      printf("error solving the projected eigenproblem.");
      printf(" zheev: info = %d\n", info);
    }
    if(info != 0) jderrorhandler(502,"proble in zheev");
  

    /* Reverse order of eigenvalues if maximal value is needed */
    if(maxmin == 1){
      sorteig(j, s, U, jmax, s[j-1], dtemp, idx1, idx2, 0); 
    }
    else{
      sorteig(j, s, U, jmax, 0., dtemp, idx1, idx2, 0); 
    }
    /****************************************************************************
     *                                                                          *
     * Convergence/Restart Check                                                *
     *                                                                          *
     * In case of convergence, strip off a whole block or just the converged    *
     * ones and put 'em into Q.  Update the matrices Q, V, U, s                 *
     *                                                                          *
     * In case of a restart update the V, U and M matrices and recompute the    *
     * Eigenvectors                                                             *
     *                                                                          *
     ****************************************************************************/

    found = 1;
    while(found) {

      /* conv/keep = Number of converged/non-converged Approximations */
      conv = 0; keep = 0;

      for(act=0; act < actblksize; act++){

	/* Setting pointers for single vectors */
	q = Q + (act+k)*lda; 
	u = U + act*jmax; 
	r = Res + act*lda; 
	
	/* Compute Ritz-Vector Q[:,k+cnt1]=V*U[:,cnt1] */
	theta = s[act];
	_FT(zgemv)(fupl_n, &n, &j, &CONE, V, &lda, u, &ONE, &CZERO, q, &ONE, 1);

	/* Compute the residual */
	A_psi((spinor*) r, (spinor*) q); 
	theta = -theta;
	_FT(daxpy)(&n2, &theta, (double*) q, &ONE, (double*) r, &ONE);

	/* Compute norm of the residual and update arrays convind/keepind*/
	resnrm_old[act] = resnrm[act];
	resnrm[act] = sqrt(square_norm((spinor*) r, N, 1));
	if (resnrm[act] < tol){
	  convind[conv] = act; 
	  conv = conv + 1; 
	}
	else{
	  keepind[keep] = act; 
	  keep = keep + 1; 
	}
	
      }  /* for(act = 0; act < actblksize; act ++) */

      /* Check whether the blkwise-mode is chosen and ALL the
	 approximations converged, or whether the strip-off mode is
	 active and SOME of the approximations converged */

      found = ((blkwise==1 && conv==actblksize) || (blkwise==0 && conv!=0)) 
	&& (j > actblksize || k == kmax - actblksize);
      
      /***************************************************************************
	*                                                                        *
	* Convergence Case                                                       *
	*                                                                        *
	* In case of convergence, strip off a whole block or just the converged  *
	* ones and put 'em into Q.  Update the matrices Q, V, U, s               *
	*                                                                        *
	**************************************************************************/

      if (found) {

	/* Store Eigenvalues */
	for(act = 0; act < conv; act++)
	  lambda[k+act] = s[convind[act]];
	 
	/* Re-use non approximated Ritz-Values */
	for(act = 0; act < keep; act++)
	  s[act] = s[keepind[act]];

	/* Shift the others in the right position */
	for(act = 0; act < (j-actblksize); act ++)
	  s[act+keep] = s[act+actblksize];

	/* Update V. Re-use the V-Vectors not looked at yet. */
	idummy = j - actblksize;
	for (act = 0; act < n; act = act + jmax) {
	  cnt = act + jmax > n ? n-act : jmax;
	  _FT(zlacpy)(fupl_a, &cnt, &j, V+act, &lda, Vtmp, &jmax, 1);
	  _FT(zgemm)(fupl_n, fupl_n, &cnt, &idummy, &j, &CONE, Vtmp, 
		     &jmax, U+actblksize*jmax, &jmax, &CZERO, V+act+keep*lda, &lda, 1, 1);
	}

	/* Insert the not converged approximations as first columns in V */
	for(act = 0; act < keep; act++){
	  _FT(zlacpy)(fupl_a,&n,&ONE,Q+(k+keepind[act])*lda,&lda,V+act*lda,&lda,1);
	}

	/* Store Eigenvectors */
	for(act = 0; act < conv; act++){
	  _FT(zlacpy)(fupl_a,&n,&ONE,Q+(k+convind[act])*lda,&lda,Q+(k+act)*lda,&lda,1);
	}

	/* Update SearchSpaceSize j */
	j = j - conv;

	/* Let M become a diagonalmatrix with the Ritzvalues as entries ... */ 
	_FT(zlaset)(fupl_u, &j, &j, &CZERO, &CZERO, M, &jmax, 1);
	for (act = 0; act < j; act++)
	  M[act*jmax + act] = s[act];
	
	/* ... and U the Identity(jnew,jnew) */
	_FT(zlaset)(fupl_a, &j, &j, &CZERO, &CONE, U, &jmax, 1);

	if(shift_mode == 1){
	  if(maxmin == 0){
	    for(act = 0; act < conv; act ++){
	      if (lambda[k+act] > tau){
		tau = lambda[k+act];
	      }
	    }
	  }
	  else{
	    for(act = 0; act < conv; act ++){
	      if (lambda[k+act] < tau){
		tau = lambda[k+act];
	      }
	    } 
	  }
	}
	 
	/* Update Converged-Eigenpair-counter and Pro_k */
	k = k + conv;

	/* Update the new blocksize */
	actblksize=min(blksize, kmax-k);

	/* Exit main iteration loop when kmax eigenpairs have been
           approximated */
	if (k == kmax){
	  endflag = 1;
	  break;
	}
	/* Counter for the linear-solver-accuracy */
	for(act = 0; act < keep; act++)
	  solvestep[act] = solvestep[keepind[act]];

	/* Now we expect to have the next eigenvalues */
	/* allready with some accuracy                */
	/* So we do not need to start from scratch... */
	for(act = keep; act < blksize; act++)
	  solvestep[act] = 1;

      } /* if(found) */
      if(endflag == 1){
	break;
      }
      /**************************************************************************
       *                                                                        *
       * Restart                                                                *
       *                                                                        *
       * The Eigenvector-Aproximations corresponding to the first jmin          *
       * Petrov-Vectors are kept.  if (j+actblksize > jmax) {                   *
       *                                                                        *
       **************************************************************************/
      if (j+actblksize > jmax) {

	idummy = j; j = jmin;

	for (act = 0; act < n; act = act + jmax) { /* V = V * U(:,1:j) */
	  cnt = act+jmax > n ? n-act : jmax;
	  _FT(zlacpy)(fupl_a, &cnt, &idummy, V+act, &lda, Vtmp, &jmax, 1);
	  _FT(zgemm)(fupl_n, fupl_n, &cnt, &j, &idummy, &CONE, Vtmp, 
		     &jmax, U, &jmax, &CZERO, V+act, &lda, 1, 1);
	}
	  
	_FT(zlaset)(fupl_a, &j, &j, &CZERO, &CONE, U, &jmax, 1);
	_FT(zlaset)(fupl_u, &j, &j, &CZERO, &CZERO, M, &jmax, 1);
	for (act = 0; act < j; act++)
	  M[act*jmax + act] = s[act];
      }

    } /* while(found) */    

    if(endflag == 1){
      break;
    }

    /****************************************************************************
     *                                                                          *
     * Solving the correction equations                                         *
     *                                                                          *
     *                                                                          *
     ****************************************************************************/

    /* Solve actblksize times the correction equation ... */
    for (act = 0; act < actblksize; act ++) {      

      /* Setting start-value for vector v as zeros(n,1). Guarantees
         orthogonality */
      v = V + j*lda;
      for (cnt = 0; cnt < n; cnt ++){ 
	v[cnt] = 0.;
      }

      /* Adaptive accuracy and shift for the lin.solver. In case the
	 residual is big, we don't need a too precise solution for the
	 correction equation, since even in exact arithmetic the
	 solution wouldn't be too usefull for the Eigenproblem. */
      r = Res + act*lda;

      if (resnrm[act] < eps_tr && resnrm[act] < s[act] && resnrm_old[act] > resnrm[act]){
	p_theta = s[act];
      }
      else{
	p_theta = tau;
      }
      p_k = k + actblksize;

      /* if we are in blockwise mode, we do not want to */
      /* iterate solutions much more, if they have      */
      /* allready the desired precision                 */
      if(blkwise == 1 && resnrm[act] < tol) {
	it_tol = pow(toldecay, (double)(-5));
      }
      else {
	it_tol = pow(toldecay, (double)(-solvestep[act]));
      }
      solvestep[act] = solvestep[act] + 1;


      /* equation and project if necessary */
      ModifiedGS(r, n, k + actblksize, Q, lda);

      /* Solve the correction equation ...  */
      g_sloppy_precision = 1;
      if(solver_flag == GMRES){
/* 	info = gmres((spinor*) v, (spinor*) r, 10, linitmax/10, it_tol*it_tol, &Proj_A_psi, &Proj_A_psi); */
	info = gmres((spinor*) v, (spinor*) r, 10, linitmax/10, it_tol*it_tol, 0, 
		     n*sizeof(_Complex double)/sizeof(spinor), 1, &Proj_A_psi);
      }
      if(solver_flag == CGS){
	info = cgs_real((spinor*) v, (spinor*) r, linitmax, it_tol*it_tol, 0,
			n*sizeof(_Complex double)/sizeof(spinor), &Proj_A_psi);
      }
      else if (solver_flag == BICGSTAB){
	info = bicgstab_complex((spinor*) v, (spinor*) r, linitmax, it_tol*it_tol, 0,
				n*sizeof(_Complex double)/sizeof(spinor), &Proj_A_psi);
      }
      else if (solver_flag == CG){
	info = cg_her((spinor*) v, (spinor*) r, linitmax, it_tol*it_tol, 0, 
		      n*sizeof(_Complex double)/sizeof(spinor), &Proj_A_psi);
      }
      else{
 	info = gmres((spinor*) v, (spinor*) r, 10, linitmax, it_tol*it_tol, 0,
		     n*sizeof(_Complex double)/sizeof(spinor), 1, &Proj_A_psi); 
      }
      g_sloppy_precision = 0;

      /* Actualizing profiling data */
      if (info == -1){
	CntCorrIts += linitmax;
      }
      else{
	CntCorrIts += info;
      }
      actcorrits[act] = info;

      /* orthonormalize v to Q, cause the implicit
	 orthogonalization in the solvers may be too inaccurate. Then
	 apply "IteratedCGS" to prevent numerical breakdown 
         in order to orthogonalize v to V */

      ModifiedGS(v, n, k+actblksize, Q, lda);
      IteratedClassicalGS(v, &alpha, n, j, V, temp1, lda);

      alpha = 1.0 / alpha;
      _FT(dscal)(&n2, &alpha, (double*) v, &ONE);
      
      /* update interaction matrix M */
      A_psi((spinor*) temp1, (spinor*) v);
      idummy = j+1;
      for(i = 0; i < idummy; i++) {
	M[j*jmax+i] = scalar_prod((spinor*)(V+i*lda), (spinor*) temp1, N, 1);
      }
      
      /* Increasing SearchSpaceSize j */
      j ++;
    }   /* for (act = 0;act < actblksize; act ++) */    

    /* Print information line */
    if(g_proc_id == 0) {
      print_status(verbosity, *it, k, j - blksize, kmax, blksize, actblksize, 
		   s, resnrm, actcorrits);
    }

    /* Increase iteration-counter for outer loop  */
    (*it) = (*it) + 1;

  } /* Main iteration loop */
  
  /******************************************************************
   *                                                                *
   * Eigensolutions converged or iteration limit reached            *
   *                                                                *
   * Print statistics. Free memory. Return.                         *
   *                                                                *
   ******************************************************************/

  (*k_conv) = k;
  if (g_proc_id == 0 && verbosity > 0) {
    printf("\nJDHER execution statistics\n\n");
    printf("IT_OUTER=%d   IT_INNER_TOT=%d   IT_INNER_AVG=%8.2f\n",
	   (*it), CntCorrIts, (double)CntCorrIts/(*it));
    printf("\nConverged eigensolutions in order of convergence:\n");
    printf("\n  I              LAMBDA(I)      RES(I)\n");
    printf("---------------------------------------\n");
  }    
  for (act = 0; act < *k_conv; act ++) {
    /* Compute the residual for solution act */
    q = Q + act*lda;
    theta = -lambda[act];
    A_psi((spinor*) r, (spinor*) q);
    _FT(daxpy)(&n2, &theta, (double*) q, &ONE, (double*) r, &ONE);
    alpha = sqrt(square_norm((spinor*) r, N, 1));
    if(g_proc_id == 0 && verbosity > 0) {
      printf("%3d %22.15e %12.5e\n", act+1, lambda[act],
	     alpha);
    }
  }
  if(g_proc_id == 0 && verbosity > 0) {
    printf("\n");
    fflush( stdout );
  }

  free(V_); free(Vtmp); free(U); 
  free(s); free(Res_); 
  free(resnrm); free(resnrm_old); 
  free(M); free(Z);
  free(eigwork); free(temp1_);
  free(dtemp); free(rwork);
  free(p_work);
  free(idx1); free(idx2); 
  free(convind); free(keepind); free(solvestep); free(actcorrits);
  
} /* jdher(.....) */
示例#14
0
int invert_clover_eo(spinor * const Even_new, spinor * const Odd_new,
                     spinor * const Even, spinor * const Odd,
                     const double precision, const int max_iter,
                     const int solver_flag, const int rel_prec,solver_params_t solver_params,
                     su3 *** gf, matrix_mult Qsq, matrix_mult Qm) {
    int iter;

    if(g_proc_id == 0 && g_debug_level > 0) {
        printf("# Using even/odd preconditioning!\n");
        fflush(stdout);
    }

    assign_mul_one_sw_pm_imu_inv(EE, Even_new, Even, +g_mu);

    Hopping_Matrix(OE, g_spinor_field[DUM_DERI], Even_new);
    /* The sign is plus, since in Hopping_Matrix */
    /* the minus is missing                      */
    assign_mul_add_r(g_spinor_field[DUM_DERI], +1., Odd, VOLUME/2);
    /* Do the inversion with the preconditioned  */
    /* matrix to get the odd sites               */

    /* Here we invert the hermitean operator squared */
    gamma5(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI], VOLUME/2);
    if(g_proc_id == 0) {
        //printf("# Using CG!\n");
        printf("# mu = %f, kappa = %f, csw = %f\n",
               g_mu/2./g_kappa, g_kappa, g_c_sw);
        fflush(stdout);
    }

    if(solver_flag == CG) {
        if(g_proc_id == 0) {
            printf("# Using CG!\n");
            fflush(stdout);
        }
        iter = cg_her(Odd_new, g_spinor_field[DUM_DERI], max_iter,
                      precision, rel_prec,
                      VOLUME/2, Qsq);
        Qm(Odd_new, Odd_new);
    } else if(solver_flag == INCREIGCG) {

        if(g_proc_id == 0) {
            printf("# Using Incremental Eig-CG!\n");
            fflush(stdout);
        }
        iter = incr_eigcg(VOLUME/2,solver_params.eigcg_nrhs, solver_params.eigcg_nrhs1, Odd_new, g_spinor_field[DUM_DERI], solver_params.eigcg_ldh, Qsq,
                          solver_params.eigcg_tolsq1, solver_params.eigcg_tolsq, solver_params.eigcg_restolsq , solver_params.eigcg_rand_guess_opt,
                          rel_prec, max_iter, solver_params.eigcg_nev, solver_params.eigcg_vmax);
        Qm(Odd_new, Odd_new);

    } else {
        if(g_proc_id == 0) {
            printf("# This solver is not available for this operator. Exisiting!\n");
            fflush(stdout);
        }
        return 0;
    }


    /* Reconstruct the even sites                */
    Hopping_Matrix(EO, g_spinor_field[DUM_DERI], Odd_new);
    clover_inv(g_spinor_field[DUM_DERI], +1, g_mu);
    /* The sign is plus, since in Hopping_Matrix */
    /* the minus is missing                      */
    assign_add_mul_r(Even_new, g_spinor_field[DUM_DERI], +1., VOLUME/2);

    return(iter);
}
示例#15
0
void cloverdet_derivative(const int id, hamiltonian_field_t * const hf) {
  monomial * mnl = &monomial_list[id];

  /* This factor 2 a missing factor 2 in trace_lambda */
  (*mnl).forcefactor = 2.;

  
  /*********************************************************************
   * 
   * even/odd version 
   *
   * This a term is det(\hat Q^2(\mu))
   *
   *********************************************************************/
  
  g_mu = mnl->mu;
  g_mu3 = mnl->rho;
  boundary(mnl->kappa);
  
  // we compute the clover term (1 + T_ee(oo)) for all sites x
  sw_term(hf->gaugefield, mnl->kappa, mnl->c_sw); 
  // we invert it for the even sites only
  sw_invert(EE, mnl->mu);
  
  if(mnl->solver != CG && g_proc_id == 0) {
    fprintf(stderr, "Bicgstab currently not implemented, using CG instead! (cloverdet_monomial.c)\n");
  }
  
  // Invert Q_{+} Q_{-}
  // X_o -> DUM_DERI+1
  chrono_guess(g_spinor_field[DUM_DERI+1], mnl->pf, mnl->csg_field, mnl->csg_index_array,
	       mnl->csg_N, mnl->csg_n, VOLUME/2, mnl->Qsq);
  mnl->iter1 += cg_her(g_spinor_field[DUM_DERI+1], mnl->pf, mnl->maxiter, mnl->forceprec, 
		       g_relative_precision_flag, VOLUME/2, mnl->Qsq);
  chrono_add_solution(g_spinor_field[DUM_DERI+1], mnl->csg_field, mnl->csg_index_array,
		      mnl->csg_N, &mnl->csg_n, VOLUME/2);
  
  // Y_o -> DUM_DERI
  mnl->Qm(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]);
  
  // apply Hopping Matrix M_{eo}
  // to get the even sites of X_e
  H_eo_sw_inv_psi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+1], EE, -mnl->mu);
  // \delta Q sandwitched by Y_o^\dagger and X_e
  deriv_Sb(OE, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+2], hf); 
  
  // to get the even sites of Y_e
  H_eo_sw_inv_psi(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI], EE, mnl->mu);
  // \delta Q sandwitched by Y_e^\dagger and X_o
  // uses the gauge field in hf and changes the derivative fields in hf
  deriv_Sb(EO, g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+1], hf);
  
  // here comes the clover term...
  // computes the insertion matrices for S_eff
  // result is written to swp and swm
  // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e
  gamma5(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+2], VOLUME/2);
  sw_spinor(EO, g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3]);
  
  // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
  gamma5(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI], VOLUME/2);
  sw_spinor(OE, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]);
  
  // compute the contribution for the det-part
  // we again compute only the insertion matrices for S_det
  // the result is added to swp and swm
  // even sites only!
  sw_deriv(EE, mnl->mu);
  
  // now we compute
  // finally, using the insertion matrices stored in swm and swp
  // we compute the terms F^{det} and F^{sw} at once
  // uses the gaugefields in hf and changes the derivative field in hf
  sw_all(hf, mnl->kappa, mnl->c_sw);

  g_mu = g_mu1;
  g_mu3 = 0.;
  boundary(g_kappa);

  return;
}