Example #1
0
int mr(spinor * const P, spinor * const Q,
       const int max_iter, const double eps_sq,
       const int rel_prec, const int N, const int parallel, 
       matrix_mult f){
  int i=0;
  double norm_r,beta;
  _Complex double alpha;
  spinor * r;
  spinor ** solver_field = NULL;
  const int nr_sf = 3;
  
  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }
  r = solver_field[0];
  
  zero_spinor_field(P, N);
  f(solver_field[2], P);
  diff(r, Q, solver_field[2], N);
  norm_r=square_norm(solver_field[0], N, parallel);
  if(g_proc_id == g_stdio_proc && g_debug_level > 2) {
    printf("MR iteration number: %d, |res|^2 = %e\n", i, norm_r); 
    fflush( stdout );
  }
  while((norm_r > eps_sq) && (i < max_iter)){
    i++;
    f(solver_field[1], r);
    alpha=scalar_prod(solver_field[1], r, N, parallel);
    beta=square_norm(solver_field[1], N, parallel);
    alpha /= beta;
    assign_add_mul(P, r, alpha, N);
    if(i%50 == 0){
      f(solver_field[2], P);
    }
    else{
      assign_add_mul(solver_field[2], solver_field[1], alpha, N);
    }

    diff(r, Q, solver_field[2], N);
    norm_r=square_norm(solver_field[0], N, parallel);
    if(g_proc_id == g_stdio_proc && g_debug_level > 2) {
      printf("# MR iteration= %d  |res|^2= %g\n", i, norm_r); 
      fflush(stdout);
    }
  }
  finalize_solver(solver_field, nr_sf);
  if(norm_r > eps_sq){
    return(-1);
  }
  return(i);
}
Example #2
0
void Msap(spinor * const P, spinor * const Q, const int Ncy, const int Niter) {
  int blk, ncy = 0, eo, vol;
  spinor * r, * a, * b;
  double nrm;
  spinor ** solver_field = NULL;
  const int nr_sf = 6;

  /* 
   * here it would be probably better to get the working fields as a parameter 
   * from the calling function
   */
  init_solver_field(&solver_field, VOLUME, nr_sf);
  r = solver_field[0];
  a = solver_field[1];
  b = solver_field[2];

  for(ncy = 0; ncy < Ncy; ncy++) {
    /* compute the global residue        */
    /* this can be done more efficiently */
    /* here only a naive implementation  */
    for(eo = 0; eo < 2; eo++) {
      D_psi(r, P);
      diff(r, Q, r, VOLUME);
      nrm = square_norm(r, VOLUME, 1);
      if(g_proc_id == 0 && g_debug_level > 2 && eo == 1) {  /*  GG, was 1 */
	printf("Msap: %d %1.3e\n", ncy, nrm);
	fflush(stdout);
      }
      /* choose the even (odd) block */

      /*blk = eolist[eo];*/

      for (blk = 0; blk < nb_blocks; blk++) {
	if(block_list[blk].evenodd == eo) {
	  vol = block_list[blk].volume;

	  /* get part of r corresponding to block blk into b */
	  copy_global_to_block(b, r, blk);
	  // does this work?? i.e. solver_field[3]
	  mrblk(a, b, solver_field[3], Niter, 1.e-31, 1, vol, &dummy_Di, blk);

	  /* add a up to full spinor P */
	  add_block_to_global(P, a, blk);
	}
      }
    }
  }
  finalize_solver(solver_field, nr_sf);
  return;
}
Example #3
0
void CGeoSmoother(spinor * const P, spinor * const Q, const int Ncy, const int dummy) {
  spinor ** solver_field = NULL;
  const int nr_sf = 5;
  double musave = g_mu;
  g_mu = g_mu1;
  init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  
  convert_lexic_to_eo(solver_field[0], solver_field[1], Q);
  if(g_c_sw > 0)
    assign_mul_one_sw_pm_imu_inv(EE,solver_field[2], solver_field[0], g_mu);
  else
    assign_mul_one_pm_imu_inv(solver_field[2], solver_field[0], +1., VOLUME/2);
  
  Hopping_Matrix(OE, solver_field[4], solver_field[2]); 
  /* The sign is plus, since in Hopping_Matrix */
  /* the minus is missing                      */
  assign_mul_add_r(solver_field[4], +1., solver_field[1], VOLUME/2);
  /* Do the inversion with the preconditioned  */
  /* matrix to get the odd sites               */
  gamma5(solver_field[4], solver_field[4], VOLUME/2);
  if(g_c_sw > 0) {
    cg_her(solver_field[3], solver_field[4], Ncy, 1.e-8, 1, 
	   VOLUME/2, &Qsw_pm_psi);
    Qsw_minus_psi(solver_field[3], solver_field[3]);
    
    /* Reconstruct the even sites                */
    Hopping_Matrix(EO, solver_field[2], solver_field[3]);
    assign_mul_one_sw_pm_imu_inv(EE,solver_field[4],solver_field[2], g_mu);
  }
  else {
    cg_her(solver_field[3], solver_field[4], Ncy, 1.e-8, 1, 
	   VOLUME/2, &Qtm_pm_psi);
    Qtm_minus_psi(solver_field[3], solver_field[3]);
    
    /* Reconstruct the even sites                */
    Hopping_Matrix(EO, solver_field[4], solver_field[3]);
    mul_one_pm_imu_inv(solver_field[4], +1., VOLUME/2);
  }
  
  /* The sign is plus, since in Hopping_Matrix */
  /* the minus is missing                      */
  assign_add_mul_r(solver_field[2], solver_field[4], +1., VOLUME/2);
  
  convert_eo_to_lexic(P, solver_field[2], solver_field[3]); 
  g_mu = musave;
  finalize_solver(solver_field, nr_sf);
  return;  
}
Example #4
0
/* P output = solution , Q input = source */
int cg_mms_tm(spinor ** const P, spinor * const Q,
		 solver_params_t * solver_params, double * cgmms_reached_prec) {

  static double normsq, pro, err, squarenorm;
  int iteration, N = solver_params->sdim, no_shifts = solver_params->no_shifts;
  static double gamma, alpham1;
  spinor ** solver_field = NULL;
  double atime, etime;
  const int nr_sf = 3;

  atime = gettime();
  if(solver_params->sdim == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
    init_mms_tm(no_shifts, VOLUMEPLUSRAND);
  } 
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf); 
    init_mms_tm(no_shifts, VOLUMEPLUSRAND/2);
  } 

  zero_spinor_field(P[0], N);
  alphas[0] = 1.0;
  betas[0] = 0.0;
  sigma[0] = solver_params->shifts[0]*solver_params->shifts[0];
  if(g_proc_id == 0 && g_debug_level > 1) printf("# CGMMS: shift %d is %e\n", 0, sigma[0]);

  for(int im = 1; im < no_shifts; im++) {
    sigma[im] = solver_params->shifts[im]*solver_params->shifts[im] - sigma[0];
    if(g_proc_id == 0 && g_debug_level > 1) printf("# CGMMS: shift %d is %e\n", im, sigma[im]);
    // these will be the result spinor fields
    zero_spinor_field(P[im], N);
    // these are intermediate fields
    assign(ps_mms_solver[im-1], Q, N);
    zitam1[im] = 1.0;
    zita[im] = 1.0;
    alphas[im] = 1.0;
    betas[im] = 0.0;
  }

  /* currently only implemented for P=0 */
  squarenorm = square_norm(Q, N, 1);
  /* if a starting solution vector equal to zero is chosen */
  assign(solver_field[0], Q, N);
  assign(solver_field[1], Q, N);
  normsq = squarenorm;

  /* main loop */
  for(iteration = 0; iteration < solver_params->max_iter; iteration++) {

    /*   Q^2*p and then (p,Q^2*p)  */
    solver_params->M_psi(solver_field[2], solver_field[1]);
    // add the zero's shift
    assign_add_mul_r(solver_field[2], solver_field[1], sigma[0], N);
    pro = scalar_prod_r(solver_field[1], solver_field[2], N, 1);

    /* For the update of the coeff. of the shifted pol. we need alphas[0](i-1) and alpha_cg(i).
       This is the reason why we need this double definition of alpha */
    alpham1 = alphas[0];

    /* Compute alphas[0](i+1) */
    alphas[0] = normsq/pro;
    for(int im = 1; im < no_shifts; im++) {

      /* Now gamma is a temp variable that corresponds to zita(i+1) */ 
      gamma = zita[im]*alpham1/(alphas[0]*betas[0]*(1.-zita[im]/zitam1[im]) 
				+ alpham1*(1.+sigma[im]*alphas[0]));

      // Now zita(i-1) is put equal to the old zita(i)
      zitam1[im] = zita[im];
      // Now zita(i+1) is updated 
      zita[im] = gamma;
      // Update of alphas(i) = alphas[0](i)*zita(i+1)/zita(i) 
      alphas[im] = alphas[0]*zita[im]/zitam1[im];

      // Compute xs(i+1) = xs(i) + alphas(i)*ps(i) 
      assign_add_mul_r(P[im], ps_mms_solver[im-1], alphas[im], N); 
      // in the CG the corrections are decreasing with the iteration number increasing
      // therefore, we can remove shifts when the norm of the correction vector
      // falls below a threshold
      // this is useful for computing time and needed, because otherwise
      // zita might get smaller than DOUBLE_EPS and, hence, zero
      if(iteration > 0 && (iteration % 20 == 0) && (im == no_shifts-1)) {
	double sn = square_norm(ps_mms_solver[im-1], N, 1);
	if(alphas[no_shifts-1]*alphas[no_shifts-1]*sn <= solver_params->squared_solver_prec) {
	  no_shifts--;
	  if(g_debug_level > 2 && g_proc_id == 0) {
	    printf("# CGMMS: at iteration %d removed one shift, %d remaining\n", iteration, no_shifts);
      	  }
	}
      }
    }
    
    /*  Compute x_(i+1) = x_i + alphas[0](i+1) p_i    */
    assign_add_mul_r(P[0], solver_field[1],  alphas[0], N);
    /*  Compute r_(i+1) = r_i - alphas[0](i+1) Qp_i   */
    assign_add_mul_r(solver_field[0], solver_field[2], -alphas[0], N);

    /* Check whether the precision eps_sq is reached */

    err = square_norm(solver_field[0], N, 1);

    if(g_debug_level > 2 && g_proc_id == g_stdio_proc) {
      printf("# CGMMS iteration: %d residue: %g\n", iteration, err); fflush( stdout );
    }

    if( ((err <= solver_params->squared_solver_prec) && (solver_params->rel_prec == 0)) ||
        ((err <= solver_params->squared_solver_prec*squarenorm) && (solver_params->rel_prec > 0)) ||
        (iteration == solver_params->max_iter -1) ) {
      /* FIXME temporary output of precision until a better solution can be found */
      *cgmms_reached_prec = err;
      break;
    }

    /* Compute betas[0](i+1) = (r(i+1),r(i+1))/(r(i),r(i))
       Compute p(i+1) = r(i+1) + beta(i+1)*p(i)  */
    betas[0] = err/normsq;
    assign_mul_add_r(solver_field[1], betas[0], solver_field[0], N);
    normsq = err;

    /* Compute betas(i+1) = betas[0](i+1)*(zita(i+1)*alphas(i))/(zita(i)*alphas[0](i))
       Compute ps(i+1) = zita(i+1)*r(i+1) + betas(i+1)*ps(i)  */
    for(int im = 1; im < no_shifts; im++) {
      betas[im] = betas[0]*zita[im]*alphas[im]/(zitam1[im]*alphas[0]);
      assign_mul_add_mul_r(ps_mms_solver[im-1], solver_field[0], betas[im], zita[im], N);
    }
  }
  etime = gettime();
  g_sloppy_precision = 0;
  if(iteration == solver_params->max_iter -1) iteration = -1;
  else iteration++;
  if(g_debug_level > 0 && g_proc_id == 0) {
    printf("# CGMMS (%d shifts): iter: %d eps_sq: %1.4e %1.4e t/s\n", solver_params->no_shifts, iteration, solver_params->squared_solver_prec, etime - atime); 
  }
  
  finalize_solver(solver_field, nr_sf);
  return(iteration);
}
Example #5
0
File: gcr.c Project: annube/tmLQCD
int gcr(spinor * const P, spinor * const Q, 
	const int m, const int max_restarts,
	const double eps_sq, const int rel_prec,
	const int N, const int precon, matrix_mult f) {

  int k, l, restart, i, iter = 0;
  double norm_sq, err;
  spinor * rho, * tmp;
  complex ctmp;
  spinor ** solver_field = NULL;
  const int nr_sf = 2;

  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }

  rho = solver_field[0];
  tmp = solver_field[1];

  init_gcr(m, N+RAND);

  norm_sq = square_norm(Q, N, 1);
  if(norm_sq < 1.e-32) {
    norm_sq = 1.;
  }
  
  for(restart = 0; restart < max_restarts; restart++) {
    dfl_sloppy_prec = 0;
    f(tmp, P);
    diff(rho, Q, tmp, N);
    err = square_norm(rho, N, 1);
    if(g_proc_id == g_stdio_proc && g_debug_level > 2){
      printf("GCR: iteration number: %d, true residue: %g\n", iter, err); 
      fflush(stdout);
    }
    if(((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*norm_sq) && (rel_prec == 1))) {
      finalize_solver(solver_field, nr_sf);
      return(iter);
    }
    for(k = 0; k < m; k++) {
      
      if(precon == 0) {
	assign(xi[k], rho, N);
      }
      else {
        zero_spinor_field(xi[k], N);  
        Msap_eo(xi[k], rho, 6);   
 	/* Msap(xi[k], rho, 8); */
      }
	  
      dfl_sloppy_prec = 1;
      dfl_little_D_prec = 1.e-12;
      f(tmp, xi[k]); 
	  
      /* tmp will become chi[k] */
      for(l = 0; l < k; l++) {
        a[l][k] = scalar_prod(chi[l], tmp, N, 1);
        assign_diff_mul(tmp, chi[l], a[l][k], N);
      }
      b[k] = sqrt(square_norm(tmp, N, 1));
      mul_r(chi[k], 1./b[k], tmp, N);
      c[k] = scalar_prod(chi[k], rho, N, 1);
      assign_diff_mul(rho, chi[k], c[k], N);
      err = square_norm(rho, N, 1);
      iter ++;
      if(g_proc_id == g_stdio_proc && g_debug_level > 0){
        if(rel_prec == 1) printf("# GCR: %d\t%g >= %g iterated residue\n", iter, err, eps_sq*norm_sq); 
        else printf("# GCR: %d\t%g >= %giterated residue\n", iter, err, eps_sq);
        fflush(stdout);
      }
      /* Precision reached? */
      if((k == m-1) || ((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*norm_sq) && (rel_prec == 1))) {
        break;
      }
    }

    /* prepare for restart */
    _mult_real(c[k], c[k], 1./b[k]);
    assign_add_mul(P, xi[k], c[k], N);
    for(l = k-1; l >= 0; l--) {
      for(i = l+1; i <= k; i++) {
        _mult_assign_complex(ctmp, a[l][i], c[i]);
        /* c[l] -= ctmp */
        _diff_complex(c[l], ctmp);
      }
      _mult_real(c[l], c[l], 1./b[l]);
      assign_add_mul(P, xi[l], c[l], N);
    }
  }
  finalize_solver(solver_field, nr_sf);
  return(-1);
}
Example #6
0
int incr_eigcg(const int N, const int nrhs,  const int nrhs1, spinor * const x, spinor * const b, 
               const int ldh, matrix_mult f, const double eps_sq1, const double eps_sq, double restart_eps_sq,  
               const int rand_guess_opt, const int rel_prec, const int maxit, int nev, const int v_max) 
{ 
  /*Static variables and arrays.*/
  static spinor **solver_field; /*4 spinor fields*/

  static int ncurEvals=0;       /* current number of stored eigenvectors */
  static int ncurRHS=0;         /* current number of the system being solved */                   

  static spinor **evecs;        /* accumulated eigenvectors for deflation. */

  static void *_evals;
  static double *evals;         /* Ritz values */

  static void *_v;
  static spinor  *V;            /* work array for eigenvector search basis in eigCG */

  static void *_h;
  static _Complex double  *H;            /* The ncurEvals^2 matrix: H=evecs'*A*evecs */ 

  static void *_hu;
  static _Complex double  *HU;           /* used for diagonalization of H if eigenvalues requested
                                   also used as a copy of H if needed*/
  static void *_initwork;                            
  static _Complex double  *initwork;     /* vector of size ldh using with init-CG */ 

  static void *_ework;
  static _Complex double  *ework;
  /* end of the thinking part */

  static void *_work;
  static _Complex double  *work;

  static void *_rwork;
  static double *rwork;
  
  static void *_IPIV;
  static int *IPIV;        /*integer array to store permutations when solving the small linear system*/

  /* some constants */
  char cU='U'; char cN='N';  char cV='V'; 
  _Complex double  tpone= 1.0e+00;
  _Complex double  tzero= 0.0e+00;
  //tpone.re=+1.0e+00; tpone.im=0.0e+00; 
  //tzero.re=+0.0e+00; tzero.im=0.0e+00;

  /* Timing vars */
  double wt1,wt2,wE,wI;

  double eps_sq_used;

 
  /* Variables */
  double machEps = 1e-15;  
  double normb, normsq, tmpd,tmpd2;
  _Complex double  tempz;
  int i,j, ONE = 1;
  int tmpsize,tmpi,info=0;
  int numIts, flag, nAdded, nev_used;
  int maxit_remain;
  int esize,nrsf;

  int parallel; /* for parallel processing of the scalar products */

  /* leading dimension for spinor vectors */
  int LDN;
  if(N==VOLUME)
     LDN = VOLUMEPLUSRAND;
  else
     LDN = VOLUMEPLUSRAND/2;
  
 
  #ifdef MPI
    parallel=1;
  #else
    parallel=0;
  #endif
 
  /*think more about this */
  esize=2*12*N+4*nev*nev;  /* fixed size for ework used for restarting in eigcg*/

  nrsf=4;  /*number of solver fields */
  
  int lwork=3*ldh;

  double cur_res; //current residual squared (initial value will be computed in eigcg)

  /*increment the RHS counter*/
  ncurRHS = ncurRHS +1; 

  //set the tolerance to be used for this right-hand side 
  if(ncurRHS > nrhs1){
    eps_sq_used = eps_sq;
  }
  else{
    eps_sq_used = eps_sq1;
  }

  if(ncurRHS==1)/* If this is the first system, allocate needed memory for the solver*/
  {
    init_solver_field(&solver_field, LDN, nrsf); 
  }

  if(nev==0){ /*incremental eigcg is used as a cg solver. No need to restart forcing no-restart*/
    if(g_proc_id == g_stdio_proc && g_debug_level > 0) {
       fprintf(stdout, "CG won't be restarted in this mode since no deflation will take place (nev=0)\n"); 
       fflush(stdout);
    } 
  
    restart_eps_sq=0.0;
  }




  if((ncurRHS==1) && (nev >0) )/* If this is the first right-hand side and eigenvectors are needed, allocate needed memory*/
  { 
    init_solver_field(&evecs, LDN, ldh); 
     
    #if (defined SSE || defined SSE2 || defined SSE3)

    /*Extra elements are needed for allignment */
    //_v = malloc(LDN*v_max*sizeof(spinor)+ALIGN_BASE);
    _v = calloc(LDN*v_max+ALIGN_BASE,sizeof(spinor));
    V  = (spinor *)(((unsigned long int)(_v)+ALIGN_BASE)&~ALIGN_BASE);

    //_h=malloc(ldh*ldh*sizeof(_Complex double )+ALIGN_BASE);
    _h=calloc(ldh*ldh+ALIGN_BASE,sizeof(_Complex double ));
    H = (_Complex double  *)(((unsigned long int)(_h)+ALIGN_BASE)&~ALIGN_BASE);
 
    //_hu=malloc(ldh*ldh*sizeof(_Complex double )+ALIGN_BASE);
    _hu=calloc(ldh*ldh+ALIGN_BASE,sizeof(_Complex double ));
    HU = (_Complex double  *)(((unsigned long int)(_hu)+ALIGN_BASE)&~ALIGN_BASE);
    
    //_ework = malloc(esize*sizeof(_Complex double )+ALIGN_BASE);
    _ework = calloc(esize+ALIGN_BASE,sizeof(_Complex double ));
    ework=(_Complex double  *)(((unsigned long int)(_ework)+ALIGN_BASE)&~ALIGN_BASE);

    //_initwork = malloc(ldh*sizeof(_Complex double )+ALIGN_BASE);
    _initwork = calloc(ldh+ALIGN_BASE,sizeof(_Complex double ));
    initwork = (_Complex double  *)(((unsigned long int)(_initwork)+ALIGN_BASE)&~ALIGN_BASE);

    //_work = malloc(lwork*sizeof(_Complex double )+ALIGN_BASE);
    _work = calloc(lwork+ALIGN_BASE,sizeof(_Complex double ));
    work = (_Complex double  *)(((unsigned long int)(_work)+ALIGN_BASE)&~ALIGN_BASE);

    //_rwork = malloc(3*ldh*sizeof(double)+ALIGN_BASE);
    _rwork = calloc(3*ldh+ALIGN_BASE,sizeof(double));
    rwork = (double *)(((unsigned long int)(_rwork)+ALIGN_BASE)&~ALIGN_BASE);

    
    //_IPIV = malloc(ldh*sizeof(int)+ALIGN_BASE);
    _IPIV = calloc(ldh+ALIGN_BASE,sizeof(int));
    IPIV = (int *)(((unsigned long int)(_IPIV)+ALIGN_BASE)&~ALIGN_BASE);

    //_evals = malloc(ldh*sizeof(double)+ALIGN_BASE);
    _evals = calloc(ldh+ALIGN_BASE,sizeof(double)); 
    evals = (double *)(((unsigned long int)(_evals)+ALIGN_BASE)&~ALIGN_BASE);


    #else

    V = (spinor *) calloc(LDN*v_max,sizeof(spinor));
    H = calloc(ldh*ldh, sizeof(_Complex double ));
    HU= calloc(ldh*ldh, sizeof(_Complex double ));
    initwork = calloc(ldh, sizeof(_Complex double ));
    ework = calloc(esize, sizeof(_Complex double ));
    work = calloc(lwork,sizeof(_Complex double ));
    rwork= calloc(3*ldh,sizeof(double));
    IPIV = calloc(ldh, sizeof(int));
    evals = (double *) calloc(ldh, sizeof(double));

    #endif
    
  } /*if(ncurRHS==1)*/

  
  if(g_proc_id == g_stdio_proc && g_debug_level > 0) {
    fprintf(stdout, "System %d, eps_sq %e\n",ncurRHS,eps_sq_used); 
    fflush(stdout);
  } 
  
     /*---------------------------------------------------------------*/
     /* Call eigCG until this right-hand side converges               */
     /*---------------------------------------------------------------*/
  wE = 0.0; wI = 0.0;     /* Start accumulator timers */
  flag = -1;    	   /* First time through. Run eigCG regularly */
  maxit_remain = maxit;   /* Initialize Max and current # of iters   */
  numIts = 0;  

  while( flag == -1 || flag == 3)
  {
    //if(g_proc_id==g_stdio_proc)
      //printf("flag= %d, ncurEvals= %d\n",flag,ncurEvals);
    
    if(ncurEvals > 0)
    {
      /* --------------------------------------------------------- */
      /* Perform init-CG with evecs vectors                        */
      /* xinit = xinit + evecs*Hinv*evec'*(b-Ax0) 		     */
      /* --------------------------------------------------------- */

      wt1 = gettime();

      /*r0=b-Ax0*/
      normsq = square_norm(x,N,parallel);
      if(normsq>0.0)
      {
	f(solver_field[0],x); /* solver_field[0]= A*x */
	diff(solver_field[1],b,solver_field[0],N);  /* solver_filed[1]=b-A*x */		
      }
      else
	assign(solver_field[1],b,N); /* solver_field[1]=b */
	
      /* apply the deflation using init-CG */
      /* evecs'*(b-Ax) */
      for(i=0; i<ncurEvals; i++)
      {
        initwork[i]= scalar_prod(evecs[i],solver_field[1],N,parallel);
      }
    
      /* solve the linear system H y = c */
      tmpsize=ldh*ncurEvals;
      _FT(zcopy) (&tmpsize,H,&ONE,HU,&ONE); /* copy H into HU */
      _FT(zgesv) (&ncurEvals,&ONE,HU,&ldh,IPIV,initwork,&ldh,&info);

      if(info != 0)
      {
         if(g_proc_id == g_stdio_proc) {
            fprintf(stderr, "Error in ZGESV:, info =  %d\n",info); 
            fflush(stderr);
         }
         exit(1);
      }
    
      /* x = x + evecs*inv(H)*evecs'*r */
      for(i=0; i<ncurEvals; i++)
      {
        assign_add_mul(x,evecs[i],initwork[i],N);
      }
      
      /* compute elapsed time and add to accumulator */

      wt2 = gettime();
      
      wI = wI + wt2-wt1;
      
    }/* if(ncurEvals > 0) */


    /* ------------------------------------------------------------ */
    /* Adjust nev for eigcg according to available ldh/restart      */
    /* ------------------------------------------------------------ */
	  
    if (flag == 3) { /* restart with the same rhs, set nev_used = 0 */
      nev_used = 0;
      /* if convergence seems before next restart do not restart again */
      if(rel_prec)
      {
	       if (cur_res*(restart_eps_sq) < eps_sq*normb*normb) 
	           restart_eps_sq=0.0;
      }
      else
      {
	       if (cur_res*(restart_eps_sq) < eps_sq) 
	          restart_eps_sq=0.0;
      } /* if(rel_prec) */
	  
    }
    else
    {    
      /* First time through this rhs. Find nev evecs */
      /* limited by the ldh evecs we can store in total */
      if (ldh-ncurEvals < nev)
	       nev = ldh - ncurEvals;
      nev_used = nev;
      
    }

    /* ------------------------------------------------------------ */
    /* Solve Ax = b with x initial guess                            */
    /* ------------------------------------------------------------ */

    wt1 = gettime();

    eigcg( N, LDN, x, b, &normb, eps_sq_used, restart_eps_sq, rel_prec, maxit_remain, 
	     &numIts, &cur_res, &flag, solver_field, f, 
	     nev_used, v_max, V, esize, ework);
     
    //if(g_proc_id == g_stdio_proc) 
        //printf("eigcg flag= %d \n",flag); 
      
    wt2 = gettime();

    wE = wE + wt2-wt1;
    
    /* if flag == 3 update the remain max number of iterations */
    maxit_remain = maxit - numIts;
    
  }
  /* end while (flag ==-1 || flag == 3)               */
  /* ------------------------------------------------ */

  /* ---------- */
  /* Reporting  */
  /* ---------- */
  /* compute the exact residual */
  f(solver_field[0],x); /* solver_field[0]= A*x */
  diff(solver_field[1],b,solver_field[0],N);  /* solver_filed[1]=b-A*x */	
  normsq=square_norm(solver_field[1],N,parallel);
  if(g_debug_level > 0 && g_proc_id == g_stdio_proc)
  {
    fprintf(stdout, "For this rhs:\n");
    fprintf(stdout, "Total initCG Wallclock : %-f\n", wI);
    fprintf(stdout, "Total eigpcg Wallclock : %-f\n", wE);
    fprintf(stdout, "Iterations: %-d\n", numIts); 
    fprintf(stdout, "Residual: %e, Actual Resid of LinSys  : %e\n", cur_res,normsq);
    if (flag != 0) {
      fprintf(stderr, "Error: eigcg returned with nonzero exit status\n");
      return flag;
      fflush(stderr);
    }
    fflush(stdout);
  }
  /* ------------------------------------------------------------------- */
  /* ------------------------------------------------------------------- */
  /* Update the evecs and the factorization of evecs'*A*evecs            */
  /* ------------------------------------------------------------------- */
  if (nev > 0) 
  {

    wt1 = gettime();

    /* Append new Ritz vectors to the basis and orthogonalize them to evecs */
    for(i=0; i<nev_used; i++)
      assign(evecs[i+ncurEvals],&V[i*LDN],N);
    
    nAdded = ortho_new_vectors(evecs,N,ncurEvals,nev_used,machEps);

    /* expand H */
    for(j=ncurEvals; j< (ncurEvals+nAdded); j++)
    {
      f(solver_field[0],evecs[j]);
      
      for(i=0; i<=j; i++)
      {
	       H[i+j*ldh] = scalar_prod(evecs[i],solver_field[0],N,parallel);
	       H[j+i*ldh]=  conj(H[i+j*ldh]);
	       //H[j+i*ldh].re =  H[i+j*ldh].re;
	       //H[j+i*ldh].im = -H[i+j*ldh].im;
      }
      
    }
    
    /* update the number of vectors in the basis */
    ncurEvals = ncurEvals + nAdded;

    /* ---------- */
    /* Reporting  */
    /* ---------- */

    wt2 = gettime();

    
    if(g_proc_id == g_stdio_proc && g_debug_level > 0)
    {
      fprintf(stdout,"ncurRHS %d\n",ncurRHS);
      fprintf(stdout,"ncurEvals %d \n",ncurEvals);
      fprintf(stdout,"Update\n");
      fprintf(stdout,"Added %d vecs\n",nAdded);
      fprintf(stdout,"U Wallclock : %-f\n", wt2-wt1);
      fprintf(stdout,"Note: Update Wall time doesn't include time for computing eigenvalues and their residuals.\n"); 
      fflush(stdout);     
    }
    
    if(g_debug_level > 3)  /*compute eigenvalues and their residuals if requested*/
    {
      /* copy H into HU */
      tmpsize=ldh*ncurEvals;
      _FT(zcopy) (&tmpsize,H,&ONE,HU,&ONE);

      /* compute eigenvalues and eigenvectors of HU (using V and spinor fields as tmp work spaces)*/
      _FT(zheev)(&cV, &cU, &ncurEvals, HU, &ldh, evals, work, &lwork, rwork, &info,1,1);

      if(info != 0)
      {
	if(g_proc_id == g_stdio_proc) 
	{
	  fprintf(stderr,"Error in ZHEEV:, info =  %d\n",info); 
          fflush(stderr);
	}
	exit(1);
      }

      /* compute residuals and print out results */
      for(i=0; i<ncurEvals; i++)
      {
	    tmpi=12*N;
            tmpsize=12*LDN;

	    
            _FT(zgemv)(&cN,&tmpi,&ncurEvals,&tpone,(_Complex double  *)evecs[0],&tmpsize,
			                 &HU[i*ldh], &ONE,&tzero,(_Complex double  *) solver_field[0],&ONE,1);

            normsq=square_norm(solver_field[0],N,parallel);
            
            f(solver_field[1],solver_field[0]);

            tempz = scalar_prod(solver_field[0],solver_field[1],N,parallel);

            evals[i] = creal(tempz)/normsq;

            mul_r(solver_field[2],evals[i],solver_field[0],N);

            diff(solver_field[3],solver_field[1],solver_field[2], N);
	    
	    tmpd2= square_norm(solver_field[3],N,parallel);

            tmpd= sqrt(tmpd2/normsq);
	    
	    if(g_proc_id == g_stdio_proc)
	    {fprintf(stdout,"RR Eval[%d]: %22.15E rnorm: %22.15E\n", i+1, evals[i], tmpd); fflush(stdout);}
	
      } 
       
    }/*if(plvl >= 2)*/
  } /* if(nev>0) */

  /*--------------------------------------*/
  /*free memory that is no longer needed  */
  /* and reset ncurRHS and ncurEvals      */
  /*--------------------------------------*/

  if(ncurRHS == nrhs) /*this was the last system to be solved */
  {
     ncurRHS=0;
     ncurEvals=0;
     finalize_solver(solver_field,nrsf);
  }

  if( (ncurRHS == nrhs) && (nev >0) )/*this was the last system to be solved and there were allocated memory for eigenvector computation*/
  {
     finalize_solver(evecs,ldh);
     #if (defined SSE || defined SSE2 || defined SSE3)
     free(_v);
     free(_h);
     free(_hu);
     free(_ework);
     free(_initwork);
     free(_IPIV);
     free(_evals);
     free(_rwork);
     free(_work);
     #else
     free(V);
     free(H);
     free(HU);
     free(ework);
     free(initwork);
     free(IPIV);
     free(evals);
     free(rwork);
     free(work);
     #endif
  }

  return numIts;
}
Example #7
0
/* P output = solution , Q input = source */
int pcg_her(spinor * const P, spinor * const Q, const int max_iter, 
	    double eps_sq, const int rel_prec, const int N, matrix_mult f) {
  double normsp, pro, pro2, err, alpha_cg, beta_cg, squarenorm;
  int iteration;
  spinor ** solver_field = NULL;
  const int nr_sf = 5;

  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }
  squarenorm = square_norm(Q, N, 1);
  /*        !!!!   INITIALIZATION    !!!! */
  assign(solver_field[0], P, N);
  /*        (r_0,r_0)  =  normsq         */
  normsp = square_norm(P, N, 1);

  assign(solver_field[3], Q, N);
  /* initialize residue r and search vector p */
  if(normsp==0){
    /* if a starting solution vector equal to zero is chosen */
    /* r0 */
    assign(solver_field[1], solver_field[3], N);
    /* p0 */
  }
  else{
    /* if a starting solution vector different from zero is chosen */
    /* r0 = b - A x0 */
    f(solver_field[2], solver_field[0]);
    diff(solver_field[1], solver_field[3], solver_field[2], N);
  }
  /* z0 = M^-1 r0 */
  invert_eigenvalue_part(solver_field[3], solver_field[1], 10, N);
  /* p0 = z0 */
  assign(solver_field[2], solver_field[3], N);

  /* Is this really real? */
  pro2 = scalar_prod_r(solver_field[1], solver_field[3], N, 1);  
  /* main loop */
  for(iteration = 0; iteration < max_iter; iteration++) {
    /* A p */
    f(solver_field[4], solver_field[2]);

    pro = scalar_prod_r(solver_field[2], solver_field[4], N, 1);
    /*  Compute alpha_cg(i+1)   */
    alpha_cg=pro2/pro;
     
    /*  Compute x_(i+1) = x_i + alpha_cg(i+1) p_i    */
    assign_add_mul_r(solver_field[0], solver_field[2],  alpha_cg, N);
    /*  Compute r_(i+1) = r_i - alpha_cg(i+1) Qp_i   */
    assign_add_mul_r(solver_field[1], solver_field[4], -alpha_cg, N);

    /* Check whether the precision is reached ... */
    err=square_norm(solver_field[1], N, 1);
    if(g_debug_level > 1 && g_proc_id == g_stdio_proc) {
      printf("%d\t%g\n",iteration,err); fflush( stdout);
    }

    if(((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*squarenorm) && (rel_prec == 1))) {
      assign(P, solver_field[0], N);
      g_sloppy_precision = 0;
      finalize_solver(solver_field, nr_sf);
      return(iteration+1);
    }
#ifdef _USE_HALFSPINOR
    if(((err*err <= eps_sq) && (rel_prec == 0)) || ((err*err <= eps_sq*squarenorm) && (rel_prec == 1)) || iteration > 1400) {
      g_sloppy_precision = 1;
      if(g_debug_level > 2 && g_proc_id == g_stdio_proc) {
	printf("sloppy precision on\n"); fflush( stdout);
      }
    }
#endif
    /* z_j */
    beta_cg = 1/pro2;
/*     invert_eigenvalue_part(solver_field[3], solver_field[1], 10, N); */
    /* Compute beta_cg(i+1)
       Compute p_(i+1) = r_i+1 + beta_(i+1) p_i     */
    pro2 = scalar_prod_r(solver_field[1], solver_field[3], N, 1);
    beta_cg *= pro2;
    assign_mul_add_r(solver_field[2], beta_cg, solver_field[3], N);
  }
  assign(P, solver_field[0], N);
  g_sloppy_precision = 0;
/*   return(-1); */
  finalize_solver(solver_field, nr_sf);
  return(1);
}
Example #8
0
/* P inout (guess for the solving spinor)
   Q input
*/
int bicgstab_complex(spinor * const P,spinor * const Q, const int max_iter, 
		     double eps_sq, const int rel_prec, 
		     const int N, matrix_mult f){
  double err, d1, squarenorm;
  complex rho0, rho1, omega, alpha, beta, nom, denom;
  int i;
  spinor * r, * p, * v, *hatr, * s, * t;
  spinor ** solver_field = NULL;
  const int nr_sf = 6;

  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }
  hatr = solver_field[0];
  r = solver_field[1];
  v = solver_field[2];
  p = solver_field[3];
  s = solver_field[4];
  t = solver_field[5];

  f(r, P);
  diff(p, Q, r, N);
  assign(r, p, N);
  assign(hatr, p, N);
  rho0 = scalar_prod(hatr, r, N, 1);
  squarenorm = square_norm(Q, N, 1);

  for(i = 0; i < max_iter; i++){
    err = square_norm(r, N, 1);
    if(g_proc_id == g_stdio_proc && g_debug_level > 1) {
      printf("%d %e\n", i, err);
      fflush(stdout);
    }
  
    if((((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*squarenorm) && (rel_prec == 1))) && i>0) {
      finalize_solver(solver_field, nr_sf);
      return(i);
    }
    f(v, p);
    denom = scalar_prod(hatr, v, N, 1);
    _div_complex(alpha, rho0, denom);
    assign(s, r, N);
    assign_diff_mul(s, v, alpha, N);
    f(t, s);
    omega = scalar_prod(t,s, N, 1);
    d1 = square_norm(t, N, 1);
    omega.re/=d1; omega.im/=d1;
    assign_add_mul_add_mul(P, p, s, alpha, omega, N);
    assign(r, s, N);
    assign_diff_mul(r, t, omega, N);
    rho1 = scalar_prod(hatr, r, N, 1);
    if(fabs(rho1.re) < 1.e-25 && fabs(rho1.im) < 1.e-25) {
      finalize_solver(solver_field, nr_sf);
      return(-1);
    }
    _mult_assign_complex(nom, alpha, rho1);
    _mult_assign_complex(denom, omega, rho0);
    _div_complex(beta, nom, denom);
    omega.re=-omega.re; omega.im=-omega.im;
    assign_mul_bra_add_mul_ket_add(p, v, r, omega, beta, N);
    rho0.re = rho1.re; rho0.im = rho1.im;
  }
  finalize_solver(solver_field, nr_sf);
  return -1;
}
Example #9
0
int cr(spinor * const P, spinor * const Q, 
        const int m, const int max_restarts,
        const double eps_sq, const int rel_prec,
        const int N, const int precon, matrix_mult f) {

    int k, l, restart, i, iter = 0;
    double norm_sq, err;
    spinor * xi, * Axi, * chi, * Achi, *tmp;
    _Complex double alpha, beta;
    static _Complex double one = 1.0;
    double norm, rAr, newrAr;
    double atime, etime;
    spinor ** solver_field = NULL;
    const int nr_sf = 5;
    int save_sloppy = g_sloppy_precision;

    if(N == VOLUME) {
        init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
    }
    else {
        init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
    }

    atime = gettime();

    xi = solver_field[0];
    Axi = solver_field[1];
    chi = solver_field[2];
    Achi = solver_field[3];
    tmp = solver_field[4];

    norm_sq = square_norm(Q, N, 1);
    if(norm_sq < 1.e-32) {
        norm_sq = 1.;
    }

    dfl_sloppy_prec = 0;
    f(tmp, P);
    diff(chi, Q, tmp, N);
    assign(xi, chi, N);
    f(Axi, xi);
    f(Achi, chi);
    rAr = scalar_prod(chi, Achi, N, 1);
    err = square_norm(chi, N, 1);
    if(((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*norm_sq) && (rel_prec == 1))) {
        finalize_solver(solver_field, nr_sf);
        return(iter);
    }
    

    for(k = 0; k < m; k++) {

        dfl_sloppy_prec = 1;

        norm = square_norm(Axi, N, 1);
        alpha = rAr/norm;
        assign_add_mul(P, xi, alpha, N);
        /* get the new residual */
        assign_diff_mul(chi, Axi, alpha, N);

        err = square_norm(chi, N, 1);
        iter ++;
        etime = gettime();
        if(g_proc_id == g_stdio_proc && g_debug_level > 3){
            printf("# CR: %d\t%g iterated residue, time spent %f s\n", iter, err, (etime - atime)); 
            fflush(stdout);
        }
        /* Precision reached? */
        if((k == m-1) || ((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*norm_sq) && (rel_prec == 1))) {
            break;
        }

#ifdef _USE_HALFSPINOR
        if(((err*err <= eps_sq) && (rel_prec == 0)) || ((err*err <= eps_sq*norm_sq) && (rel_prec == 1))) {
            if (g_sloppy_precision_flag == 1) {
                g_sloppy_precision = 1;
                if(g_debug_level > 2 && g_proc_id == g_stdio_proc) {
                    printf("sloppy precision on\n"); fflush( stdout);
                }
            }
        }
#endif

        f(Achi, chi); 

        newrAr = scalar_prod(chi, Achi, N, 1); 
        beta = newrAr/rAr;
        assign_mul_add_mul(xi, beta, chi, one, N);
        assign_mul_add_mul(Axi,beta, Achi, one, N);
        rAr = newrAr;

    }

    g_sloppy_precision = save_sloppy;
    finalize_solver(solver_field, nr_sf);
    return(-1);
}
Example #10
0
int mixed_cg_mms_tm_nd(spinor ** const Pup, spinor ** const Pdn, 
		 spinor * const Qup, spinor * const Qdn, 
		 solver_pm_t * solver_pm) {

  double eps_sq = solver_pm->squared_solver_prec;
  int noshifts = solver_pm->no_shifts;
  int rel_prec = solver_pm->rel_prec;
  int max_iter = solver_pm->max_iter;
  int check_abs, check_rel;
  double * shifts = solver_pm->shifts;
  int Nshift = noshifts;
 
  // algorithm
  double rr_up, rr_dn, rr, rr_old, r0r0, dAd_up, dAd_dn, dAd;  
  
  if(rel_prec){
    check_rel = 1;
    check_abs = 0;
   }
   else{
    check_rel = 0;
    check_abs = 1;     
  }
  
  int use_eo=1, eofactor=2;
  //not even-odd?
  if(solver_pm->sdim == VOLUME) {
    eofactor = 1;
    use_eo = 0;
  }
  
  int N = VOLUME/eofactor;
  int Vol = VOLUMEPLUSRAND/eofactor;
 
  
  // norm of source
  rr_up = square_norm(Qup, N, 1);
  rr_dn = square_norm(Qdn, N, 1);
  rr    = rr_up + rr_dn;  
 
  if( (g_cart_id == 0 && g_debug_level > 2)) printf("# CGMMSND_mixed: Initial mms residue: %.6e\n", rr);
  if(rr < 1.0e-4){
    if( (g_cart_id == 0 && g_debug_level > 2)) printf("# CGMMSND_mixed: norm of source too low: falling back to double mms solver %.6e\n", rr);
    return(cg_mms_tm_nd(Pup, Pdn, Qup, Qdn, solver_pm));
  }
  
  r0r0   = rr;	// for relative precision 
  rr_old = rr;	// for the first iteration
  
  
  
  //allocate an auxiliary solver fields 
  spinor ** sf = NULL;
  const int nr_sf = 6;
  init_solver_field(&sf, Vol, nr_sf);  
   
  spinor32 ** sf32 = NULL;
  const int nr_sf32 = 8;
  init_solver_field_32(&sf32, Vol, nr_sf32);  
  
  
  //spinor fields  
  //we need one less than shifts, since one field is cared of by the usual cg fields
  init_mms_tm_nd_32(noshifts-1, Vol);
   
  // Pup/dn  can be used as auxiliary field to work on, as it is not later used (could be used as initial guess at the very start)
  // Q_up/dn  can be used as feedback, or if not, also as auxiliary field
  

  
  //allocate cg constants
  double * sigma;
  double * zitam1, * zita;
  double * alphas, * betas;
  double gamma;
  double alpham1;
    sigma = (double*)calloc((noshifts), sizeof(double));
    zitam1 = (double*)calloc((noshifts), sizeof(double));
    zita = (double*)calloc((noshifts), sizeof(double));
    alphas = (double*)calloc((noshifts), sizeof(double));
    betas = (double*)calloc((noshifts), sizeof(double));



  spinor32 *  r_up, *  r_dn, * Ad_up, * Ad_dn, *  x_up, *  x_dn, *  d_up, *  d_dn;		
  spinor * r_up_d, * r_dn_d, * x_up_d, * x_dn_d, * Ax_up_d, * Ax_dn_d;
  
 // iteration counter
 int j; 
 
 //reliable update flag
 int rel_update = 0;
 //no of reliable updates done
 int no_rel_update = 0;
 //use reliable update flag
 int use_reliable = 1;
 
 double rel_delta = 1.0e-10;
 int trigger_shift = -1;
 double * res;
 double * res0;
 double * maxres;
 res = (double*)calloc((noshifts), sizeof(double));
 res0 = (double*)calloc((noshifts), sizeof(double));
 maxres = (double*)calloc((noshifts), sizeof(double)); 
    
  /////////////////
  // ASSIGNMENTS //
  /////////////////
  
  x_up  = sf32[0];	
  x_dn  = sf32[1];	
  r_up  = sf32[2];	
  r_dn  = sf32[3];
  d_up  = sf32[4];
  d_dn  = sf32[5];
  Ad_up = sf32[6];
  Ad_dn = sf32[7];


  x_up_d = sf[0];
  x_dn_d = sf[1];
  r_up_d = sf[2];
  r_dn_d = sf[3];
  Ax_up_d = sf[4];
  Ax_dn_d = sf[5];  
  
  /*
  //matrix test
   spinor32 * help_low_up = sf32[0];
   spinor32 * help_low_dn = sf32[1];   
   spinor * help_high_up = sf[0];
   spinor * help_high_dn = sf[1];   
   assign_to_32(help_low_up, Qup, N);
   assign_to_32(help_low_dn, Qdn, N);   
   assign(help_high_up, Qup, N);
   assign(help_high_dn, Qdn, N);   
   double sqn_high = square_norm(help_high_up,N,1) +
                     square_norm(help_high_dn,N,1);
   printf("square_norm(Q_high) = %e\n", sqn_high);
   float sqn_low  = square_norm_32(help_low_up,N,1) +
                    square_norm_32(help_low_dn,N,1);   
   printf("square_norm(Q_low) = %e\n", sqn_low);  
   
   solver_pm->M_ndpsi32(sf32[2], sf32[3], help_low_up, help_low_dn);
   solver_pm->M_ndpsi(sf[2], sf[3], help_high_up, help_high_dn);
   
   assign_to_64(sf[4], sf32[2], N);
   assign_to_64(sf[5], sf32[3], N);   
   diff(sf[0], sf[4], sf[2], N);
   diff(sf[1], sf[5], sf[3], N);   
   double sqnrm = square_norm(sf[0], N, 1) +
                  square_norm(sf[1], N, 1);
   printf("Operator 32 test: (square_norm) / (spinor component) = %.8e\n", sqnrm/24.0/N);
   exit(1);  
  */
  
  // r(0) = b
  assign_to_32(r_up, Qup, N);
  assign_to_32(r_dn, Qdn, N); 
  
  // d(0) = b
  assign_to_32(d_up, Qup, N);
  assign_to_32(d_dn, Qdn, N); 
  

  
  maxres[0] = rr;
  res[0] = rr;
  res0[0] = rr;
  alphas[0] = 1.0;
  betas[0] = 0.0;
  sigma[0] = shifts[0]*shifts[0];
  if(g_cart_id == 0 && g_debug_level > 2) printf("# CGMMSND_mixed: shift %d is %e\n", 0, sigma[0]);

  // currently only implemented for P=0 
  for(int im = 1; im < noshifts; im++) {
    maxres[im] = rr;
    res[im] = rr;
    res0[im] = rr;    
    sigma[im] = shifts[im]*shifts[im] - sigma[0];
    if(g_cart_id == 0 && g_debug_level > 2) printf("# CGMMSND_mixed: shift %d is %e\n", im, sigma[im]);
    // these will be the result spinor fields
    zero_spinor_field_32(mms_x_up[im-1], N);
    zero_spinor_field_32(mms_x_dn[im-1], N);    

    assign_to_32(mms_d_up[im-1], Qup, N);
    assign_to_32(mms_d_dn[im-1], Qdn, N);
    zitam1[im] = 1.0;
    zita[im] = 1.0;
    alphas[im] = 1.0;
    betas[im] = 0.0;
  }

  //zero fields for solution Pup, Pdn
  for(int im = 0; im < noshifts; im++){
    zero_spinor_field(Pup[im], N);
    zero_spinor_field(Pdn[im], N);    
  }
  
  
  //////////
  // LOOP //
  //////////
    
  for (j = 0; j < max_iter; j++) {   
      // A*d(k)
    solver_pm->M_ndpsi32(Ad_up, Ad_dn, d_up,  d_dn);     
    //add zero'th shift
    assign_add_mul_r_32(Ad_up, d_up, (float) sigma[0], N);
    assign_add_mul_r_32(Ad_dn, d_dn, (float) sigma[0], N);
	     
    
    // alpha = r(k)*r(k) / d(k)*A*d(k)
    dAd_up = scalar_prod_r_32(d_up, Ad_up, N, 1);
    dAd_dn = scalar_prod_r_32(d_dn, Ad_dn, N, 1);

    dAd    = dAd_up + dAd_dn; 
    alpham1 = alphas[0];
    alphas[0]  = rr_old / dAd;	// rr_old is taken from the last iteration respectively
    
   
    // r(k+1)
    assign_add_mul_r_32(r_up, Ad_up, (float) -alphas[0],N);
    assign_add_mul_r_32(r_dn, Ad_dn, (float) -alphas[0],N);

    // r(k+1)*r(k+1)
    rr_up  = square_norm_32(r_up, N, 1);
    rr_dn  = square_norm_32(r_dn, N, 1);
    rr     = rr_up + rr_dn;
    
      

    if((g_cart_id == 0) && (g_debug_level > 2)) printf("# CGMMSND_mixed: mms iteration j = %i: rr = %.6e\n", j, rr);

		 

    // aborting ?? // check wether precision is reached ...
    if ( ((check_abs)&&(rr <= eps_sq)) || ((check_rel)&&(rr <= eps_sq*r0r0)) ) 
    {
	if ((check_rel)&&(rr <= eps_sq*r0r0)) {
	  if((g_cart_id == 0) && (g_debug_level > 3)) printf("# CGMMSND_mixed: Reached relative solver precision of eps_rel = %.2e\n", eps_sq);
	}
      break;
   }
    
    // update alphas and zitas  
    // used later
    for(int im = 1; im < noshifts; im++) {
      gamma = zita[im]*alpham1/(alphas[0]*betas[0]*(1.-zita[im]/zitam1[im]) 
				+ alpham1*(1.+sigma[im]*alphas[0]));
      zitam1[im] = zita[im];
      zita[im] = gamma;
      alphas[im] = alphas[0]*zita[im]/zitam1[im];
    }  
    
    //check for reliable update
    res[0] = rr;
    for(int im=1; im<noshifts; im++) res[im] = rr * zita[im]; 
      
    rel_update = 0;
    for(int im = (noshifts-1); im >= 0; im--) {
      if( res[im] > maxres[im] ) maxres[im] = res[im];
      if( (res[im] < rel_delta*res0[im]) && (res0[im]<=maxres[im]) && (use_reliable) ) rel_update=1; 
      if( rel_update && ( trigger_shift == -1) ) trigger_shift = im;
    }     
    
    if(!rel_update)
    {
      // x_j(k+1) = x_j(k) + alpha_j*d_j(k) 
      // alphas are set above
      assign_add_mul_r_32(x_up, d_up, (float) alphas[0], N);   
      assign_add_mul_r_32(x_dn, d_dn, (float) alphas[0], N);
      
      
      for(int im = 1; im < noshifts; im++) {
	assign_add_mul_r_32(mms_x_up[im-1], mms_d_up[im-1], (float) alphas[im],  N);   
	assign_add_mul_r_32(mms_x_dn[im-1], mms_d_dn[im-1], (float) alphas[im],  N);  
      }  
   
      // beta = r(k+1)*r(k+1) / r(k)*r(k)
      betas[0] = rr / rr_old;
      rr_old = rr;  // for next iteration
      
      // d_0(k+1) = r(k+1) + beta*d_0(k) 
      assign_mul_add_r_32(d_up, (float) betas[0], r_up, N);  
      assign_mul_add_r_32(d_dn, (float) betas[0], r_dn, N); 
       
      // d_j(k+1) = zita*r(k+1) + beta*d_j(k)
      for(int im = 1; im < noshifts; im++) {
	betas[im] = betas[0]*zita[im]*alphas[im]/(zitam1[im]*alphas[0]);
	assign_mul_add_mul_r_32(mms_d_up[im-1], r_up, (float) betas[im], (float) zita[im], N);
	assign_mul_add_mul_r_32(mms_d_dn[im-1], r_dn, (float) betas[im], (float) zita[im], N);
      }   
    }
    else{
      //reliable update
      if( (g_cart_id == 0) && (g_debug_level > 3) ){
	printf("# CGMMSND_mixed: Shift %d with offset squared %e triggered a reliable update\n", trigger_shift, sigma[trigger_shift]);
      }
      //add low prec solutions  
      assign_add_mul_r_32(x_up, d_up, (float) alphas[0], N); 
      assign_add_mul_r_32(x_dn, d_dn, (float) alphas[0], N); 
      
      addto_32(Pup[0], x_up, N);
      addto_32(Pdn[0], x_dn, N);	    
      for(int im = 1; im < noshifts; im++) {  
	assign_add_mul_r_32(mms_x_up[im-1], mms_d_up[im-1], alphas[im], N);
	assign_add_mul_r_32(mms_x_dn[im-1], mms_d_dn[im-1], alphas[im], N);	
	addto_32(Pup[im], mms_x_up[im-1], N);
        addto_32(Pdn[im], mms_x_dn[im-1], N);	
      }
      
      //add low precision for shift 0 only
      addto_32(x_up_d, x_up, N); 
      addto_32(x_dn_d, x_dn, N);      
 
      
      solver_pm->M_ndpsi(Ax_up_d, Ax_dn_d, x_up_d,  x_dn_d);
      //add zero'th shift
      assign_add_mul_r(Ax_up_d, x_up_d, sigma[0], N);
      assign_add_mul_r(Ax_dn_d, x_dn_d, sigma[0], N);
      
      diff(r_up_d, Qup, Ax_up_d, N);         
      diff(r_dn_d, Qdn, Ax_dn_d, N); 
 
      rr_up = square_norm(r_up_d, N, 1);
      rr_dn = square_norm(r_dn_d, N, 1);
      rr    = rr_up + rr_dn;
      if ((g_cart_id == 0) && (g_debug_level > 3) ) printf("# CGMMSND_mixed: New residue after reliable update: %.6e\n", rr);
       
      //update res[im]
      res[0] = rr;

       
      if(res[trigger_shift] > res0[trigger_shift]){
	if(g_cart_id == 0) printf("# CGMMSND_mixed: Warning: residue of shift no %d got larger after rel. update\n", trigger_shift);
	//if this is the zero'th shift not getting better -> no further convergence, break
	if(trigger_shift == 0) break;
      }    
      
      //zero float fields
      zero_spinor_field_32(x_up, N);
      zero_spinor_field_32(x_dn, N);        
      for(int im = 1; im < noshifts; im++) {
	zero_spinor_field_32(mms_x_up[im-1], N);
	zero_spinor_field_32(mms_x_dn[im-1], N);  
      }
      
      //update the source
      assign_to_32(r_up, r_up_d, N);
      assign_to_32(r_dn, r_dn_d, N); 
      

      
      betas[0] = res[0]/rr_old;
      rr_old = rr;
      // d_0(k+1) = r(k+1) + beta*d_0(k)
      assign_mul_add_r_32(d_up, betas[0], r_up, N);
      assign_mul_add_r_32(d_dn, betas[0], r_dn, N);      
      // d_j(k+1) = r(k+1) + beta*d_j(k)
      for(int im = 1; im < noshifts; im++) {
	betas[im] = betas[0]*zita[im]*alphas[im]/(zitam1[im]*alphas[0]);
        assign_mul_add_mul_r_32(mms_d_up[im-1], r_up, (float) betas[im], (float) zita[im], N);
	assign_mul_add_mul_r_32(mms_d_dn[im-1], r_dn, (float) betas[im], (float) zita[im], N);
      } 
      
      //new maxres for the shift that initiated the reliable update
      res[trigger_shift] = res[0]*zita[trigger_shift]*zita[trigger_shift];
      res0[trigger_shift] = res[trigger_shift];  
      maxres[trigger_shift] = res[trigger_shift];
      trigger_shift = -1;
      no_rel_update ++;
    }	//reliable update	
    
    //check if some shift is converged
    for(int im = 1; im < noshifts; im++) {    
      if(j > 0 && (j % 10 == 0) && (im == noshifts-1)) {
	double sn = square_norm_32(mms_d_up[im-1], N, 1);
	sn +=       square_norm_32(mms_d_dn[im-1], N, 1);
	if(alphas[noshifts-1]*alphas[noshifts-1]*sn <= eps_sq) {
	  noshifts--;
	  if( (g_debug_level > 1) && (g_cart_id == 0) ) {
	    printf("# CGMMSND_mixed: at iteration %d removed one shift, %d remaining\n", j, noshifts);
	  }
	  //if removed we add the latest solution vector for this shift 	  
	  addto_32(Pup[im], mms_x_up[im-1], N);
          addto_32(Pdn[im], mms_x_dn[im-1], N);
	}
      }
    }
       
  }//LOOP
  
  if( (g_cart_id == 0) && (g_debug_level > 1) ) printf("Final mms residue: %.6e\n", rr);

  //add the latest solutions 
  for(int im = 0; im < noshifts; im++) {  
    if(im == 0){   
      addto_32(Pup[0], x_up, N);
      addto_32(Pdn[0], x_dn, N);        
    }
    else{     
      addto_32(Pup[im], mms_x_up[im-1], N);
      addto_32(Pdn[im], mms_x_dn[im-1], N);      
    }
  } 
  
  if(g_debug_level > 4){
    if(g_cart_id == 0) printf("# CGMMSND_mixed: Checking mms result:\n");
    //loop over all shifts (-> Nshift) 
    for(int im = 0; im < Nshift; im++){
      solver_pm->M_ndpsi(sf[0], sf[1], Pup[im], Pdn[im]);
      assign_add_mul_r(sf[0], Pup[im] , shifts[im]*shifts[im], N);
      assign_add_mul_r(sf[1], Pdn[im] , shifts[im]*shifts[im], N);
      diff(sf[2], sf[0], Qup, N);
      diff(sf[3], sf[1], Qdn, N);
      rr_up = square_norm(sf[2], N, 1);
      rr_dn = square_norm(sf[3], N, 1);      
      rr = rr_up + rr_dn;
      if(g_cart_id == 0) printf("# CGMMSND_mixed: Shift[%d] squared residue: %e\n", im, rr);
    }
  }
  
 
  finalize_solver(sf, nr_sf);  
  finalize_solver_32(sf32, nr_sf32); 
 
  //free cg constants
  free(sigma); free(zitam1); free(zita); free(alphas); free(betas);    
  
  //free reliable update stuff
  free(res); free(res0); free(maxres);


  //if not converged -> return(-1)
  if(j<max_iter){
    return(j);
  }
  else{
    return(-1);
  }
}//
Example #11
0
void Msap_eo(spinor * const P, spinor * const Q, const int Ncy) {
    int blk, ncy = 0, eo, vol;
    spinor * r, * a, * b;
    double nrm;
    spinor * b_even, * b_odd, * a_even, * a_odd;
    spinor ** solver_field = NULL;
    const int nr_sf = 3;

    /*
     * here it would be probably better to get the working fields as a parameter
     * from the calling function
     */
    init_solver_field(&solver_field, VOLUME, nr_sf);
    r = solver_field[0];
    a = solver_field[1];
    b = solver_field[2];

    vol = block_list[0].volume/2;
    b_even = b;
    b_odd = b + vol + 1;
    a_even = a;
    a_odd = a + vol + 1;

    for(ncy = 0; ncy < Ncy; ncy++) {
        /* compute the global residue        */
        /* this can be done more efficiently */
        /* here only a naive implementation  */
        for(eo = 0; eo < 2; eo++) {
            D_psi(r, P);
            diff(r, Q, r, VOLUME);
            nrm = square_norm(r, VOLUME, 1);
            if(g_proc_id == 0 && g_debug_level > 1 && eo == 1) {
                printf("Msap: %d %1.3e\n", ncy, nrm);
            }
            /* choose the even (odd) block */

            for (blk = 0; blk < nb_blocks; blk++) {
                if(block_list[blk].evenodd == eo) {
                    /* get part of r corresponding to block blk into b_even and b_odd */
                    copy_global_to_block_eo(b_even, b_odd, r, blk);

                    assign_mul_one_pm_imu_inv(a_even, b_even, +1., vol);
                    Block_H_psi(&block_list[blk], a_odd, a_even, OE);
                    /* a_odd = a_odd - b_odd */
                    assign_mul_add_r(a_odd, -1., b_odd, vol);

                    mrblk(b_odd, a_odd, 3, 1.e-31, 1, vol, &Mtm_plus_block_psi, blk);

                    Block_H_psi(&block_list[blk], b_even, b_odd, EO);
                    mul_one_pm_imu_inv(b_even, +1., vol);
                    /* a_even = a_even - b_even */
                    assign_add_mul_r(a_even, b_even, -1., vol);

                    /* add even and odd part up to full spinor P */
                    add_eo_block_to_global(P, a_even, b_odd, blk);
                }
            }
        }
    }
    finalize_solver(solver_field, nr_sf);
    return;
}
Example #12
0
int invert_doublet_eo_quda(spinor * const Even_new_s, spinor * const Odd_new_s,
                           spinor * const Even_new_c, spinor * const Odd_new_c,
                           spinor * const Even_s, spinor * const Odd_s,
                           spinor * const Even_c, spinor * const Odd_c,
                           const double precision, const int max_iter,
                           const int solver_flag, const int rel_prec, const int even_odd_flag,
                           const SloppyPrecision sloppy_precision,
                           CompressionType compression) {

  spinor ** solver_field = NULL;
  const int nr_sf = 4;
  init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);

  convert_eo_to_lexic(solver_field[0],   Even_s,  Odd_s);
  convert_eo_to_lexic(solver_field[1],   Even_c,  Odd_c);
//  convert_eo_to_lexic(g_spinor_field[DUM_DERI+1], Even_new, Odd_new);

  void *spinorIn    = (void*)solver_field[0]; // source
  void *spinorIn_c  = (void*)solver_field[1]; // charme source
  void *spinorOut   = (void*)solver_field[2]; // solution
  void *spinorOut_c = (void*)solver_field[3]; // charme solution

  if ( rel_prec )
    inv_param.residual_type = QUDA_L2_RELATIVE_RESIDUAL;
  else
    inv_param.residual_type = QUDA_L2_ABSOLUTE_RESIDUAL;

  inv_param.kappa = g_kappa;

  // IMPORTANT: use opposite TM mu-flavor since gamma5 -> -gamma5
  inv_param.mu      = -g_mubar /2./g_kappa;
  inv_param.epsilon =  g_epsbar/2./g_kappa;


  // figure out which BC to use (theta, trivial...)
  set_boundary_conditions(&compression);

  // set the sloppy precision of the mixed prec solver
  set_sloppy_prec(sloppy_precision);

  // load gauge after setting precision
   _loadGaugeQuda(compression);

  // choose dslash type
  if( g_c_sw > 0.0 ) {
    inv_param.dslash_type = QUDA_TWISTED_CLOVER_DSLASH;
    inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN;
    inv_param.solution_type = QUDA_MAT_SOLUTION;
    inv_param.clover_order = QUDA_PACKED_CLOVER_ORDER;
    inv_param.clover_coeff = g_c_sw*g_kappa;
  }
  else {
    inv_param.dslash_type = QUDA_TWISTED_MASS_DSLASH;
    inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN_ASYMMETRIC;
    inv_param.solution_type = QUDA_MAT_SOLUTION;
  }

  // choose solver
  if(solver_flag == BICGSTAB) {
    if(g_proc_id == 0) {printf("# QUDA: Using BiCGstab!\n"); fflush(stdout);}
    inv_param.inv_type = QUDA_BICGSTAB_INVERTER;
  }
  else {
    /* Here we invert the hermitean operator squared */
    inv_param.inv_type = QUDA_CG_INVERTER;
    if(g_proc_id == 0) {
      printf("# QUDA: Using mixed precision CG!\n");
      printf("# QUDA: mu = %f, kappa = %f\n", g_mu/2./g_kappa, g_kappa);
      fflush(stdout);
    }
  }

  if( even_odd_flag ) {
    inv_param.solve_type = QUDA_NORMOP_PC_SOLVE;
    if(g_proc_id == 0) printf("# QUDA: Using preconditioning!\n");
  }
  else {
    inv_param.solve_type = QUDA_NORMOP_SOLVE;
    if(g_proc_id == 0) printf("# QUDA: Not using preconditioning!\n");
  }

  inv_param.tol = sqrt(precision)*0.25;
  inv_param.maxiter = max_iter;

  inv_param.twist_flavor = QUDA_TWIST_NONDEG_DOUBLET;
  inv_param.Ls = 2;

  // NULL pointers to host fields to force
  // construction instead of download of the clover field:
  if( g_c_sw > 0.0 )
    loadCloverQuda(NULL, NULL, &inv_param);

  // reorder spinor
  reorder_spinor_toQuda( (double*)spinorIn,   inv_param.cpu_prec, 1, (double*)spinorIn_c );

  // perform the inversion
  invertQuda(spinorOut, spinorIn, &inv_param);

  if( inv_param.verbosity == QUDA_VERBOSE )
    if(g_proc_id == 0)
      printf("# QUDA: Device memory used:  Spinor: %f GiB,  Gauge: %f GiB, Clover: %f GiB\n",
   inv_param.spinorGiB, gauge_param.gaugeGiB, inv_param.cloverGiB);
  if( inv_param.verbosity > QUDA_SILENT )
    if(g_proc_id == 0)
      printf("# QUDA: Done: %i iter / %g secs = %g Gflops\n",
   inv_param.iter, inv_param.secs, inv_param.gflops/inv_param.secs);

  // number of CG iterations
  int iteration = inv_param.iter;

  // reorder spinor
  reorder_spinor_fromQuda( (double*)spinorIn,    inv_param.cpu_prec, 1, (double*)spinorIn_c );
  reorder_spinor_fromQuda( (double*)spinorOut,   inv_param.cpu_prec, 1, (double*)spinorOut_c );
  convert_lexic_to_eo(Even_s,     Odd_s,     solver_field[0]);
  convert_lexic_to_eo(Even_c,     Odd_c,     solver_field[1]);
  convert_lexic_to_eo(Even_new_s, Odd_new_s, solver_field[2]);
  convert_lexic_to_eo(Even_new_c, Odd_new_c, solver_field[3]);

  finalize_solver(solver_field, nr_sf);
  freeGaugeQuda();

  if(iteration >= max_iter)
    return(-1);

  return(iteration);
}
Example #13
0
/* P output = solution , Q input = source */
int cg_her_nd(spinor * const P_up,spinor * P_dn, spinor * const Q_up, spinor * const Q_dn, 
	      const int max_iter, double eps_sq, const int rel_prec, 
	      const int N, matrix_mult_nd f) {
  double normsp, normsq, pro, err, alpha_cg, beta_cg, squarenorm;
  int iteration;
  double err1, err2;
  spinor ** up_field = NULL;
  spinor ** dn_field = NULL;  
  const int nr_sf = 5;
  /* do we really need so many fields??? */
  init_solver_field(&up_field, VOLUMEPLUSRAND, nr_sf);
  init_solver_field(&dn_field, VOLUMEPLUSRAND, nr_sf);

  squarenorm = square_norm(Q_up, N, 1);
  squarenorm+= square_norm(Q_dn, N, 1);
  /*        !!!!   INITIALIZATION    !!!! */
  assign(up_field[0], P_up, N);
  assign(dn_field[0], P_dn, N);
  
  /*        (r_0,r_0)  =  normsq         */
  normsp =square_norm(P_up, N, 1);
  normsp+=square_norm(P_dn, N, 1);

/*   assign(up_field[5], Q_up, N); */
/*   assign(dn_field[5], Q_dn, N); */
  
  /* initialize residue r and search vector p */
  if(normsp==0){
    /* if a starting solution vector equal to zero is chosen */
    assign(up_field[1], Q_up, N);
    assign(dn_field[1], Q_dn, N);
    assign(up_field[2], Q_up, N);
    assign(dn_field[2], Q_dn, N);
    normsq =square_norm(Q_up, N, 1);
    normsq+=square_norm(Q_dn, N, 1);
  }
  else {
    /* if a starting solution vector different from zero is chosen */
    f(up_field[3],dn_field[3],
      up_field[0],dn_field[0]);
   
    diff(up_field[1], Q_up, up_field[3], N);
    diff(dn_field[1], Q_dn, dn_field[3], N);
    assign(up_field[2], up_field[1], N);
    assign(dn_field[2], dn_field[1], N);
    normsq =square_norm(up_field[2], N, 1);
    normsq+=square_norm(dn_field[2], N, 1);
  }

  /* main loop */
  for(iteration=0;iteration<max_iter;iteration++){
    f(up_field[4],dn_field[4],
      up_field[2],dn_field[2]);

    pro =scalar_prod_r(up_field[2], up_field[4], N, 1);
    pro+=scalar_prod_r(dn_field[2], dn_field[4], N, 1);
     
    /*  Compute alpha_cg(i+1)   */
    alpha_cg=normsq/pro;
     
    /*  Compute x_(i+1) = x_i + alpha_cg(i+1) p_i    */
    assign_add_mul_r(up_field[0], up_field[2],  alpha_cg, N);
    assign_add_mul_r(dn_field[0], dn_field[2],  alpha_cg, N);
    /*  Compute r_(i+1) = r_i - alpha_cg(i+1) Qp_i   */
    assign_add_mul_r(up_field[1], up_field[4], -alpha_cg, N);
    assign_add_mul_r(dn_field[1], dn_field[4], -alpha_cg, N);

    /* Check whether the precision is reached ... */
    err1 =square_norm(up_field[1], N, 1);
    err2 =square_norm(dn_field[1], N, 1);
    err = err1 + err2;
    if(g_debug_level > 1 && g_proc_id == g_stdio_proc) {
      printf("cg_her_nd : i = %d  esqr  %e = %e + %e \n",iteration,err, err1, err2); fflush( stdout);
    }

    if(((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*squarenorm) && (rel_prec == 1))) {
      assign(P_up, up_field[0], N);
      assign(P_dn, dn_field[0], N);
      g_sloppy_precision = 0;
      finalize_solver(up_field, nr_sf);
      finalize_solver(dn_field, nr_sf);
      return(iteration+1);
    }
#ifdef _USE_HALFSPINOR
    if(((err*err <= eps_sq) && (rel_prec == 0)) || ((err*err <= eps_sq*squarenorm) && (rel_prec == 1))) {
      g_sloppy_precision = 1;
      if(g_debug_level > 2 && g_proc_id == g_stdio_proc) {
	printf("sloppy precision on\n"); fflush( stdout);
      }
    }
#endif
    /* Compute beta_cg(i+1)
       Compute p_(i+1) = r_i+1 + beta_(i+1) p_i     */
    beta_cg=err/normsq;
    assign_mul_add_r(up_field[2], beta_cg, up_field[1], N);
    assign_mul_add_r(dn_field[2], beta_cg, dn_field[1], N);
    normsq=err;
  }

  assign(P_up, up_field[0], N);
  assign(P_dn, dn_field[0], N);
  g_sloppy_precision = 0;  
  
  finalize_solver(up_field, nr_sf);
  finalize_solver(dn_field, nr_sf);
  return(-1);
}
Example #14
0
int gmres_dr(spinor * const P,spinor * const Q, 
	  const int m, const int nr_ev, const int max_restarts,
	  const double eps_sq, const int rel_prec,
	  const int N, matrix_mult f){

  int restart=0, i, j, k, l;
  double beta, eps, norm, beta2=0.;
  complex *lswork = NULL;
  int lwork;
  complex tmp1, tmp2;
  int info=0;
  int _m = m, mp1 = m+1, np1 = nr_ev+1, ne = nr_ev, V2 = 12*(VOLUMEPLUSRAND)/2, _N = 12*N;
  spinor ** solver_field = NULL;
  const int nr_sf = 3;

  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }
  double err=0.;
  spinor * r0, * x0;

  cmone.re = -1.; cmone.im=0.;
  cpone.re = 1.; cpone.im=0.;
  czero.re = 0.; czero.im = 0.;
  
  r0 = solver_field[0];
  x0 = solver_field[2];
  eps=sqrt(eps_sq);  
  init_gmres_dr(m, (VOLUMEPLUSRAND));
  norm = sqrt(square_norm(Q, N, 1));

  assign(x0, P, N);

  /* first normal GMRES cycle */
  /* r_0=Q-AP  (b=Q, x+0=P) */
  f(r0, x0);
  diff(r0, Q, r0, N);
  
  /* v_0=r_0/||r_0|| */
  alpha[0].re=sqrt(square_norm(r0, N, 1));
  err = alpha[0].re;
  
  if(g_proc_id == g_stdio_proc && g_debug_level > 0){
    printf("%d\t%e true residue\n", restart*m, alpha[0].re*alpha[0].re); 
    fflush(stdout);
  }
  
  if(alpha[0].re==0.){
    assign(P, x0, N);
    finalize_solver(solver_field, nr_sf);
    return(restart*m);
  }
  
  mul_r(V[0], 1./alpha[0].re, r0, N);
  
  for(j = 0; j < m; j++){
    /* solver_field[0]=A*v_j */

    /* Set h_ij and omega_j */
    /* solver_field[1] <- omega_j */    
    f(solver_field[1], V[j]);
/*     assign(solver_field[1], solver_field[0], N); */
    for(i = 0; i <= j; i++){
      H[i][j] = scalar_prod(V[i], solver_field[1], N, 1);
      /* G, work and work2 are in Fortran storage: columns first */
      G[j][i] = H[i][j];
      work2[j][i] = H[i][j];
      work[i][j].re = H[i][j].re;
      work[i][j].im = -H[i][j].im;
      assign_diff_mul(solver_field[1], V[i], H[i][j], N);
    }
    
    _complex_set(H[j+1][j], sqrt(square_norm(solver_field[1], N, 1)), 0.);
    G[j][j+1] = H[j+1][j];
    work2[j][j+1] = H[j+1][j];
    work[j+1][j].re =  H[j+1][j].re;
    work[j+1][j].im =  -H[j+1][j].im;
    beta2 = H[j+1][j].re*H[j+1][j].re; 
    for(i = 0; i < j; i++){
      tmp1 = H[i][j];
      tmp2 = H[i+1][j];
      _mult_real(H[i][j], tmp2, s[i]);
      _add_assign_complex_conj(H[i][j], c[i], tmp1);
      _mult_real(H[i+1][j], tmp1, s[i]);
      _diff_assign_complex(H[i+1][j], c[i], tmp2);
    }
    
    /* Set beta, s, c, alpha[j],[j+1] */
    beta = sqrt(_complex_square_norm(H[j][j]) + _complex_square_norm(H[j+1][j]));
    s[j] = H[j+1][j].re / beta;
    _mult_real(c[j], H[j][j], 1./beta);
    _complex_set(H[j][j], beta, 0.);
    _mult_real(alpha[j+1], alpha[j], s[j]);
    tmp1 = alpha[j];
    _mult_assign_complex_conj(alpha[j], c[j], tmp1);
    
    /* precision reached? */
    if(g_proc_id == g_stdio_proc && g_debug_level > 0){
      printf("%d\t%e residue\n", restart*m+j, alpha[j+1].re*alpha[j+1].re); 
      fflush(stdout);
    }
    if(((alpha[j+1].re <= eps) && (rel_prec == 0)) || ((alpha[j+1].re <= eps*norm) && (rel_prec == 1))){
      _mult_real(alpha[j], alpha[j], 1./H[j][j].re);
      assign_add_mul(x0, V[j], alpha[j], N);
      for(i = j-1; i >= 0; i--){
	for(k = i+1; k <= j; k++){
	  _mult_assign_complex(tmp1, H[i][k], alpha[k]); 
	  /* alpha[i] -= tmp1 */
	  _diff_complex(alpha[i], tmp1);
	}
	_mult_real(alpha[i], alpha[i], 1./H[i][i].re);
	assign_add_mul(x0, V[i], alpha[i], N);
      }
      for(i = 0; i < m; i++){
	alpha[i].im = 0.;
      }
      assign(P, x0, N);
      finalize_solver(solver_field, nr_sf);
      return(restart*m+j);
    }
    /* if not */
    else {
      mul_r(V[(j+1)], 1./H[j+1][j].re, solver_field[1], N); 
    }
    
  }
  j=m-1;
  /* prepare for restart */
  _mult_real(alpha[j], alpha[j], 1./H[j][j].re);
  assign_add_mul(x0, V[j], alpha[j], N);
  if(g_proc_id == 0 && g_debug_level > 3) {
    printf("alpha: %e %e\n", alpha[j].re, alpha[j].im);
  }
  for(i = j-1; i >= 0; i--){
    for(k = i+1; k <= j; k++){
      _mult_assign_complex(tmp1, H[i][k], alpha[k]);
      _diff_complex(alpha[i], tmp1);
    }
    _mult_real(alpha[i], alpha[i], 1./H[i][i].re);
    if(g_proc_id == 0 && g_debug_level > 3) {
      printf("alpha: %e %e\n", alpha[i].re, alpha[i].im);
    }
    assign_add_mul(x0, V[i], alpha[i], N);
  }

  /* This produces c=V_m+1*r0 */
  for(i = 0; i < mp1; i++) {
    c[i] = scalar_prod(V[i], r0, N, 1); 
    if(g_proc_id == 0 && g_debug_level > 3) {
      printf("c: %e %e err = %e\n", c[i].re, c[i].im, err);
    }
  }

  for(restart = 1; restart < max_restarts; restart++) {  

    /* compute c-\bar H \alpha */
    _FT(zgemv) ("N", &mp1, &_m, &cmone, G[0], &mp1, alpha, &one, &cpone, c, &one, 1);
    err = sqrt(short_scalar_prod(c, c, mp1).re);
    if(g_proc_id == 0 && g_debug_level > 0) {
      printf("%d\t %e short residue\n", m*restart, err*err);
    } 
    
    /* Compute new residual r0 */
    /* r_0=Q-AP  (b=Q, x+0=P) */
    if(g_debug_level > 0) {
      f(r0, x0);
      diff(r0, Q, r0, N);
      tmp1.im=sqrt(square_norm(r0, N, 1));
      if(g_proc_id == g_stdio_proc){
	printf("%d\t%e true residue\n", m*restart, tmp1.im*tmp1.im); 
	fflush(stdout);
      }
    }
    mul(r0, c[0], V[0], N);
    for(i = 1; i < mp1; i++) {
      assign_add_mul(r0, V[i], c[i], N);
    } 
    if(g_debug_level > 3) {
      tmp1.im=sqrt(square_norm(r0, N, 1));
      if(g_proc_id == g_stdio_proc){
	printf("%d\t%e residue\n", m*restart, tmp1.im*tmp1.im); 
	fflush(stdout);
      }
    }
    /* Stop if satisfied */
    if(err < eps){
      assign(P, x0, N);
      finalize_solver(solver_field, nr_sf);
      return(restart*m);
    }

    /* Prepare to compute harmonic Ritz pairs */
    for(i = 0; i < m-1; i++){
      alpha[i].re = 0.;
      alpha[i].im = 0.;
    }
    alpha[m-1].re = 1.;
    alpha[m-1].im = 0.;
    _FT(zgesv) (&_m, &one, work[0], &mp1, idx, alpha, &_m, &info); 
    for(i = 0; i < m; i++) {
      G[m-1][i].re += (beta2*alpha[idx[i]-1].re);
      G[m-1][i].im += (beta2*alpha[idx[i]-1].im);
    }
    if(g_proc_id == 0 && g_debug_level > 3){
      printf("zgesv returned info = %d, c[m-1]= %e, %e , idx[m-1]=%d\n", 
	     info, alpha[idx[m-1]-1].re, alpha[idx[m-1]-1].im, idx[m-1]);
    }
    /* c - \bar H * d -> c */
    /* G contains H + \beta^2 H^-He_n e_n^H */

    /* Compute harmonic Ritz pairs */
    diagonalise_general_matrix(m, G[0], mp1, alpha, evalues);
    for(i = 0; i < m; i++) {
      sortarray[i] = _complex_square_norm(evalues[i]);
      idx[i] = i;
    }
    quicksort(m, sortarray, idx);
    if(g_proc_id == g_stdio_proc && g_debug_level > 1) {
      for(i = 0; i < m; i++) {
	printf("# Evalues %d %e  %e \n", i, evalues[idx[i]].re, evalues[idx[i]].im);
      }
      fflush(stdout);
    }
    
    /* Copy the first nr_ev eigenvectors to work */
    for(i = 0; i < ne; i++) {
      for(l = 0; l < m; l++) {
	work[i][l] = G[idx[i]][l];
      }
    }
    /* Orthonormalize them */
    for(i = 0; i < ne; i++) {
      work[i][m].re = 0.;
      work[i][m].im = 0.;
      short_ModifiedGS(work[i], m, i, work[0], mp1); 
    }
    /* Orthonormalize c - \bar H d to work */
    short_ModifiedGS(c, m+1, ne, work[0], mp1);
    for(i = 0; i < mp1; i++) {
      work[nr_ev][i] = c[i];
    }
    /* Now compute \bar H = P^T_k+1 \bar H_m P_k */
    for(i = 0; i < mp1; i++) {
      for(l = 0; l < mp1; l++) {
	H[i][l].re = 0.;
	H[i][l].im = 0.;
      }
    }    

    _FT(zgemm) ("N", "N", &mp1, &ne, &_m, &cpone, work2[0], &mp1, work[0], &mp1, &czero, G[0], &mp1, 1, 1); 
    _FT(zgemm) ("C", "N", &np1, &ne , &mp1, &cpone, work[0], &mp1, G[0], &mp1, &czero, H[0], &mp1, 1, 1);

    if(g_debug_level > 3) {
      for(i = 0; i < ne+1; i++) {
	for(l = 0; l < ne+1; l++) {
	  if(g_proc_id == 0) {
	    printf("(g[%d], g[%d]) = %e, %e\n", i, l, short_scalar_prod(work[i], work[l], m+1).re, 
		   short_scalar_prod(work[i], work[l], m+1).im);
	    printf("(g[%d], g[%d]) = %e, %e\n", l, i, short_scalar_prod(work[l], work[i], m+1).re, 
		   short_scalar_prod(work[l], work[i], m+1).im);
	  }
	}
      }
    }
    /* V_k+1 = V_m+1 P_k+1 */
/*     _FT(zgemm) ("N", "N", &_N, &np1, &mp1, &cpone, (complex*)V[0], &V2, work[0], &mp1, &czero, (complex*)Z[0], &V2, 1, 1);  */
    for(l = 0; l < np1; l++) {
      mul(Z[l], work[l][0], V[0], N);
      for(i = 1; i < mp1; i++) {
	assign_add_mul(Z[l], V[i], work[l][i], N);
      }
    }
    /* copy back to V */
    for(i = 0; i < np1; i++) {
      assign(V[i], Z[i], N); 
    }
    /* Reorthogonalise v_nr_ev */
    ModifiedGS((complex*)V[nr_ev], _N, nr_ev, (complex*)V[0], V2);  
    if(g_debug_level > 3) {
      for(i = 0; i < np1; i++) {
	for(l = 0; l < np1; l++) {
	  tmp1 = scalar_prod(V[l], V[i], N, 1);
	  if(g_proc_id == 0) {
	    printf("(V[%d], V[%d]) = %e %e %d %d %d %d %d %d %e %e\n", l, i, tmp1.re, tmp1.im, np1, mp1, ne, _m, _N, V2, H[l][i].re, H[l][i].im);
	  }
	}
      }
    }
    /* Copy the content of H to work, work2 and G */
    for(i=0; i < mp1; i++) { 
      for(l = 0; l < mp1; l++) { 
 	G[i][l] = H[i][l];
	work2[i][l] = H[i][l];
	work[l][i].re = H[i][l].re;
	work[l][i].im = -H[i][l].im;
      }
    }

    for(j = ne; j < m; j++) {
      /* solver_field[0]=A*v_j */
      f(solver_field[1], V[j]);
      
      /* Set h_ij and omega_j */
      /* solver_field[1] <- omega_j */
/*       assign(solver_field[1], solver_field[0], N); */
      for(i = 0; i <= j; i++){
	H[j][i] = scalar_prod(V[i], solver_field[1], N, 1);  
	/* H, G, work and work2 are now all in Fortran storage: columns first */
	G[j][i] = H[j][i];
	work2[j][i] = H[j][i];
	work[i][j].re = H[j][i].re;
	work[i][j].im = -H[j][i].im;
	assign_diff_mul(solver_field[1], V[i], H[j][i], N);
      }
      beta2 = square_norm(solver_field[1], N, 1);
      _complex_set(H[j][j+1], sqrt(beta2), 0.);
      G[j][j+1] = H[j][j+1];
      work2[j][j+1] = H[j][j+1];
      work[j+1][j].re =  H[j][j+1].re;
      work[j+1][j].im =  -H[j][j+1].im;
      mul_r(V[(j+1)], 1./H[j][j+1].re, solver_field[1], N);
    }

    /* Solve the least square problem for alpha*/
    /* This produces c=V_m+1*r0 */
    for(i = 0; i < mp1; i++) {      
      c[i] = scalar_prod(V[i], r0, N, 1);  
      alpha[i] = c[i];
      if(g_proc_id == 0 && g_debug_level > 3) {
	printf("c: %e %e err = %e\n", c[i].re, c[i].im, err);
      }
    }
    if(lswork == NULL) {
      lwork = -1;
      _FT(zgels) ("N", &mp1, &_m, &one, H[0], &mp1, alpha, &mp1, &tmp1, &lwork, &info, 1);
      lwork = (int)tmp1.re;
      lswork = (complex*)malloc(lwork*sizeof(complex));
    }
    _FT(zgels) ("N", &mp1, &_m, &one, H[0], &mp1, alpha, &mp1, lswork, &lwork, &info, 1);
    if(g_proc_id == 0 && g_debug_level > 3) {
      printf("zgels returned info = %d\n", info);
      fflush(stdout);
    }
    /* Compute the new solution vector */
    for(i = 0; i < m; i++){
      if(g_proc_id == 0 && g_debug_level > 3) {
	printf("alpha: %e %e\n", alpha[i].re, alpha[i].im);
      }
      assign_add_mul(x0, V[i], alpha[i], N);
    }
  }


  /* If maximal number of restart is reached */
  assign(P, x0, N);
  finalize_solver(solver_field, nr_sf);
  return(-1);
}
Example #15
0
/* P output = solution , Q input = source */
int mixed_cg_her(spinor * const P, spinor * const Q, solver_params_t solver_params, 
                 const int max_iter, double eps_sq, const int rel_prec, const int N,
                 matrix_mult f, matrix_mult32 f32) {

  int i = 0, iter = 0, j = 0;
  float sqnrm = 0., sqnrm2, squarenorm;
  float pro, err, alpha_cg, beta_cg;
  double sourcesquarenorm, sqnrm_d, squarenorm_d;
  spinor *delta, *y, *xhigh;
  spinor32 *x, *stmp;
  spinor ** solver_field = NULL;
  spinor32 ** solver_field32 = NULL;  
  const int nr_sf = 3;
  const int nr_sf32 = 4;

  int max_inner_it = mixcg_maxinnersolverit;
  int N_outer = max_iter/max_inner_it;
  //to be on the save side we allow at least 10 outer iterations
  if(N_outer < 10) N_outer = 10;
  
  int save_sloppy = g_sloppy_precision_flag;
  double atime, etime, flops;
  
  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);    
    init_solver_field_32(&solver_field32, VOLUMEPLUSRAND, nr_sf32);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
    init_solver_field_32(&solver_field32, VOLUMEPLUSRAND/2, nr_sf32);    
  }

  squarenorm_d = square_norm(Q, N, 1);
  sourcesquarenorm = squarenorm_d;
  sqnrm_d = squarenorm_d;
 
  delta = solver_field[0];
  y = solver_field[1];
  xhigh = solver_field[2];
  x = solver_field32[3];   
  assign(delta, Q, N);
  
  //set solution to zero
  zero_spinor_field(P, N);
  
  atime = gettime();
  for(i = 0; i < N_outer; i++) {

    /* main CG loop in lower precision */
    zero_spinor_field_32(x, N);
    zero_spinor_field_32(solver_field32[0], N);   
    assign_to_32(solver_field32[1], delta, N);
    assign_to_32(solver_field32[2], delta, N);
    
    sqnrm = (float) sqnrm_d;
    sqnrm2 = sqnrm;
    
    /*inner CG loop */
    for(j = 0; j <= max_inner_it; j++) {
      
      f32(solver_field32[0], solver_field32[2]); 
      pro = scalar_prod_r_32(solver_field32[2], solver_field32[0], N, 1);
      alpha_cg = sqnrm2 / pro;
      
      assign_add_mul_r_32(x, solver_field32[2], alpha_cg, N);
      
      assign_mul_add_r_32(solver_field32[0], -alpha_cg, solver_field32[1], N);      
      
      err = square_norm_32(solver_field32[0], N, 1);

      if(g_proc_id == g_stdio_proc && g_debug_level > 2) {
	      printf("inner CG: %d res^2 %g\n", iter+j, err);
	      fflush(stdout);
      }
    
      //if (((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*squarenorm) && (rel_prec == 1))){
      if((err <= mixcg_innereps*sqnrm)|| (j==max_inner_it) ||  ((1.3*err <= eps_sq) && (rel_prec == 0)) || ((1.3*err <= eps_sq*sourcesquarenorm) && (rel_prec == 1))) {
	      break;
      }
      beta_cg = err / sqnrm2;
      assign_mul_add_r_32(solver_field32[2], beta_cg, solver_field32[0], N);
      stmp = solver_field32[0];
      solver_field32[0] = solver_field32[1];
      solver_field32[1] = stmp;
      sqnrm2 = err;
    }
    /* end inner CG loop */
    iter += j;

    /* we want to apply a true double matrix with f(y,P) -> set sloppy off here*/
    g_sloppy_precision_flag = 0;
    
    /* calculate defect in double precision */
    assign_to_64(xhigh, x, N);    
    add(P, P, xhigh, N);
    f(y, P);
    diff(delta, Q, y, N);
    sqnrm_d = square_norm(delta, N, 1);
    if(g_debug_level > 2 && g_proc_id == 0) {
      printf("mixed CG: last inner residue: %g\t\n", err);
      printf("mixed CG: true residue %d %g\t\n",iter, sqnrm_d); fflush(stdout);
    }
    
    /* here we can reset it to its initial value*/
    g_sloppy_precision_flag = save_sloppy;
    
    if(((sqnrm_d <= eps_sq) && (rel_prec == 0)) || ((sqnrm_d <= eps_sq*sourcesquarenorm) && (rel_prec == 1))) {
      etime = gettime();     

      if(g_debug_level > 0 && g_proc_id == 0) {
      	if(N != VOLUME){
      	  /* 2 A + 2 Nc Ns + N_Count ( 2 A + 10 Nc Ns ) */
      	  /* 2*1608.0 because the linalg is over VOLUME/2 */
      	  flops = (2*(2*1608.0+2*3*4) + 2*3*4 + iter*(2.*(2*1608.0+2*3*4) + 10*3*4))*N/1.0e6f;
      	  printf("# mixed CG: iter: %d eps_sq: %1.4e t/s: %1.4e\n", iter, eps_sq, etime-atime); 
      	  printf("# mixed CG: flopcount (for e/o tmWilson only): t/s: %1.4e mflops_local: %.1f mflops: %.1f\n", 
      	      etime-atime, flops/(etime-atime), g_nproc*flops/(etime-atime));
      	}
      	else{
      	  /* 2 A + 2 Nc Ns + N_Count ( 2 A + 10 Nc Ns ) */
      	  flops = (2*(1608.0+2*3*4) + 2*3*4 + iter*(2.*(1608.0+2*3*4) + 10*3*4))*N/1.0e6f;      
      	  printf("# mixed CG: iter: %d eps_sq: %1.4e t/s: %1.4e\n", iter, eps_sq, etime-atime); 
      	  printf("# mixed CG: flopcount (for non-e/o tmWilson only): t/s: %1.4e mflops_local: %.1f mflops: %.1f\n", 
      	      etime-atime, flops/(etime-atime), g_nproc*flops/(etime-atime));      
      	}
      }      
      
      finalize_solver(solver_field, nr_sf);
      finalize_solver_32(solver_field32, nr_sf32); 
      return(iter+i);
    }
    iter++;
  }
  finalize_solver(solver_field, nr_sf);
  finalize_solver_32(solver_field32, nr_sf32); 
  return(-1);
}
Example #16
0
void Msap_eo_old(spinor * const P, spinor * const Q, const int Ncy, const int Niter) {
  int blk, ncy = 0, eo, vol, vols;
  spinor * r, * a, * b, * c;
  double nrm;
  double musave = g_mu;
  double kappasave = g_kappa;
  spinor * b_even, * b_odd, * a_even, * a_odd;
  spinor ** solver_field = NULL;
  // also get space for mrblk! 6 = 3+3
  const int nr_sf = 6;

  if(kappa_dflgen > 0) {
    g_kappa = kappa_dfl;
  }
  if(mu_dflgen > -10) {
    g_mu = mu_dfl;
    // make sure the sign is correct!
    if(g_mu*musave < 0) g_mu *= -1.;
  }
  boundary(g_kappa);
  /* 
   * here it would be probably better to get the working fields as a parameter 
   * from the calling function
   */
  vols = block_list[0].volume/2+block_list[0].spinpad;
  vol = block_list[0].volume/2;

  init_solver_field(&solver_field, nb_blocks*2*vols, nr_sf);
  r = solver_field[0];
  a = solver_field[1];
  b = solver_field[2];

  for(ncy = 0; ncy < Ncy; ncy++) {
    /* compute the global residue        */
    /* this can be done more efficiently */
    /* here only a naive implementation  */
    for(eo = 0; eo < 2; eo++) {
      D_psi(r, P);
      diff(r, Q, r, VOLUME);
      nrm = square_norm(r, VOLUME, 1);
      if(g_proc_id == 0 && g_debug_level > 2 && eo == 0) {
	printf("Msap_eo: %d %1.3e mu = %e\n", ncy, nrm, g_mu/2./g_kappa);
	fflush(stdout);
      }
      /* choose the even (odd) block */

      // rely on nested parallelism
      // 
      #ifdef TM_USE_OMP
      # pragma omp parallel for private (a_even, a_odd, b_even, b_odd, c)
      #endif
      for (blk = 0; blk < nb_blocks; blk++) {
 	b_even = b + blk*2*vols;
 	b_odd = b +blk*2*vols + vols;
 	a_even = a + blk*2*vols;
 	a_odd = a + blk*2*vols + vols;
	c = solver_field[3] + blk*vols;

	if(block_list[blk].evenodd == eo) {
	  /* get part of r corresponding to block blk into b_even and b_odd */

	  copy_global_to_block_eo(b_even, b_odd, r, blk);
	  if(g_c_sw > 0) {
	    assign_mul_one_sw_pm_imu_inv_block(EE, a_even, b_even, g_mu, &block_list[blk]);
	    Block_H_psi(&block_list[blk], a_odd, a_even, OE);
	    /* a_odd = b_odd - a_odd */
	    diff(a_odd, b_odd, a_odd, vol);

	    mrblk(b_odd, a_odd, solver_field[3] + blk*2*3*vols, Niter, 1.e-31, 1, vol, &Msw_plus_block_psi, blk);
	    
	    Block_H_psi(&block_list[blk], b_even, b_odd, EO);
	    assign(c, b_even, vol);
	    assign_mul_one_sw_pm_imu_inv_block(EE, b_even, c, g_mu, &block_list[blk]);
	  }
	  else {
	    assign_mul_one_pm_imu_inv(a_even, b_even, +1., vol);
	    Block_H_psi(&block_list[blk], a_odd, a_even, OE);
	    /* a_odd = b_odd - a_odd */
	    diff(a_odd, b_odd, a_odd, vol);

	    mrblk(b_odd, a_odd, solver_field[3] + blk*2*3*vols, Niter, 1.e-31, 1, vol, &Mtm_plus_block_psi, blk);

	    Block_H_psi(&block_list[blk], b_even, b_odd, EO);
	    mul_one_pm_imu_inv(b_even, +1., vol);
	  }
	  /* a_even = a_even - b_even */
	  diff(a_even, a_even, b_even, vol);

	  /* add even and odd part up to full spinor P */
	  add_eo_block_to_global(P, a_even, b_odd, blk);

	}
      }
    }
  }
  finalize_solver(solver_field, nr_sf);
  g_mu = musave;
  g_kappa = kappasave;
  boundary(g_kappa);
  return;
}
Example #17
0
int bicgstabell(spinor * const x0, spinor * const b, const int max_iter, 
		double eps_sq, const int rel_prec, const int _l, const int N, matrix_mult f) {

  double err;
  int i, j, k, l;
  double rho0, rho1, beta, alpha, omega, gamma0 = 0., squarenorm;
  spinor * r[5], * u[5], * r0_tilde, * x;
  double tau[5][5], gamma[25], gammap[25], gammapp[25], sigma[25];
  spinor ** solver_field = NULL;
  const int nr_sf = 2*(_l+1)+2;

  l = _l;
  k = -l;

  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }
  r0_tilde = solver_field[0];
  for(i = 0; i <= l; i++){
    r[i] = solver_field[2+2*i];
    u[i] = solver_field[3+2*i];
  }

  x = x0; 
  assign(u[0], b, N);
  f(r0_tilde, x);
  diff(r[0], u[0], r0_tilde, N);
  zero_spinor_field(solver_field[1], N);
  assign(r0_tilde, r[0], N);
  squarenorm = square_norm(b, N, 1);

  rho0 = 1.;
  alpha = 0.;
  omega = 1.;
  err = square_norm(r0_tilde, N, 1);
  while( k < max_iter && (((err > eps_sq) && (rel_prec == 0)) 
			  || ((err > eps_sq*squarenorm) && (rel_prec == 1)) 
			  )) {
    k+=l;

    /* The BiCG part */

    rho0 *= -omega;
    for(j = 0; j < l; j++) {
      rho1 = scalar_prod_r(r[j], r0_tilde, N, 1);
      beta = (rho1/rho0);
      beta *= alpha; 
      rho0 = rho1;
      for(i = 0; i <= j; i++) {
	/* u_i = r_i - \beta u_i */
	assign_mul_add_r(u[i], -beta, r[i], N);
      }
      f(u[j+1], u[j]);
      gamma0 = scalar_prod_r(u[j+1], r0_tilde, N, 1);
      alpha = rho0/gamma0;
      /* r_i = r_i - \alpha u_{i+1} */
      for(i = 0; i <= j; i++) {
	assign_add_mul_r(r[i], u[i+1], -alpha, N);
      }
      f(r[j+1], r[j]);
      /* x = x + \alpha u_0 */
      assign_add_mul_r(x, u[0], alpha, N);
      err = square_norm(r[j+1], N, 1);
      if(g_proc_id == 0 && g_debug_level > 1) {printf("%d %d err = %e\n", k, j, err);fflush(stdout);}
    }

    /* The MR part */

    for(j = 1; j <= l; j++){
      for(i = 1; i < j; i++){
	tau[i][j] = scalar_prod_r(r[j], r[i], N, 1)/sigma[i];
	assign_add_mul_r(r[j], r[i], -tau[i][j], N);
      }
      sigma[j] = scalar_prod_r(r[j], r[j], N, 1);
      gammap[j] = scalar_prod_r(r[0], r[j], N, 1)/sigma[j];
    }
    gamma[l] = gammap[l];
    omega = gamma[l];
    for(j = l-1; j > 0; j--) {
      gamma[j] = gammap[j];
      for(i = j+1; i <= l; i++) {
	gamma[j] -= (tau[j][i]*gamma[i]);
      }
    }
    for(j = 1; j < l; j++) {
      gammapp[j] = gamma[j+1];
      for(i = j+1; i < l; i++){
	gammapp[j] += (tau[j][i]*gamma[i+1]);
      }
    }
    assign_add_mul_r(x, r[0], gamma[1], N);
    assign_add_mul_r(r[0], r[l], -gammap[l], N);
    for(j = 1; j < l; j++){
      assign_add_mul_r(x, r[j], gammapp[j], N);
      assign_add_mul_r(r[0], r[j], -gammap[j], N);
    }
    assign_add_mul_r(u[0], u[l], -gamma[l], N);
    for(j = 1; j < l; j++){
      assign_add_mul_r(u[0], u[j], -gamma[j], N);
    }
    err = square_norm(r[0], N, 1);
    if(g_proc_id == 0 && g_debug_level > 0){
      printf(" BiCGstabell iterated %d %d, %e rho0 = %e, alpha = %e, gamma0= %e\n", l, k, err, rho0, alpha, gamma0);
      fflush( stdout );
    }
  }
  finalize_solver(solver_field, nr_sf);
  if(k == max_iter) return(-1);
  return(k);
}
Example #18
0
void Msap_eo(spinor * const P, spinor * const Q, const int Ncy, const int Niter) {
  int ncy = 0, vol, vols;
  spinor * r, * a, * b;
  double nrm;
  double musave = g_mu;
  double kappasave = g_kappa;
  spinor ** solver_field = NULL;
  // also get space for mrblk! 6 = 3+3
  const int nr_sf = 6;

  if(kappa_Msap > 0) {
    g_kappa = kappa_Msap;
  }
  if(mu_Msap > -10) {
    g_mu = mu_Msap;
    // make sure the sign is correct!
    if(g_mu*musave < 0) g_mu *= -1.;
  }
  boundary(g_kappa);
  /* 
   * here it would be probably better to get the working fields as a parameter 
   * from the calling function
   */
  vols = block_list[0].volume/2+block_list[0].spinpad;
  vol = block_list[0].volume/2;

  init_solver_field(&solver_field, nb_blocks*2*vols, nr_sf);
  r = solver_field[0];
  a = solver_field[1];
  b = solver_field[2];

  int * blk_e_list = malloc(nb_blocks/2*sizeof(int));
  int * blk_o_list = malloc(nb_blocks/2*sizeof(int));
  int iblke = 0, iblko = 0;
  for(int blk = 0; blk < nb_blocks; blk++) {
    if (block_list[blk].evenodd == 0) {
      blk_e_list[iblke] = blk;
      iblke++;
    }
    else {
      blk_o_list[iblko] = blk;
      iblko++;
    }
  }

  for(ncy = 0; ncy < Ncy; ncy++) {
    /* compute the global residue        */
    /* this can be done more efficiently */
    /* here only a naive implementation  */
    for(int eo = 0; eo < 2; eo++) {
      D_psi(r, P);
      diff(r, Q, r, VOLUME);
      nrm = square_norm(r, VOLUME, 1);
      if(g_proc_id == 0 && g_debug_level > 2 && eo == 0) {
	printf("Msap_eo: %d %1.3e mu = %e\n", ncy, nrm, g_mu/2./g_kappa);
	fflush(stdout);
      }
      int * blk_eo_list;
      if(eo == 0) {
	blk_eo_list = blk_e_list;
      }
      else {
	blk_eo_list = blk_o_list;
      }
      /* choose the even (odd) block */
      // rely on nested parallelism
      // 
      #ifdef TM_USE_OMP
      # pragma omp parallel for 
      #endif
      for (int iblk = 0; iblk < nb_blocks/2; iblk++) {
	int blk = blk_eo_list[iblk];
 	spinor32 * b_even = (spinor32*) (b + blk*2*vols);
 	spinor32 * b_odd = (spinor32*) (b +blk*2*vols + vols);
 	spinor32 * a_even = (spinor32*) (a + blk*2*vols);
 	spinor32 * a_odd = (spinor32*) (a + blk*2*vols + vols);
        // mrblk needs 3 solver fields which we distribute according to the block number
	spinor32 * c = (spinor32*) (solver_field[3] + blk*2*3*vols);

	/* get part of r corresponding to block blk into b_even and b_odd */
	copy_global_to_block_eo_32(b_even, b_odd, r, blk);
	if(g_c_sw > 0) {
	  assign_mul_one_sw_pm_imu_inv_block_32(EE, a_even, b_even, g_mu, &block_list[blk]);
	  Block_H_psi_32(&block_list[blk], a_odd, a_even, OE);
	  /* a_odd = b_odd - a_odd */
	  diff_32(a_odd, b_odd, a_odd, vol);
	  
	  mrblk_32(b_odd, a_odd, c,
		   Niter, 1.e-31, 1, vol, &Msw_plus_block_psi_32, blk);
	  
	  Block_H_psi_32(&block_list[blk], b_even, b_odd, EO);
	  assign_32(c, b_even, vol);
	  assign_mul_one_sw_pm_imu_inv_block_32(EE, b_even, c, g_mu, &block_list[blk]);
	}
	else {
	  assign_mul_one_pm_imu_inv_32(a_even, b_even, +1., vol);
	  Block_H_psi_32(&block_list[blk], a_odd, a_even, OE);
	  /* a_odd = b_odd - a_odd */
	  diff_32(a_odd, b_odd, a_odd, vol);
	  
	  mrblk_32(b_odd, a_odd, c, 
		   Niter, 1.e-31, 1, vol, &Mtm_plus_block_psi_32, blk);
	  
	  Block_H_psi_32(&block_list[blk], b_even, b_odd, EO);
	  mul_one_pm_imu_inv_32(b_even, +1., vol);
	}
	/* a_even = a_even - b_even */
	diff_32(a_even, a_even, b_even, vol);
	
	/* add even and odd part up to full spinor P */
	add_eo_block_32_to_global(P, a_even, b_odd, blk);
      }
    }
  }
  free(blk_e_list);
  free(blk_o_list);
  finalize_solver(solver_field, nr_sf);
  g_mu = musave;
  g_kappa = kappasave;
  boundary(g_kappa);
  return;
}
Example #19
0
/* P output = solution , Q input = source */
int cg_mms_tm(spinor * const P, spinor * const Q, const int max_iter, 
	      double eps_sq, const int rel_prec, const int N, matrix_mult f) {

  static double normsq, pro, err, alpha_cg = 1., beta_cg = 0., squarenorm;
  int iteration, im, append = 0;
  char filename[100];
  static double gamma, alpham1;
  int const cg_mms_default_precision = 32;
  double tmp_mu = g_mu;
  WRITER * writer = NULL;
  paramsInverterInfo *inverterInfo = NULL;
  paramsPropagatorFormat *propagatorFormat = NULL;
  spinor * temp_save; //used to save all the masses
  spinor ** solver_field = NULL;
  const int nr_sf = 5;

  init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  init_mms_tm(g_no_extra_masses);

  /* currently only implemented for P=0 */
  zero_spinor_field(P, N);
  /*  Value of the bare MMS-masses (\mu^2 - \mu_0^2) */
  for(im = 0; im < g_no_extra_masses; im++) {
    sigma[im] = g_extra_masses[im]*g_extra_masses[im] - g_mu*g_mu;
    assign(xs_mms_solver[im], P, N);
    assign(ps_mms_solver[im], Q, N);
    zitam1[im] = 1.0;
    zita[im] = 1.0;
    alphas[im] = 1.0;
    betas[im] = 0.0;
  }

  squarenorm = square_norm(Q, N, 1);
  assign(solver_field[0], P, N);
/*   normsp = square_norm(P, N, 1); */

  /* initialize residue r and search vector p */
/*   if(normsp == 0){ */
  /* currently only implemented for P=0 */
  if(1) {
    /* if a starting solution vector equal to zero is chosen */
    assign(solver_field[1], Q, N);
    assign(solver_field[2], Q, N);
    normsq = square_norm(Q, N, 1);
  }
  else{
    /* if a starting solution vector different from zero is chosen */
    f(solver_field[3], solver_field[0]);

    diff(solver_field[1], Q, solver_field[3], N);
    assign(solver_field[2], solver_field[1], N);
    normsq = square_norm(solver_field[2], N, 1);
  }

  /* main loop */
  for(iteration = 0; iteration < max_iter; iteration++) {

    /*   Q^2*p and then (p,Q^2*p)  */
    f(solver_field[4], solver_field[2]);
    pro = scalar_prod_r(solver_field[2], solver_field[4], N, 1);

    /* For the update of the coeff. of the shifted pol. we need alpha_cg(i-1) and alpha_cg(i).
       This is the reason why we need this double definition of alpha */
    alpham1 = alpha_cg;

    /* Compute alpha_cg(i+1) */
    alpha_cg = normsq/pro;
    for(im = 0; im < g_no_extra_masses; im++) {

      /* Now gamma is a temp variable that corresponds to zita(i+1) */ 
      gamma = zita[im]*alpham1/(alpha_cg*beta_cg*(1.-zita[im]/zitam1[im]) 
				+ alpham1*(1.+sigma[im]*alpha_cg));

      /* Now zita(i-1) is put equal to the old zita(i) */
      zitam1[im] = zita[im];
      /* Now zita(i+1) is updated */
      zita[im] = gamma;
      /* Update of alphas(i) = alpha_cg(i)*zita(i+1)/zita(i) */ 
      alphas[im] = alpha_cg*zita[im]/zitam1[im];
      /* Compute xs(i+1) = xs(i) + alphas(i)*ps(i) */
      assign_add_mul_r(xs_mms_solver[im], ps_mms_solver[im], alphas[im], N); 
    }

    /*  Compute x_(i+1) = x_i + alpha_cg(i+1) p_i    */
    assign_add_mul_r(solver_field[0], solver_field[2],  alpha_cg, N);
    /*  Compute r_(i+1) = r_i - alpha_cg(i+1) Qp_i   */
    assign_add_mul_r(solver_field[1], solver_field[4], -alpha_cg, N);

    /* Check whether the precision eps_sq is reached */

    err = square_norm(solver_field[1], N, 1);
    if(g_debug_level > 2 && g_proc_id == g_stdio_proc) {
      printf("CGMMS iteration: %d residue: %g\n", iteration, err); fflush( stdout );
    }

    if( ((err <= eps_sq) && (rel_prec == 0)) ||
      ((err <= eps_sq*squarenorm) && (rel_prec == 1)) ) {

      assign(P, solver_field[0], N);
      f(solver_field[2], P);
      diff(solver_field[3], solver_field[2], Q, N);
      err = square_norm(solver_field[3], N, 1);
      if(g_debug_level > 0 && g_proc_id == g_stdio_proc) {
        printf("# CG MMS true residue at final iteration (%d) was %g.\n", iteration, err); 
        fflush( stdout);
      }
      g_sloppy_precision = 0;
      g_mu = tmp_mu;

      /* save all the results of (Q^dagger Q)^(-1) \gamma_5 \phi */
      /* here ... */
      /* when im == -1 save the base mass*/
      for(im = -1; im < g_no_extra_masses; im++) {
        if(im==-1) {
          temp_save=solver_field[0];
        } else {
          temp_save=xs_mms_solver[im];
        }

        if(SourceInfo.type != 1) {
          if (PropInfo.splitted) {
            sprintf(filename, "%s.%.4d.%.2d.%.2d.cgmms.%.2d.inverted", SourceInfo.basename, SourceInfo.nstore, SourceInfo.t, SourceInfo.ix, im+1);
          } else {
            sprintf(filename, "%s.%.4d.%.2d.cgmms.%.2d.inverted", SourceInfo.basename, SourceInfo.nstore, SourceInfo.t, im+1);
          }
        }
        else {
          sprintf(filename, "%s.%.4d.%.5d.cgmms.%.2d.0", SourceInfo.basename, SourceInfo.nstore, SourceInfo.sample, im+1);
        }
        if(g_kappa != 0) {
          mul_r(temp_save, (2*g_kappa)*(2*g_kappa), temp_save, N);
        }

        append = !PropInfo.splitted;

        construct_writer(&writer, filename, append);

        if (PropInfo.splitted || SourceInfo.ix == index_start) {
          //Create the inverter info NOTE: always set to TWILSON=12 and 1 flavour (to be adjusted)
          inverterInfo = construct_paramsInverterInfo(err, iteration+1, 12, 1);
          if (im == -1) {
            inverterInfo->cgmms_mass = inverterInfo->mu;
          } else {
            inverterInfo->cgmms_mass = g_extra_masses[im]/(2 * inverterInfo->kappa);
          }
          write_spinor_info(writer, PropInfo.format, inverterInfo, append);
          //Create the propagatorFormat NOTE: always set to 1 flavour (to be adjusted)
          propagatorFormat = construct_paramsPropagatorFormat(cg_mms_default_precision, 1);
          write_propagator_format(writer, propagatorFormat);
          free(inverterInfo);
          free(propagatorFormat);
        }
        convert_lexic_to_eo(solver_field[2], solver_field[1], temp_save);
        write_spinor(writer, &solver_field[2], &solver_field[1], 1, 32);
        destruct_writer(writer);
      }
      finalize_solver(solver_field, nr_sf);
      return(iteration+1);
    }

    /* Compute beta_cg(i+1) = (r(i+1),r(i+1))/(r(i),r(i))
       Compute p(i+1) = r(i+1) + beta(i+1)*p(i)  */
    beta_cg = err/normsq;
    assign_mul_add_r(solver_field[2], beta_cg, solver_field[1], N);
    normsq = err;

    /* Compute betas(i+1) = beta_cg(i)*(zita(i+1)*alphas(i))/(zita(i)*alpha_cg(i))
       Compute ps(i+1) = zita(i+1)*r(i+1) + betas(i+1)*ps(i)  */
    for(im = 0; im < g_no_extra_masses; im++) {
      betas[im] = beta_cg*zita[im]*alphas[im]/(zitam1[im]*alpha_cg);
      assign_mul_add_mul_r(ps_mms_solver[im], solver_field[1], betas[im], zita[im], N);
    }
  }
  assign(P, solver_field[0], N);
  g_sloppy_precision = 0;
  finalize_solver(solver_field, nr_sf);
  return(-1);
}
Example #20
0
int gmres(spinor * const P,spinor * const Q, 
	  const int m, const int max_restarts,
	  const double eps_sq, const int rel_prec,
	  const int N, const int parallel, matrix_mult f){

  int restart, i, j, k;
  double beta, eps, norm;
  complex tmp1, tmp2;
  spinor ** solver_field = NULL;
  const int nr_sf = 3;

  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }

  eps=sqrt(eps_sq);
  init_gmres(m, VOLUMEPLUSRAND);

  norm = sqrt(square_norm(Q, N, parallel));

  assign(solver_field[2], P, N);
  for(restart = 0; restart < max_restarts; restart++){
    /* r_0=Q-AP  (b=Q, x+0=P) */
    f(solver_field[0], solver_field[2]);
    diff(solver_field[0], Q, solver_field[0], N);

    /* v_0=r_0/||r_0|| */
    alpha[0].re=sqrt(square_norm(solver_field[0], N, parallel));

    if(g_proc_id == g_stdio_proc && g_debug_level > 1){
      printf("%d\t%g true residue\n", restart*m, alpha[0].re*alpha[0].re); 
      fflush(stdout);
    }

    if(alpha[0].re==0.){
      assign(P, solver_field[2], N);
      finalize_solver(solver_field, nr_sf);
      return(restart*m);
    }

    mul_r(V[0], 1./alpha[0].re, solver_field[0], N);

    for(j = 0; j < m; j++){
      /* solver_field[0]=A*v_j */

      f(solver_field[0], V[j]);

      /* Set h_ij and omega_j */
      /* solver_field[1] <- omega_j */
      assign(solver_field[1], solver_field[0], N);
      for(i = 0; i <= j; i++){
	H[i][j] = scalar_prod(V[i], solver_field[1], N, parallel);
	assign_diff_mul(solver_field[1], V[i], H[i][j], N);
      }

      _complex_set(H[j+1][j], sqrt(square_norm(solver_field[1], N, parallel)), 0.);
      for(i = 0; i < j; i++){
	tmp1 = H[i][j];
	tmp2 = H[i+1][j];
	_mult_real(H[i][j], tmp2, s[i]);
	_add_assign_complex_conj(H[i][j], c[i], tmp1);
	_mult_real(H[i+1][j], tmp1, s[i]);
	_diff_assign_complex(H[i+1][j], c[i], tmp2);
      }

      /* Set beta, s, c, alpha[j],[j+1] */
      beta = sqrt(_complex_square_norm(H[j][j]) + _complex_square_norm(H[j+1][j]));
      s[j] = H[j+1][j].re / beta;
      _mult_real(c[j], H[j][j], 1./beta);
      _complex_set(H[j][j], beta, 0.);
      _mult_real(alpha[j+1], alpha[j], s[j]);
      tmp1 = alpha[j];
      _mult_assign_complex_conj(alpha[j], c[j], tmp1);

      /* precision reached? */
      if(g_proc_id == g_stdio_proc && g_debug_level > 1){
	printf("%d\t%g residue\n", restart*m+j, alpha[j+1].re*alpha[j+1].re); 
	fflush(stdout);
      }
      if(((alpha[j+1].re <= eps) && (rel_prec == 0)) || ((alpha[j+1].re <= eps*norm) && (rel_prec == 1))){
	_mult_real(alpha[j], alpha[j], 1./H[j][j].re);
	assign_add_mul(solver_field[2], V[j], alpha[j], N);
	for(i = j-1; i >= 0; i--){
	  for(k = i+1; k <= j; k++){
 	    _mult_assign_complex(tmp1, H[i][k], alpha[k]); 
	    _diff_complex(alpha[i], tmp1);
	  }
	  _mult_real(alpha[i], alpha[i], 1./H[i][i].re);
	  assign_add_mul(solver_field[2], V[i], alpha[i], N);
	}
	for(i = 0; i < m; i++){
	  alpha[i].im = 0.;
	}
	assign(P, solver_field[2], N);
	finalize_solver(solver_field, nr_sf);
	return(restart*m+j);
      }
      /* if not */
      else{
	if(j != m-1){
	  mul_r(V[(j+1)], 1./H[j+1][j].re, solver_field[1], N);
	}
      }

    }
    j=m-1;
    /* prepare for restart */
    _mult_real(alpha[j], alpha[j], 1./H[j][j].re);
    assign_add_mul(solver_field[2], V[j], alpha[j], N);
    for(i = j-1; i >= 0; i--){
      for(k = i+1; k <= j; k++){
	_mult_assign_complex(tmp1, H[i][k], alpha[k]);
	_diff_complex(alpha[i], tmp1);
      }
      _mult_real(alpha[i], alpha[i], 1./H[i][i].re);
      assign_add_mul(solver_field[2], V[i], alpha[i], N);
    }
    for(i = 0; i < m; i++){
      alpha[i].im = 0.;
    }
  }

  /* If maximal number of restarts is reached */
  assign(P, solver_field[2], N);
  finalize_solver(solver_field, nr_sf);
  return(-1);
}