C++ (Cpp) mul_r Examples

Programming Language: C++ (Cpp)

Method/Function: mul_r

Examples at hotexamples.com: 15

C++ (Cpp) mul_r - 15 examples found. These are the top rated real world C++ (Cpp) examples of mul_r extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: chrono_guess.c Project: VincentDrach/tmLQCD

void chrono_add_solution(spinor * const trial, spinor ** const v, int index_array[],
			const int N, int * _n, const int V) {

  double norm = 0.;
  int i;

  if(N > 0) {
    if(g_proc_id == 0 && g_debug_level > 1) {
      printf("CSG: adding vector %d to the list of length %d\n", (*_n)+1, N);
      fflush(stdout);
    }
    if((*_n) < N) {
      index_array[(*_n)] = *_n;
      (*_n)= (*_n)+1;
      /* normalise vector */
      norm = sqrt(square_norm(trial, V, 1));
      mul_r(v[index_array[(*_n)-1]], 1/norm, trial, V);
    }
    else {
      /* Reorder the index_array */
      /* Keep most recent first  */
      for(i = 1; i < N; i++) {
	index_array[i-1] = index_array[i];
      }
      index_array[N-1] = (index_array[N-2]+1)%N;
      /* and normalise */
      norm = sqrt(square_norm(trial, V, 1));
      mul_r(v[index_array[N-1]], 1/norm, trial, V);
    }
  }

  return;
}

Example #2

Show file

File: ndratcor_monomial.c Project: Finkenrath/tmLQCD

// applies ((Q_h\tau_1 * R)^2 - 1)
void apply_Z_ndpsi(spinor * const k_up, spinor * const k_dn,
		     spinor * const l_up, spinor * const l_dn,
		     const int id, hamiltonian_field_t * const hf,
		     solver_params_t * solver_params) {
  monomial * mnl = &monomial_list[id];

  mnl->iter0 += solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field,
			                       l_up, l_dn, solver_params);  
  
  // apply R to the pseudo-fermion fields
  assign(k_up, l_up, VOLUME/2);
  assign(k_dn, l_dn, VOLUME/2);
  for(int j = (mnl->rat.np-1); j > -1; j--) {
    assign_add_mul_r(k_up, g_chi_up_spinor_field[j], 
		     mnl->rat.rmu[j], VOLUME/2);
    assign_add_mul_r(k_dn, g_chi_dn_spinor_field[j], 
		     mnl->rat.rmu[j], VOLUME/2);
  }

  // apply R a second time
  mnl->iter0 += solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field,
	       k_up, k_dn,
	       solver_params);
  for(int j = (mnl->rat.np-1); j > -1; j--) {
    assign_add_mul_r(k_up, g_chi_up_spinor_field[j], 
		     mnl->rat.rmu[j], VOLUME/2);
    assign_add_mul_r(k_dn, g_chi_dn_spinor_field[j], 
		     mnl->rat.rmu[j], VOLUME/2);
  }
  mul_r(g_chi_up_spinor_field[mnl->rat.np], mnl->rat.A*mnl->rat.A, 
	k_up, VOLUME/2);
  mul_r(g_chi_dn_spinor_field[mnl->rat.np], mnl->rat.A*mnl->rat.A, 
	k_dn, VOLUME/2);
  // apply Q^2 and compute the residue
  solver_params->M_ndpsi(k_up, k_dn,
		     g_chi_up_spinor_field[mnl->rat.np], g_chi_dn_spinor_field[mnl->rat.np]);
  diff(k_up, k_up, l_up, VOLUME/2);
  diff(k_dn, k_dn, l_dn, VOLUME/2);
  
}

Example #3

Show file

File: Dov_psi.c Project: palao/tmLQCD

/* |R>=rnorm^2 Q^2 |S> */
void norm_Q_sqr_psi(spinor * const R, spinor * const S, 
		    const double rnorm) { 

  spinor *aux;
  aux=lock_Dov_WS_spinor(1);

  /* Term -1-s is done in D_psi! does this comment make sense for HMC? */
  /* no, it doesn't, we do have to work on this */
  /* here we need to set kappa = 1./(2 (-1-s) + 8) */
  D_psi(R, S);
  gamma5(aux, R, VOLUME);
  D_psi(R, aux);
  gamma5(R, R, VOLUME);
  mul_r(R, rnorm*rnorm, R, VOLUME);

  unlock_Dov_WS_spinor(1);
  return;
}

Example #4

Show file

File: Dov_psi.c Project: palao/tmLQCD

/* |R>=rnorm^n Q^n |S>                                                     */
void norm_Q_n_psi(spinor * const R, spinor * const S, 
		  const int n, const double rnorm) { 

  int i;
  double npar = 1.;
  spinor *aux;

  aux=lock_Dov_WS_spinor(1);

  assign(aux, S, VOLUME);
  
  
  for(i=0; i < n; i++){
    D_psi(R, aux);
    /* Term -1-s is done in D_psi! does this comment make sense for HMC? */
    gamma5(aux, R, VOLUME);
    npar *= rnorm;
  }
  mul_r(R, npar, aux, VOLUME);
  unlock_Dov_WS_spinor(1);
  return;
}

Example #5

Show file

File: cg_mms_tm.c Project: palao/tmLQCD

/* P output = solution , Q input = source */
int cg_mms_tm(spinor * const P, spinor * const Q, const int max_iter, 
	      double eps_sq, const int rel_prec, const int N, matrix_mult f) {

  static double normsq, pro, err, alpha_cg = 1., beta_cg = 0., squarenorm;
  int iteration, im, append = 0;
  char filename[100];
  static double gamma, alpham1;
  int const cg_mms_default_precision = 32;
  double tmp_mu = g_mu;
  WRITER * writer = NULL;
  paramsInverterInfo *inverterInfo = NULL;
  paramsPropagatorFormat *propagatorFormat = NULL;
  spinor * temp_save; //used to save all the masses
  spinor ** solver_field = NULL;
  const int nr_sf = 5;

  init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  init_mms_tm(g_no_extra_masses);

  /* currently only implemented for P=0 */
  zero_spinor_field(P, N);
  /*  Value of the bare MMS-masses (\mu^2 - \mu_0^2) */
  for(im = 0; im < g_no_extra_masses; im++) {
    sigma[im] = g_extra_masses[im]*g_extra_masses[im] - g_mu*g_mu;
    assign(xs_mms_solver[im], P, N);
    assign(ps_mms_solver[im], Q, N);
    zitam1[im] = 1.0;
    zita[im] = 1.0;
    alphas[im] = 1.0;
    betas[im] = 0.0;
  }

  squarenorm = square_norm(Q, N, 1);
  assign(solver_field[0], P, N);
/*   normsp = square_norm(P, N, 1); */

  /* initialize residue r and search vector p */
/*   if(normsp == 0){ */
  /* currently only implemented for P=0 */
  if(1) {
    /* if a starting solution vector equal to zero is chosen */
    assign(solver_field[1], Q, N);
    assign(solver_field[2], Q, N);
    normsq = square_norm(Q, N, 1);
  }
  else{
    /* if a starting solution vector different from zero is chosen */
    f(solver_field[3], solver_field[0]);

    diff(solver_field[1], Q, solver_field[3], N);
    assign(solver_field[2], solver_field[1], N);
    normsq = square_norm(solver_field[2], N, 1);
  }

  /* main loop */
  for(iteration = 0; iteration < max_iter; iteration++) {

    /*   Q^2*p and then (p,Q^2*p)  */
    f(solver_field[4], solver_field[2]);
    pro = scalar_prod_r(solver_field[2], solver_field[4], N, 1);

    /* For the update of the coeff. of the shifted pol. we need alpha_cg(i-1) and alpha_cg(i).
       This is the reason why we need this double definition of alpha */
    alpham1 = alpha_cg;

    /* Compute alpha_cg(i+1) */
    alpha_cg = normsq/pro;
    for(im = 0; im < g_no_extra_masses; im++) {

      /* Now gamma is a temp variable that corresponds to zita(i+1) */ 
      gamma = zita[im]*alpham1/(alpha_cg*beta_cg*(1.-zita[im]/zitam1[im]) 
				+ alpham1*(1.+sigma[im]*alpha_cg));

      /* Now zita(i-1) is put equal to the old zita(i) */
      zitam1[im] = zita[im];
      /* Now zita(i+1) is updated */
      zita[im] = gamma;
      /* Update of alphas(i) = alpha_cg(i)*zita(i+1)/zita(i) */ 
      alphas[im] = alpha_cg*zita[im]/zitam1[im];
      /* Compute xs(i+1) = xs(i) + alphas(i)*ps(i) */
      assign_add_mul_r(xs_mms_solver[im], ps_mms_solver[im], alphas[im], N); 
    }

    /*  Compute x_(i+1) = x_i + alpha_cg(i+1) p_i    */
    assign_add_mul_r(solver_field[0], solver_field[2],  alpha_cg, N);
    /*  Compute r_(i+1) = r_i - alpha_cg(i+1) Qp_i   */
    assign_add_mul_r(solver_field[1], solver_field[4], -alpha_cg, N);

    /* Check whether the precision eps_sq is reached */

    err = square_norm(solver_field[1], N, 1);
    if(g_debug_level > 2 && g_proc_id == g_stdio_proc) {
      printf("CGMMS iteration: %d residue: %g\n", iteration, err); fflush( stdout );
    }

    if( ((err <= eps_sq) && (rel_prec == 0)) ||
      ((err <= eps_sq*squarenorm) && (rel_prec == 1)) ) {

      assign(P, solver_field[0], N);
      f(solver_field[2], P);
      diff(solver_field[3], solver_field[2], Q, N);
      err = square_norm(solver_field[3], N, 1);
      if(g_debug_level > 0 && g_proc_id == g_stdio_proc) {
        printf("# CG MMS true residue at final iteration (%d) was %g.\n", iteration, err); 
        fflush( stdout);
      }
      g_sloppy_precision = 0;
      g_mu = tmp_mu;

      /* save all the results of (Q^dagger Q)^(-1) \gamma_5 \phi */
      /* here ... */
      /* when im == -1 save the base mass*/
      for(im = -1; im < g_no_extra_masses; im++) {
        if(im==-1) {
          temp_save=solver_field[0];
        } else {
          temp_save=xs_mms_solver[im];
        }

        if(SourceInfo.type != 1) {
          if (PropInfo.splitted) {
            sprintf(filename, "%s.%.4d.%.2d.%.2d.cgmms.%.2d.inverted", SourceInfo.basename, SourceInfo.nstore, SourceInfo.t, SourceInfo.ix, im+1);
          } else {
            sprintf(filename, "%s.%.4d.%.2d.cgmms.%.2d.inverted", SourceInfo.basename, SourceInfo.nstore, SourceInfo.t, im+1);
          }
        }
        else {
          sprintf(filename, "%s.%.4d.%.5d.cgmms.%.2d.0", SourceInfo.basename, SourceInfo.nstore, SourceInfo.sample, im+1);
        }
        if(g_kappa != 0) {
          mul_r(temp_save, (2*g_kappa)*(2*g_kappa), temp_save, N);
        }

        append = !PropInfo.splitted;

        construct_writer(&writer, filename, append);

        if (PropInfo.splitted || SourceInfo.ix == index_start) {
          //Create the inverter info NOTE: always set to TWILSON=12 and 1 flavour (to be adjusted)
          inverterInfo = construct_paramsInverterInfo(err, iteration+1, 12, 1);
          if (im == -1) {
            inverterInfo->cgmms_mass = inverterInfo->mu;
          } else {
            inverterInfo->cgmms_mass = g_extra_masses[im]/(2 * inverterInfo->kappa);
          }
          write_spinor_info(writer, PropInfo.format, inverterInfo, append);
          //Create the propagatorFormat NOTE: always set to 1 flavour (to be adjusted)
          propagatorFormat = construct_paramsPropagatorFormat(cg_mms_default_precision, 1);
          write_propagator_format(writer, propagatorFormat);
          free(inverterInfo);
          free(propagatorFormat);
        }
        convert_lexic_to_eo(solver_field[2], solver_field[1], temp_save);
        write_spinor(writer, &solver_field[2], &solver_field[1], 1, 32);
        destruct_writer(writer);
      }
      finalize_solver(solver_field, nr_sf);
      return(iteration+1);
    }

    /* Compute beta_cg(i+1) = (r(i+1),r(i+1))/(r(i),r(i))
       Compute p(i+1) = r(i+1) + beta(i+1)*p(i)  */
    beta_cg = err/normsq;
    assign_mul_add_r(solver_field[2], beta_cg, solver_field[1], N);
    normsq = err;

    /* Compute betas(i+1) = beta_cg(i)*(zita(i+1)*alphas(i))/(zita(i)*alpha_cg(i))
       Compute ps(i+1) = zita(i+1)*r(i+1) + betas(i+1)*ps(i)  */
    for(im = 0; im < g_no_extra_masses; im++) {
      betas[im] = beta_cg*zita[im]*alphas[im]/(zitam1[im]*alpha_cg);
      assign_mul_add_mul_r(ps_mms_solver[im], solver_field[1], betas[im], zita[im], N);
    }
  }
  assign(P, solver_field[0], N);
  g_sloppy_precision = 0;
  finalize_solver(solver_field, nr_sf);
  return(-1);
}

Example #6

Show file

File: operator.c Project: chaotika86/tmLQCD

void op_invert(const int op_id, const int index_start, const int write_prop) {
  operator * optr = &operator_list[op_id];
  double atime = 0., etime = 0., nrm1 = 0., nrm2 = 0.;
  int i;
  optr->iterations = 0;
  optr->reached_prec = -1.;
  g_kappa = optr->kappa;
  boundary(g_kappa);

  atime = gettime();
  if(optr->type == TMWILSON || optr->type == WILSON || optr->type == CLOVER) {
    g_mu = optr->mu;
    g_c_sw = optr->c_sw;
    if(optr->type == CLOVER) {
      if (g_cart_id == 0 && g_debug_level > 1) {
	printf("#\n# csw = %e, computing clover leafs\n", g_c_sw);
      }
      init_sw_fields(VOLUME);
      sw_term( (const su3**) g_gauge_field, optr->kappa, optr->c_sw); 
      /* this must be EE here!   */
      /* to match clover_inv in Qsw_psi */
      sw_invert(EE, optr->mu);
    }

    for(i = 0; i < 2; i++) {
      if (g_cart_id == 0) {
        printf("#\n# 2 kappa mu = %e, kappa = %e, c_sw = %e\n", g_mu, g_kappa, g_c_sw);
      }
      if(optr->type != CLOVER) {
	if(use_preconditioning){
	  g_precWS=(void*)optr->precWS;
	}
	else {
	  g_precWS=NULL;
	}
	
	optr->iterations = invert_eo( optr->prop0, optr->prop1, optr->sr0, optr->sr1,
				      optr->eps_sq, optr->maxiter,
				      optr->solver, optr->rel_prec,
				      0, optr->even_odd_flag,optr->no_extra_masses, optr->extra_masses, optr->id );
	
	/* check result */
	M_full(g_spinor_field[4], g_spinor_field[5], optr->prop0, optr->prop1);
      }
      else {
	optr->iterations = invert_clover_eo(optr->prop0, optr->prop1, optr->sr0, optr->sr1,
					    optr->eps_sq, optr->maxiter,
					    optr->solver, optr->rel_prec,
					    &g_gauge_field, &Qsw_pm_psi, &Qsw_minus_psi);
	/* check result */
 	Msw_full(g_spinor_field[4], g_spinor_field[5], optr->prop0, optr->prop1);
      }

      diff(g_spinor_field[4], g_spinor_field[4], optr->sr0, VOLUME / 2);
      diff(g_spinor_field[5], g_spinor_field[5], optr->sr1, VOLUME / 2);

      nrm1 = square_norm(g_spinor_field[4], VOLUME / 2, 1);
      nrm2 = square_norm(g_spinor_field[5], VOLUME / 2, 1);
      optr->reached_prec = nrm1 + nrm2;

      /* convert to standard normalisation  */
      /* we have to mult. by 2*kappa        */
      if (optr->kappa != 0.) {
        mul_r(optr->prop0, (2*optr->kappa), optr->prop0, VOLUME / 2);
        mul_r(optr->prop1, (2*optr->kappa), optr->prop1, VOLUME / 2);
      }
      if (optr->solver != CGMMS && write_prop) /* CGMMS handles its own I/O */
        optr->write_prop(op_id, index_start, i);
      if(optr->DownProp) {
        optr->mu = -optr->mu;
      } else 
        break;
    }
  }
  else if(optr->type == DBTMWILSON || optr->type == DBCLOVER) {
    g_mubar = optr->mubar;
    g_epsbar = optr->epsbar;
    g_c_sw = 0.;
    if(optr->type == DBCLOVER) {
      g_c_sw = optr->c_sw;
      if (g_cart_id == 0 && g_debug_level > 1) {
	printf("#\n# csw = %e, computing clover leafs\n", g_c_sw);
      }
      init_sw_fields(VOLUME);
      sw_term( (const su3**) g_gauge_field, optr->kappa, optr->c_sw); 
      sw_invert_nd(optr->mubar*optr->mubar-optr->epsbar*optr->epsbar);
    }

    for(i = 0; i < SourceInfo.no_flavours; i++) {
      if(optr->type != DBCLOVER) {
	optr->iterations = invert_doublet_eo( optr->prop0, optr->prop1, optr->prop2, optr->prop3, 
					      optr->sr0, optr->sr1, optr->sr2, optr->sr3,
					      optr->eps_sq, optr->maxiter,
					      optr->solver, optr->rel_prec);
      }
      else {
	optr->iterations = invert_cloverdoublet_eo( optr->prop0, optr->prop1, optr->prop2, optr->prop3, 
						    optr->sr0, optr->sr1, optr->sr2, optr->sr3,
						    optr->eps_sq, optr->maxiter,
						    optr->solver, optr->rel_prec);
      }
      g_mu = optr->mubar;
      if(optr->type != DBCLOVER) {
	M_full(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+2], optr->prop0, optr->prop1); 
      }
      else {
	Msw_full(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+2], optr->prop0, optr->prop1); 
      }
      assign_add_mul_r(g_spinor_field[DUM_DERI+1], optr->prop2, -optr->epsbar, VOLUME/2);
      assign_add_mul_r(g_spinor_field[DUM_DERI+2], optr->prop3, -optr->epsbar, VOLUME/2);

      g_mu = -g_mu;
      if(optr->type != DBCLOVER) {
	M_full(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+4], optr->prop2, optr->prop3); 
      }
      else {
	Msw_full(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+4], optr->prop2, optr->prop3);
      }
      assign_add_mul_r(g_spinor_field[DUM_DERI+3], optr->prop0, -optr->epsbar, VOLUME/2);
      assign_add_mul_r(g_spinor_field[DUM_DERI+4], optr->prop1, -optr->epsbar, VOLUME/2);

      diff(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+1], optr->sr0, VOLUME/2); 
      diff(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+2], optr->sr1, VOLUME/2); 
      diff(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+3], optr->sr2, VOLUME/2); 
      diff(g_spinor_field[DUM_DERI+4], g_spinor_field[DUM_DERI+4], optr->sr3, VOLUME/2); 

      nrm1  = square_norm(g_spinor_field[DUM_DERI+1], VOLUME/2, 1); 
      nrm1 += square_norm(g_spinor_field[DUM_DERI+2], VOLUME/2, 1); 
      nrm1 += square_norm(g_spinor_field[DUM_DERI+3], VOLUME/2, 1); 
      nrm1 += square_norm(g_spinor_field[DUM_DERI+4], VOLUME/2, 1); 
      optr->reached_prec = nrm1;
      g_mu = g_mu1;
      /* For standard normalisation */
      /* we have to mult. by 2*kappa */
      mul_r(g_spinor_field[DUM_DERI], (2*optr->kappa), optr->prop0, VOLUME/2);
      mul_r(g_spinor_field[DUM_DERI+1], (2*optr->kappa), optr->prop1, VOLUME/2);
      mul_r(g_spinor_field[DUM_DERI+2], (2*optr->kappa), optr->prop2, VOLUME/2);
      mul_r(g_spinor_field[DUM_DERI+3], (2*optr->kappa), optr->prop3, VOLUME/2);
      /* the final result should be stored in the convention used in */
      /* hep-lat/0606011                                             */
      /* this requires multiplication of source with                 */
      /* (1+itau_2)/sqrt(2) and the result with (1-itau_2)/sqrt(2)   */

      mul_one_pm_itau2(optr->prop0, optr->prop2, g_spinor_field[DUM_DERI], 
                       g_spinor_field[DUM_DERI+2], -1., VOLUME/2);
      mul_one_pm_itau2(optr->prop1, optr->prop3, g_spinor_field[DUM_DERI+1], 
                       g_spinor_field[DUM_DERI+3], -1., VOLUME/2);
      /* write propagator */
      if(write_prop) optr->write_prop(op_id, index_start, i);

      mul_r(optr->prop0, 1./(2*optr->kappa), g_spinor_field[DUM_DERI], VOLUME/2);
      mul_r(optr->prop1, 1./(2*optr->kappa), g_spinor_field[DUM_DERI+1], VOLUME/2);
      mul_r(optr->prop2, 1./(2*optr->kappa), g_spinor_field[DUM_DERI+2], VOLUME/2);
      mul_r(optr->prop3, 1./(2*optr->kappa), g_spinor_field[DUM_DERI+3], VOLUME/2);

      /* mirror source, but not for volume sources */
      if(i == 0 && SourceInfo.no_flavours == 2 && SourceInfo.type != 1) {
        if (g_cart_id == 0) {
          fprintf(stdout, "# Inversion done in %d iterations, squared residue = %e!\n",
                  optr->iterations, optr->reached_prec);
        }
        mul_one_pm_itau2(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+2], optr->sr0, optr->sr2, -1., VOLUME/2);
        mul_one_pm_itau2(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+3], optr->sr1, optr->sr3, -1., VOLUME/2);

        mul_one_pm_itau2(optr->sr0, optr->sr2, g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI], +1., VOLUME/2);
        mul_one_pm_itau2(optr->sr1, optr->sr3, g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+1], +1., VOLUME/2);

      }
      /* volume sources need only one inversion */
      else if(SourceInfo.type == 1) i++;
    }
  }
  else if(optr->type == OVERLAP) {
    g_mu = 0.;
    m_ov=optr->m;
    eigenvalues(&optr->no_ev, 5000, optr->ev_prec, 0, optr->ev_readwrite, nstore, optr->even_odd_flag);
/*     ov_check_locality(); */
/*      index_jd(&optr->no_ev_index, 5000, 1.e-12, optr->conf_input, nstore, 4); */
    ov_n_cheby=optr->deg_poly;

    if(use_preconditioning==1)
      g_precWS=(void*)optr->precWS;
    else
      g_precWS=NULL;


    if(g_debug_level > 3) ov_check_ginsparg_wilson_relation_strong(); 

    invert_overlap(op_id, index_start); 

    if(write_prop) optr->write_prop(op_id, index_start, 0);
  }
  etime = gettime();
  if (g_cart_id == 0 && g_debug_level > 0) {
    fprintf(stdout, "# Inversion done in %d iterations, squared residue = %e!\n",
            optr->iterations, optr->reached_prec);
    fprintf(stdout, "# Inversion done in %1.2e sec. \n", etime - atime);
  }
  return;
}

Example #7

Show file

File: arpack_cg.c Project: marcuspetschlies/tmLQCD_main

int arpack_cg(
  /* solver params */
  const int N,                   /* (IN) Number of lattice sites for this process*/
  solver_params_t solver_params, /* (IN) parameters for solver */
  spinor * const x,              /* (IN/OUT) initial guess on input, solution on output for this RHS*/
  spinor * const b,              /* (IN) right-hand side*/
  matrix_mult f,                 /* (IN) f(s,r) computes s=A*r, i.e. matrix-vector multiply in double precision */
  matrix_mult f32,               /* (IN) f(s,r) computes s=A*r, i.e. matrix-vector multiply in single precision */
  const double eps_sq,           /* (IN) squared tolerance of convergence of the linear system for systems nrhs1+1 till nrhs*/
  const int rel_prec,            /* (IN) 0 for using absoute error for convergence
                                         1 for using relative error for convergence*/
  const int maxit,               /* (IN) Maximum allowed number of iterations to solution for the linear system*/
  matrix_mult f_final,           /* (IN) final operator application during projection of type 1 */
  matrix_mult f_initial          /* (IN) initial operator application during projection of type 1 */
) {

  /* Static variables and arrays. */
  static int ncurRHS=0;                  /* current number of the system being solved */                   
  static void *_ax,*_r,*_tmps1,*_tmps2;                  
  static spinor *ax,*r,*tmps1,*tmps2;                  
  static _Complex double *evecs,*evals,*H,*HU,*Hinv,*initwork,*tmpv1;
  static _Complex double *zheev_work;
  static double *hevals,*zheev_rwork;
  static int *IPIV; 
  static int info_arpack=0;
  static int nconv=0; /* number of converged eigenvectors as returned by arpack */
  int i,j,tmpsize;
  char cV='V',cN='N', cU='U';   
  int ONE=1;
  int zheev_lwork,zheev_info;
  _Complex double c1, c2, c3, tpone=1.0,tzero=0.0;
  double d1,d2,d3;
  double et1,et2;  /* timing variables */
  char evecs_filename[500];
  char howmny = 'P';
  FILE *evecs_fs=NULL;
  size_t evecs_count;
  WRITER *evecs_writer=NULL;
  spinor *evecs_ptr0 = NULL, *evecs_ptr1 = NULL;
  paramsPropagatorFormat *evecs_propagatorFormat = NULL;
  void *evecs_io_buffer = NULL;

  int parallel;        /* for parallel processing of the scalar products */
#ifdef TM_USE_MPI
    parallel=1;
#else
    parallel=0;
#endif

  /* leading dimension for spinor vectors */
  int LDN;
  if(N==VOLUME)
     LDN = VOLUMEPLUSRAND;
  else
     LDN = VOLUMEPLUSRAND/2; 

  /*(IN) Number of right-hand sides to be solved*/ 
  const int nrhs =   solver_params.arpackcg_nrhs; 
  /*(IN) First number of right-hand sides to be solved using tolerance eps_sq1*/ 
  const int nrhs1 =   solver_params.arpackcg_nrhs1;
  /*(IN) squared tolerance of convergence of the linear system for systems 1 till nrhs1*/
  const double eps_sq1 = solver_params.arpackcg_eps_sq1;
  /*(IN) suqared tolerance for restarting cg */
  const double res_eps_sq =   solver_params.arpackcg_res_eps_sq;

  /* parameters for arpack */

  /*(IN) number of eigenvectors to be computed by arpack*/
  const int nev = solver_params.arpackcg_nev;
   /*(IN) size of the subspace used by arpack with the condition (nev+1) =< ncv*/
  const int ncv = solver_params.arpackcg_ncv;
  /*(IN) tolerance for computing eigenvalues with arpack */
  double arpack_eig_tol =   solver_params.arpackcg_eig_tol;
  /*(IN) maximum number of iterations to be used by arpack*/
  int arpack_eig_maxiter =   solver_params.arpackcg_eig_maxiter;
  /*(IN) 0 for eigenvalues with smallest real part "SR"
         1 for eigenvalues with largest real part "LR"
         2 for eigenvalues with smallest absolute value "SM"
         3 for eigenvalues with largest absolute value "LM"
         4 for eigenvalues with smallest imaginary part "SI"
         5 for eigenvalues with largest imaginary part  "LI"*/
  int kind =   solver_params.arpackcg_evals_kind;
  /*(IN) 0 don't compute the eiegnvalues and their residuals of the original system 
         1 compute the eigenvalues and the residuals for the original system (the orthonormal basis
           still be used in deflation and they are not overwritten).*/
  int comp_evecs =   solver_params.arpackcg_comp_evecs;
  /*(IN) 0 no polynomial acceleration; 1 use polynomial acceleration*/
  int acc =   solver_params.use_acc;
  /*(IN) degree of the Chebyshev polynomial (irrelevant if acc=0)*/
  int cheb_k = solver_params.cheb_k;
  /*(IN) lower end of the interval where the acceleration will be used (irrelevant if acc=0)*/
  double emin = solver_params.op_evmin;
  /*(IN) upper end of the interval where the acceleration will be used (irrelevant if acc=0)*/
  double emax = solver_params.op_evmax;
  /*(IN) file name to be used for printing out debugging information from arpack*/
  char *arpack_logfile = solver_params.arpack_logfile;
  /*(IN) read eigenvectors in Schur basis from file */
  int  arpack_read_ev = solver_params.arpackcg_read_ev;
  /*(IN) write eigenvectors in Schur basis to file */
  int  arpack_write_ev = solver_params.arpackcg_write_ev;
  /*(IN) file name to be used for reading and writing evecs from and to disc */
  char *arpack_evecs_filename = solver_params.arpack_evecs_filename;
   /*(IN) precision used for writing eigenvectors */
  int arpack_evecs_writeprec = solver_params.arpack_evecs_writeprec;
  /* how to project with approximate eigenvectors */
  int projection_type = solver_params.projection_type;
  /* file format for evecs used by arpack */
  char *arpack_evecs_fileformat = solver_params.arpack_evecs_fileformat; 

  /*-------------------------------------------------------------
    if this is the first right hand side, allocate memory, 
    call arpack, and compute resiudals of eigenvectors if needed
    -------------------------------------------------------------*/ 
  if(ncurRHS==0){ 
#if (defined SSE || defined SSE2 || defined SSE3)
    _ax = malloc((LDN+ALIGN_BASE)*sizeof(spinor));
    if(_ax==NULL)
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for _ax inside arpack_cg.\n");
       exit(1);
    }
    else
       {ax  = (spinor *) ( ((unsigned long int)(_ax)+ALIGN_BASE)&~ALIGN_BASE);}

    _r = malloc((LDN+ALIGN_BASE)*sizeof(spinor));
    if(_r==NULL)
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for _r inside arpack_cg.\n");
       exit(1);
    }
    else
       {r  = (spinor *) ( ((unsigned long int)(_r)+ALIGN_BASE)&~ALIGN_BASE);}

    _tmps1 = malloc((LDN+ALIGN_BASE)*sizeof(spinor));
    if(_tmps1==NULL)
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for _tmps1 inside arpack_cg.\n");
       exit(1);
    }
    else
       {tmps1  = (spinor *) ( ((unsigned long int)(_tmps1)+ALIGN_BASE)&~ALIGN_BASE);}

    _tmps2 = malloc((LDN+ALIGN_BASE)*sizeof(spinor));
    if(_tmps2==NULL)
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for _tmps2 inside arpack_cg.\n");
       exit(1);
    }
    else
       {tmps2  = (spinor *) ( ((unsigned long int)(_tmps2)+ALIGN_BASE)&~ALIGN_BASE);}

#else
    ax = (spinor *) malloc(LDN*sizeof(spinor));
    r  = (spinor *) malloc(LDN*sizeof(spinor));
    tmps1 = (spinor *) malloc(LDN*sizeof(spinor));
    tmps2 = (spinor *) malloc(LDN*sizeof(spinor));
    
    if( (ax == NULL)  || (r==NULL) || (tmps1==NULL) || (tmps2==NULL) )
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for ax,r,tmps1,tmps2 inside arpack_cg.\n");
       exit(1);
    }
#endif


    evecs = (_Complex double *) malloc(ncv*12*N*sizeof(_Complex double)); /* note: no extra buffer  */
    evals = (_Complex double *) malloc(ncv*sizeof(_Complex double)); 
    tmpv1 = (_Complex double *) malloc(12*N*sizeof(_Complex double));

    if((evecs == NULL)  || (evals==NULL) || (tmpv1==NULL))
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for evecs and evals inside arpack_cg.\n");
       exit(1);
    }

    if ( arpack_read_ev == 1) {

      if (strcmp(arpack_evecs_fileformat, "partfile") == 0) {
        /* set evec filenmae */
        sprintf(evecs_filename, "%s.%.5d.pt%.2dpx%.2dpy%.2dpz%.2d", arpack_evecs_filename, nev, g_proc_coords[0], g_proc_coords[1], g_proc_coords[2], g_proc_coords[3]);
        evecs_fs = fopen(evecs_filename, "r");
        if (evecs_fs == NULL) {
          fprintf(stderr, "[arpack_cg] (%.4d) Error, could not open file %s for reading\n", g_cart_id, evecs_filename);
          return(-2);
        }
        fprintf(stdout, "# [arpack_cg] reading eigenvectors from file %s\n", evecs_filename);

        if(arpack_evecs_writeprec == 64) {
 
          evecs_io_buffer = (void*)evecs;
   
          et1=gettime();
          evecs_count = fread( evecs_io_buffer, sizeof(_Complex double), (size_t)nev*12*N, evecs_fs);
          et2=gettime();
        
        } else {
          evecs_io_buffer = malloc(sizeof(_Complex double) * (size_t)nev*12*N );
          if( evecs_io_buffer == NULL) {
            fprintf(stderr, "[arpack_cg] (%.4d) Error, could not allocate memory for evecs_io_buffer\n", g_cart_id);
            return(-42);
          }
  
          et1=gettime();
          evecs_count = fread( evecs_io_buffer, sizeof(_Complex double)/2, (size_t)nev*12*N, evecs_fs);
          et2=gettime();

          single2double(evecs, evecs_io_buffer, nev*24*N);

          free( evecs_io_buffer );
          evecs_io_buffer = NULL;
        }
       
        if( evecs_count != ((size_t)nev*12*N) ) {
          fprintf(stderr, "[arpack_cg] (%.4d) Error, could not proper amount of data from file %s\n", g_cart_id, evecs_filename);
          return(-3);
        }
        fclose(evecs_fs);
        evecs_fs = NULL;
        if(g_proc_id == g_stdio_proc) {
          fprintf(stdout,"# [arpack_cg] ARPACK time for reading %d eigenvectors: %+e seconds\n", nev, et2-et1);
        }
      } else if(strcmp(arpack_evecs_fileformat, "single") == 0) {

        if(N==VOLUME) {
          for(i=0; i<nev; i++) {
            sprintf(evecs_filename, "%s.ev%.5d", arpack_evecs_filename, i);
            evecs_ptr0 = (spinor*)&(evecs[i*12*N]);
            evecs_ptr1 = NULL;
            read_spinor(evecs_ptr0,  evecs_ptr1, evecs_filename, 0);
          } /* end of loop on eigenvectors */
        } else if(N==VOLUME/2) {
          for(i=0; i<nev/2; i++) {
            sprintf(evecs_filename, "%s.ev%.5d", arpack_evecs_filename, 2*i);
            evecs_ptr0 = (spinor*)&(evecs[(2*i  )*12*N]);
            evecs_ptr1 = (spinor*)&(evecs[(2*i+1)*12*N]);
            read_spinor(evecs_ptr0,  evecs_ptr1, evecs_filename, 0);
          } /* end of loop on eigenvectors */
        }
      }   /* of if arpack_evecs_fileformat */

      /* set info_arpack pro forma to SUCCESS */
      nconv = nev;
      info_arpack = 0;
    } else {
      et1=gettime();
      evals_arpack(N,nev,ncv,kind,howmny,acc,cheb_k,emin,emax,evals,evecs,arpack_eig_tol,arpack_eig_maxiter,f,&info_arpack,&nconv,arpack_logfile);
      et2=gettime();

      if(info_arpack != 0){ /* arpack didn't converge */
      if(g_proc_id == g_stdio_proc)
        fprintf(stderr,"[arpack_cg] WARNING: ARPACK didn't converge. exiting..\n");
        return -1;
      }
    
      if(g_proc_id == g_stdio_proc)
      {
         fprintf(stdout,"# [arpack_cg] ARPACK has computed %d eigenvectors\n",nconv);
         fprintf(stdout,"# [arpack_cg] ARPACK time: %+e\n",et2-et1);
      }

      if ( arpack_write_ev == 1) {

        if(strcmp(arpack_evecs_fileformat, "partfile") == 0 ) {

          if( g_cart_id == 0 ) fprintf(stdout, "# [arpack_cg] writing evecs in partfile format\n");
          /* set evec filenmae */
          sprintf(evecs_filename, "%s.%.5d.pt%.2dpx%.2dpy%.2dpz%.2d", arpack_evecs_filename, nconv, g_proc_coords[0], g_proc_coords[1], g_proc_coords[2], g_proc_coords[3]);

          evecs_fs = fopen(evecs_filename, "w");
          if (evecs_fs == NULL) {
            fprintf(stderr, "[arpack_cg] (%.4d) Error, could not open file %s for writing\n", g_cart_id, evecs_filename);
            return(-4);
          }
        
          if(arpack_evecs_writeprec == 64) {

            evecs_io_buffer = (void*)evecs;
 
            et1=gettime();
            evecs_count = fwrite( evecs_io_buffer, sizeof(_Complex double), (size_t)nconv*12*N, evecs_fs);
            et2=gettime();

          } else {
            evecs_io_buffer = malloc(sizeof(_Complex double) * (size_t)nconv*12*N );
            if( evecs_io_buffer == NULL) {
              fprintf(stderr, "[arpack_cg] (%.4d) Error, could not allocate memory for evecs_io_buffer\n", g_cart_id);
              return(-41);
            }
            double2single(evecs_io_buffer, evecs, nconv*24*N);
 
            et1=gettime();
            evecs_count = fwrite( evecs_io_buffer, sizeof(_Complex double)/2, (size_t)nconv*12*N, evecs_fs);
            et2=gettime();
            free(evecs_io_buffer);
            evecs_io_buffer = NULL;
          }
 
          if( evecs_count != ((size_t)nconv*12*N) ) {
            fprintf(stderr, "[arpack_cg] (%.4d) Error, could not write proper amount of data to file %s\n", g_cart_id, evecs_filename);
            return(-5);
          }
          fclose(evecs_fs);
          evecs_fs = NULL;

          if(g_proc_id == g_stdio_proc) {
            fprintf(stdout,"[arpack_cg] (%.4d) ARPACK time for writing %d eigenvectors: %+e seconds\n", g_cart_id, nconv, et2-et1);
          }

        } else if (strcmp(arpack_evecs_fileformat, "single") == 0) {

          if(N==VOLUME) {
            for(i=0; i<nconv; i++) {
              sprintf(evecs_filename, "%s.ev%.5d", arpack_evecs_filename, i);
              construct_writer(&evecs_writer, evecs_filename, 0);
              evecs_propagatorFormat = construct_paramsPropagatorFormat(arpack_evecs_writeprec, 1);
              write_propagator_format(evecs_writer, evecs_propagatorFormat);
              free(evecs_propagatorFormat);
              evecs_ptr0 = (spinor*)&(evecs[i*12*N]);
              evecs_ptr1 = NULL;
              write_spinor(evecs_writer, &evecs_ptr0, &evecs_ptr1, 1, arpack_evecs_writeprec);
              destruct_writer(evecs_writer);
              evecs_writer=NULL;
            } /* end of loop on converged eigenvectors */
          } else if(N==VOLUME/2) {
            for(i=0; i<nconv/2; i++) {
              sprintf(evecs_filename, "%s.ev%.5d", arpack_evecs_filename, 2*i);
              construct_writer(&evecs_writer, evecs_filename, 0);
              evecs_propagatorFormat = construct_paramsPropagatorFormat(arpack_evecs_writeprec, 1);
              write_propagator_format(evecs_writer, evecs_propagatorFormat);
              free(evecs_propagatorFormat);
              evecs_ptr0 = (spinor*)&(evecs[(2*i  )*12*N]);
              evecs_ptr1 = (spinor*)&(evecs[(2*i+1)*12*N]);
              write_spinor(evecs_writer, &evecs_ptr0, &evecs_ptr1,1, arpack_evecs_writeprec);
              destruct_writer(evecs_writer);
              evecs_writer=NULL;
            }  /* end of loop on converged eigenvectors */
          }    /* end of if N == VOLUME */

        }      /* of if arpack_evecs_fileformat */

      }        /* end of if arpack_write_ev == 1 */

    }          /* end of if arpack_read_ev == 1 */

    H        = (_Complex double *) malloc(nconv*nconv*sizeof(_Complex double)); 
    Hinv     = (_Complex double *) malloc(nconv*nconv*sizeof(_Complex double)); 
    initwork = (_Complex double *) malloc(nconv*sizeof(_Complex double)); 
    IPIV     = (int *) malloc(nconv*sizeof(int));
    zheev_lwork = 3*nconv;
    zheev_work  = (_Complex double *) malloc(zheev_lwork*sizeof(_Complex double));
    zheev_rwork = (double *) malloc(3*nconv*sizeof(double));
    hevals      = (double *) malloc(nconv*sizeof(double));

    if((H==NULL) || (Hinv==NULL) || (initwork==NULL) || (IPIV==NULL) || (zheev_lwork==NULL) || (zheev_rwork==NULL) || (hevals==NULL))
    {
       if(g_proc_id == g_stdio_proc)
          fprintf(stderr,"[arpack_cg] insufficient memory for H, Hinv, initwork, IPIV, zheev_lwork, zheev_rwork, hevals inside arpack_cg.\n");
       exit(1);
    }

    et1=gettime();
    /* compute the elements of the hermitian matrix H 
       leading dimension is nconv and active dimension is nconv */
    
    if( projection_type == 0) {
    
      for(i=0; i<nconv; i++)
      {
        assign_complex_to_spinor(r,&evecs[i*12*N],12*N);
        f(ax,r);
        c1 = scalar_prod(r,ax,N,parallel);
        H[i+nconv*i] = creal(c1);  /* diagonal should be real */
        for(j=i+1; j<nconv; j++)
        {
          assign_complex_to_spinor(r,&evecs[j*12*N],12*N);
          c1 = scalar_prod(r,ax,N,parallel);
          H[j+nconv*i] = c1;
          H[i+nconv*j] = conj(c1); /* enforce hermiticity */
        }
      }

    } else if ( projection_type == 1 )  {

      for(i=0; i<nconv; i++)
      {
        assign_complex_to_spinor(tmps1, &evecs[i*12*N], 12*N);
        f_final(r, tmps1);
        f(ax,r);
        c1 = scalar_prod(r,ax,N,parallel);
        c2 = scalar_prod(r,r,N,parallel);
        H[i+nconv*i] = creal(c1) / creal(c2);   /* diagonal should be real */
        for(j=i+1; j<nconv; j++)
        {
          assign_complex_to_spinor(tmps1, &evecs[j*12*N], 12*N);
          f_final(r, tmps1);
          c1 = scalar_prod(r,ax,N,parallel);
          c3 = scalar_prod(r, r, N, parallel);

          H[j+nconv*i] = c1 / sqrt( creal(c2) * creal(c3) );
          H[i+nconv*j] = conj(c1) / sqrt( creal(c2) * creal(c3) ); /* enforce hermiticity */
        }
      }
    }


    et2=gettime();
    if(g_proc_id == g_stdio_proc) {
      fprintf(stdout,"[arpack_cg] time to compute H: %+e\n",et2-et1);
    }

/*
    if(g_cart_id == 0) {
      for(i=0; i<nconv; i++) {
      for(j=0; j<nconv; j++) {
        fprintf(stdout, "# [arpack_cg] H[%d, %d] = %25.16e %25.16e\n", i, j, creal(H[i*nconv+j]), cimag(H[i*nconv+j]));
      }}
    }
*/



     et1=gettime();
     /* compute Ritz values and Ritz vectors if needed */
     if( (nconv>0) && (comp_evecs !=0))
     {
         HU = (_Complex double *) malloc(nconv*nconv*sizeof(_Complex double)); 
         if( HU==NULL ) {
           if(g_proc_id == g_stdio_proc)
             fprintf(stderr,"[arpack_cg] insufficient memory for HU inside arpack_cg\n");
             exit(2);
         }
         /* copy H into HU */
         tmpsize=nconv*nconv;
         _FT(zcopy)(&tmpsize,H,&ONE,HU,&ONE);

         /* compute eigenvalues and eigenvectors of HU*/
         /* SUBROUTINE ZHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK,INFO ) */
         _FT(zheev)(&cV,&cU,&nconv,HU,&nconv,hevals,zheev_work,&zheev_lwork,zheev_rwork,&zheev_info,1,1);

         if(zheev_info != 0)
         {
	    if(g_proc_id == g_stdio_proc) 
	    {
	        fprintf(stderr,"[arpack_cg] Error in ZHEEV:, info =  %d\n",zheev_info); 
                fflush(stderr);
	    }
	    exit(1);
         }

         /* If you want to replace the schur (orthonormal) basis by eigen basis
            use something like this. It is better to use the schur basis because
            they are better conditioned. Use this part only to get the eigenvalues
            and their resduals for the operator (D^\daggerD)
            esize=(ncv-nconv)*12*N;
            Zrestart_X(evecs,12*N,HU,12*N,nconv,nconv,&evecs[nconv*N],esize); */

         /* compute residuals and print out results */

	 if(g_proc_id == g_stdio_proc)
	 {fprintf(stdout,"# [arpack_cg] Ritz values of A and their residulas (||A*x-lambda*x||/||x||\n"); 
          fprintf(stdout,"# [arpack_cg] =============================================================\n");
          fflush(stdout);}

         for(i=0; i<nconv; i++)
         {
	    tmpsize=12*N;
            _FT(zgemv)(&cN,&tmpsize,&nconv,&tpone,evecs,&tmpsize,
		       &HU[i*nconv],&ONE,&tzero,tmpv1,&ONE,1);

            assign_complex_to_spinor(r,tmpv1,12*N);

            d1=square_norm(r,N,parallel);
            
            f(ax,r);

            mul_r(tmps1,hevals[i],r,N);

            diff(tmps2,ax,tmps1,N);
	    
	    d2= square_norm(tmps2,N,parallel);

            d3= sqrt(d2/d1);
	    
	    if(g_proc_id == g_stdio_proc)
	    {fprintf(stdout,"Eval[%06d]: %22.15E rnorm: %22.15E\n", i, hevals[i], d3); fflush(stdout);}
        } 
        free( HU ); HU = NULL;
     }  /* if( (nconv_arpack>0) && (comp_evecs !=0)) */
     et2=gettime();
     if(g_proc_id == g_stdio_proc) {
       fprintf(stdout,"[arpack_cg] time to compute eigenvectors: %+e\n",et2-et1);
     }

  }  /* if(ncurRHS==0) */
    
  double eps_sq_used,restart_eps_sq_used;  /* tolerance squared for the linear system */

  double cur_res; /* current residual squared */

  /*increment the RHS counter*/
  ncurRHS = ncurRHS +1; 

  /* set the tolerance to be used for this right-hand side  */
  if(ncurRHS > nrhs1){
    eps_sq_used = eps_sq;
  }
  else{
    eps_sq_used = eps_sq1;
  }
  
  if(g_proc_id == g_stdio_proc && g_debug_level > 0) {
    fprintf(stdout, "# [arpack_cg] System %d, eps_sq %e, projection type %d\n",ncurRHS,eps_sq_used, projection_type); 
    fflush(stdout);
  } 
  
  /*---------------------------------------------------------------*/
  /* Call init-CG until this right-hand side converges             */
  /*---------------------------------------------------------------*/
  double wt1,wt2,wE,wI;
  double normsq,tol_sq;
  int flag,maxit_remain,numIts,its;
  int info_lapack;

  wE = 0.0; wI = 0.0;     /* Start accumulator timers */
  flag = -1;    	  /* System has not converged yet */
  maxit_remain = maxit;   /* Initialize Max and current # of iters   */
  numIts = 0;  
  restart_eps_sq_used=res_eps_sq;

  while( flag == -1 )
  {
    
    if(nconv > 0)
    {


      /* --------------------------------------------------------- */
      /* Perform init-CG with evecs vectors                        */
      /* xinit = xinit + evecs*Hinv*evec'*(b-Ax0) 		   */
      /* --------------------------------------------------------- */
      wt1 = gettime();

      /*r0=b-Ax0*/
      f(ax,x); /*ax = A*x */
      diff(r,b,ax,N);  /* r=b-A*x */

      if( projection_type == 0) {

        /* x = x + evecs*inv(H)*evecs'*r */
        for(int i=0; i < nconv; i++)
        {
           assign_complex_to_spinor(tmps1,&evecs[i*12*N],12*N);
           initwork[i]= scalar_prod(tmps1,r,N,parallel);
        }

        /* solve the linear system H y = c */
        tmpsize=nconv*nconv;
        _FT(zcopy) (&tmpsize,H,&ONE,Hinv,&ONE); /* copy H into Hinv */
        /* SUBROUTINE ZGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) */
        _FT(zgesv) (&nconv,&ONE,Hinv,&nconv,IPIV,initwork,&nconv,&info_lapack);

        if(info_lapack != 0)
        {
           if(g_proc_id == g_stdio_proc) {
              fprintf(stderr, "[arpack_cg] Error in ZGESV:, info =  %d\n",info_lapack); 
              fflush(stderr);
           }
           exit(1);
        }

        /* x = x + evecs*inv(H)*evecs'*r */
        for(i=0; i<nconv; i++)
        {
          assign_complex_to_spinor(tmps1,&evecs[i*12*N],12*N);
          assign_add_mul(x,tmps1,initwork[i],N);
        }

      } else if ( projection_type == 1 ) {
        /* x = x + evecs*inv(H)*evecs'*r */

        /* tmps2 = Q^+ r */
        f_initial(tmps2, r);

        for(int i=0; i < nconv; i++) {
          /* tmps1 = v_i */
          assign_complex_to_spinor(tmps1,&evecs[i*12*N],12*N);

          /* initwork_i = v_i^+ Q^+ r / lambda_i^2 */
          initwork[i]= scalar_prod(tmps1, tmps2, N, parallel) / ( H[i*nconv+i] * H[i*nconv+i] );
        }

        memset(tmps2, 0, N*sizeof(spinor) );
        for(i=0; i<nconv; i++) {
          assign_complex_to_spinor(tmps1, &evecs[i*12*N], 12*N);
          assign_add_mul(tmps2, tmps1, initwork[i], N);
        }

        /* apply final operator */
        f_final(tmps1, tmps2);
        assign_add_mul(x, tmps1, 1., N);

      }  /* end of if projection type */

      /* compute elapsed time and add to accumulator */

      wt2 = gettime();
      wI = wI + wt2-wt1;
      
    }/* if(nconv > 0) */


    /* which tolerance to use */
    if(eps_sq_used > restart_eps_sq_used)
    {
       tol_sq = eps_sq_used;
       flag   = 1; /* shouldn't restart again */
    }
    else
    {
       tol_sq = restart_eps_sq_used;
    }

    wt1 = gettime();
    its = cg_her(x,b,maxit_remain,tol_sq,rel_prec,N,f); 
          
    wt2 = gettime();

    wE = wE + wt2-wt1;

    /* check convergence */
    if(its == -1)
    {
       /* cg didn't converge */
       if(g_proc_id == g_stdio_proc) {
         fprintf(stderr, "[arpack_cg] CG didn't converge within the maximum number of iterations in arpack_cg. Exiting...\n"); 
         fflush(stderr);
         exit(1);
         
       }
    } 
    else
    {
       numIts += its;   
       maxit_remain = maxit - numIts; /* remaining number of iterations */
       restart_eps_sq_used = restart_eps_sq_used*res_eps_sq; /* prepare for the next restart */
    }
    
  }
  /* end while (flag ==-1)               */
  
  /* ---------- */
  /* Reporting  */
  /* ---------- */
  /* compute the exact residual */
  f(ax,x); /* ax= A*x */
  diff(r,b,ax,N);  /* r=b-A*x */	
  normsq=square_norm(r,N,parallel);
  if(g_debug_level > 0 && g_proc_id == g_stdio_proc)
  {
    fprintf(stdout, "# [arpack_cg] For this rhs:\n");
    fprintf(stdout, "# [arpack_cg] Total initCG Wallclock : %+e\n", wI);
    fprintf(stdout, "# [arpack_cg] Total cg Wallclock : %+e\n", wE);
    fprintf(stdout, "# [arpack_cg] Iterations: %-d\n", numIts); 
    fprintf(stdout, "# [arpack_cg] Actual Resid of LinSys  : %+e\n",normsq);
  }


  /* free memory if this was your last system to solve */
  if(ncurRHS == nrhs){
#if ( (defined SSE) || (defined SSE2) || (defined SSE3)) 
    free(_ax);  free(_r);  free(_tmps1); free(_tmps2);
#else
    free(ax); free(r); free(tmps1); free(tmps2);
#endif
    free(evecs); free(evals); free(H); free(Hinv);
    free(initwork); free(tmpv1); free(zheev_work);
    free(hevals); free(zheev_rwork); free(IPIV);
  }


  return numIts;
}

Example #8

Show file

File: cloverdetratio_monomial.c Project: VincentDrach/tmLQCD

void cloverdetratio_derivative_orig(const int no, hamiltonian_field_t * const hf) {
  monomial * mnl = &monomial_list[no];

  /* This factor 2* a missing factor 2 in trace_lambda */
  mnl->forcefactor = 1.;

  /*********************************************************************
   *
   *  this is being run in case there is even/odd preconditioning
   * 
   * This term is det((Q^2 + \mu_1^2)/(Q^2 + \mu_2^2))
   * mu1 and mu2 are set according to the monomial
   *
   *********************************************************************/
  /* First term coming from the second field */
  /* Multiply with W_+ */
  g_mu = mnl->mu;
  g_mu3 = mnl->rho2; //rho2
  boundary(mnl->kappa);

  // we compute the clover term (1 + T_ee(oo)) for all sites x
  sw_term( (const su3**) hf->gaugefield, mnl->kappa, mnl->c_sw); 
  // we invert it for the even sites only including mu
  sw_invert(EE, mnl->mu);
  
  if(mnl->solver != CG) {
    fprintf(stderr, "Bicgstab currently not implemented, using CG instead! (detratio_monomial.c)\n");
  }
  
  mnl->Qp(g_spinor_field[DUM_DERI+2], mnl->pf);
  g_mu3 = mnl->rho; // rho1

  /* Invert Q_{+} Q_{-} */
  /* X_W -> DUM_DERI+1 */
  chrono_guess(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+2], mnl->csg_field, 
	       mnl->csg_index_array, mnl->csg_N, mnl->csg_n, VOLUME/2, mnl->Qsq);
  mnl->iter1 += cg_her(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+2], mnl->maxiter, 
		       mnl->forceprec, g_relative_precision_flag, VOLUME/2, mnl->Qsq);
  chrono_add_solution(g_spinor_field[DUM_DERI+1], mnl->csg_field, mnl->csg_index_array,
		      mnl->csg_N, &mnl->csg_n, VOLUME/2);
  /* Y_W -> DUM_DERI  */
  mnl->Qm(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]);
  
  /* apply Hopping Matrix M_{eo} */
  /* to get the even sites of X */
  H_eo_sw_inv_psi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+1], EE, -mnl->mu);
  /* \delta Q sandwitched by Y_o^\dagger and X_e */
  deriv_Sb(OE, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+2], hf, mnl->forcefactor); 
  
  /* to get the even sites of Y */
  H_eo_sw_inv_psi(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI], EE, mnl->mu);
  /* \delta Q sandwitched by Y_e^\dagger and X_o */
  deriv_Sb(EO, g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+1], hf, mnl->forcefactor); 

  // here comes the clover term...
  // computes the insertion matrices for S_eff
  // result is written to swp and swm
  // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e  
  gamma5(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+2], VOLUME/2);
  sw_spinor(EO, g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3]);
  
  // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
  gamma5(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI], VOLUME/2);
  sw_spinor(OE, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]);

  g_mu3 = mnl->rho2; // rho2
  
  /* Second term coming from the second field */
  /* The sign is opposite!! */
  mul_r(g_spinor_field[DUM_DERI], -1., mnl->pf, VOLUME/2);
  
  /* apply Hopping Matrix M_{eo} */
  /* to get the even sites of X */
  H_eo_sw_inv_psi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+1], EE, -mnl->mu);
  /* \delta Q sandwitched by Y_o^\dagger and X_e */
  deriv_Sb(OE, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+2], hf, mnl->forcefactor); 
  
  /* to get the even sites of Y */
  H_eo_sw_inv_psi(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI], EE, mnl->mu);
  /* \delta Q sandwitched by Y_e^\dagger and X_o */
  deriv_Sb(EO, g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+1], hf, mnl->forcefactor);

  // here comes the clover term...
  // computes the insertion matrices for S_eff
  // result is written to swp and swm
  // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e
  gamma5(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+2], VOLUME/2);
  sw_spinor(EO, g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3]);
  
  // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
  gamma5(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI], VOLUME/2);
  sw_spinor(OE, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]);

  sw_all(hf, mnl->kappa*mnl->forcefactor, mnl->c_sw);
  
  g_mu = g_mu1;
  g_mu3 = 0.;
  boundary(g_kappa);

  return;
}

Example #9

Show file

File: gcr.c Project: annube/tmLQCD

int gcr(spinor * const P, spinor * const Q, 
	const int m, const int max_restarts,
	const double eps_sq, const int rel_prec,
	const int N, const int precon, matrix_mult f) {

  int k, l, restart, i, iter = 0;
  double norm_sq, err;
  spinor * rho, * tmp;
  complex ctmp;
  spinor ** solver_field = NULL;
  const int nr_sf = 2;

  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }

  rho = solver_field[0];
  tmp = solver_field[1];

  init_gcr(m, N+RAND);

  norm_sq = square_norm(Q, N, 1);
  if(norm_sq < 1.e-32) {
    norm_sq = 1.;
  }
  
  for(restart = 0; restart < max_restarts; restart++) {
    dfl_sloppy_prec = 0;
    f(tmp, P);
    diff(rho, Q, tmp, N);
    err = square_norm(rho, N, 1);
    if(g_proc_id == g_stdio_proc && g_debug_level > 2){
      printf("GCR: iteration number: %d, true residue: %g\n", iter, err); 
      fflush(stdout);
    }
    if(((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*norm_sq) && (rel_prec == 1))) {
      finalize_solver(solver_field, nr_sf);
      return(iter);
    }
    for(k = 0; k < m; k++) {
      
      if(precon == 0) {
	assign(xi[k], rho, N);
      }
      else {
        zero_spinor_field(xi[k], N);  
        Msap_eo(xi[k], rho, 6);   
 	/* Msap(xi[k], rho, 8); */
      }
	  
      dfl_sloppy_prec = 1;
      dfl_little_D_prec = 1.e-12;
      f(tmp, xi[k]); 
	  
      /* tmp will become chi[k] */
      for(l = 0; l < k; l++) {
        a[l][k] = scalar_prod(chi[l], tmp, N, 1);
        assign_diff_mul(tmp, chi[l], a[l][k], N);
      }
      b[k] = sqrt(square_norm(tmp, N, 1));
      mul_r(chi[k], 1./b[k], tmp, N);
      c[k] = scalar_prod(chi[k], rho, N, 1);
      assign_diff_mul(rho, chi[k], c[k], N);
      err = square_norm(rho, N, 1);
      iter ++;
      if(g_proc_id == g_stdio_proc && g_debug_level > 0){
        if(rel_prec == 1) printf("# GCR: %d\t%g >= %g iterated residue\n", iter, err, eps_sq*norm_sq); 
        else printf("# GCR: %d\t%g >= %giterated residue\n", iter, err, eps_sq);
        fflush(stdout);
      }
      /* Precision reached? */
      if((k == m-1) || ((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*norm_sq) && (rel_prec == 1))) {
        break;
      }
    }

    /* prepare for restart */
    _mult_real(c[k], c[k], 1./b[k]);
    assign_add_mul(P, xi[k], c[k], N);
    for(l = k-1; l >= 0; l--) {
      for(i = l+1; i <= k; i++) {
        _mult_assign_complex(ctmp, a[l][i], c[i]);
        /* c[l] -= ctmp */
        _diff_complex(c[l], ctmp);
      }
      _mult_real(c[l], c[l], 1./b[l]);
      assign_add_mul(P, xi[l], c[l], N);
    }
  }
  finalize_solver(solver_field, nr_sf);
  return(-1);
}

Example #10

Show file

File: incr_eigcg.c Project: LorenzoRiggio/tmLQCD

int incr_eigcg(const int N, const int nrhs,  const int nrhs1, spinor * const x, spinor * const b, 
               const int ldh, matrix_mult f, const double eps_sq1, const double eps_sq, double restart_eps_sq,  
               const int rand_guess_opt, const int rel_prec, const int maxit, int nev, const int v_max) 
{ 
  /*Static variables and arrays.*/
  static spinor **solver_field; /*4 spinor fields*/

  static int ncurEvals=0;       /* current number of stored eigenvectors */
  static int ncurRHS=0;         /* current number of the system being solved */                   

  static spinor **evecs;        /* accumulated eigenvectors for deflation. */

  static void *_evals;
  static double *evals;         /* Ritz values */

  static void *_v;
  static spinor  *V;            /* work array for eigenvector search basis in eigCG */

  static void *_h;
  static _Complex double  *H;            /* The ncurEvals^2 matrix: H=evecs'*A*evecs */ 

  static void *_hu;
  static _Complex double  *HU;           /* used for diagonalization of H if eigenvalues requested
                                   also used as a copy of H if needed*/
  static void *_initwork;                            
  static _Complex double  *initwork;     /* vector of size ldh using with init-CG */ 

  static void *_ework;
  static _Complex double  *ework;
  /* end of the thinking part */

  static void *_work;
  static _Complex double  *work;

  static void *_rwork;
  static double *rwork;
  
  static void *_IPIV;
  static int *IPIV;        /*integer array to store permutations when solving the small linear system*/

  /* some constants */
  char cU='U'; char cN='N';  char cV='V'; 
  _Complex double  tpone= 1.0e+00;
  _Complex double  tzero= 0.0e+00;
  //tpone.re=+1.0e+00; tpone.im=0.0e+00; 
  //tzero.re=+0.0e+00; tzero.im=0.0e+00;

  /* Timing vars */
  double wt1,wt2,wE,wI;

  double eps_sq_used;

 
  /* Variables */
  double machEps = 1e-15;  
  double normb, normsq, tmpd,tmpd2;
  _Complex double  tempz;
  int i,j, ONE = 1;
  int tmpsize,tmpi,info=0;
  int numIts, flag, nAdded, nev_used;
  int maxit_remain;
  int esize,nrsf;

  int parallel; /* for parallel processing of the scalar products */

  /* leading dimension for spinor vectors */
  int LDN;
  if(N==VOLUME)
     LDN = VOLUMEPLUSRAND;
  else
     LDN = VOLUMEPLUSRAND/2;
  
 
  #ifdef MPI
    parallel=1;
  #else
    parallel=0;
  #endif
 
  /*think more about this */
  esize=2*12*N+4*nev*nev;  /* fixed size for ework used for restarting in eigcg*/

  nrsf=4;  /*number of solver fields */
  
  int lwork=3*ldh;

  double cur_res; //current residual squared (initial value will be computed in eigcg)

  /*increment the RHS counter*/
  ncurRHS = ncurRHS +1; 

  //set the tolerance to be used for this right-hand side 
  if(ncurRHS > nrhs1){
    eps_sq_used = eps_sq;
  }
  else{
    eps_sq_used = eps_sq1;
  }

  if(ncurRHS==1)/* If this is the first system, allocate needed memory for the solver*/
  {
    init_solver_field(&solver_field, LDN, nrsf); 
  }

  if(nev==0){ /*incremental eigcg is used as a cg solver. No need to restart forcing no-restart*/
    if(g_proc_id == g_stdio_proc && g_debug_level > 0) {
       fprintf(stdout, "CG won't be restarted in this mode since no deflation will take place (nev=0)\n"); 
       fflush(stdout);
    } 
  
    restart_eps_sq=0.0;
  }




  if((ncurRHS==1) && (nev >0) )/* If this is the first right-hand side and eigenvectors are needed, allocate needed memory*/
  { 
    init_solver_field(&evecs, LDN, ldh); 
     
    #if (defined SSE || defined SSE2 || defined SSE3)

    /*Extra elements are needed for allignment */
    //_v = malloc(LDN*v_max*sizeof(spinor)+ALIGN_BASE);
    _v = calloc(LDN*v_max+ALIGN_BASE,sizeof(spinor));
    V  = (spinor *)(((unsigned long int)(_v)+ALIGN_BASE)&~ALIGN_BASE);

    //_h=malloc(ldh*ldh*sizeof(_Complex double )+ALIGN_BASE);
    _h=calloc(ldh*ldh+ALIGN_BASE,sizeof(_Complex double ));
    H = (_Complex double  *)(((unsigned long int)(_h)+ALIGN_BASE)&~ALIGN_BASE);
 
    //_hu=malloc(ldh*ldh*sizeof(_Complex double )+ALIGN_BASE);
    _hu=calloc(ldh*ldh+ALIGN_BASE,sizeof(_Complex double ));
    HU = (_Complex double  *)(((unsigned long int)(_hu)+ALIGN_BASE)&~ALIGN_BASE);
    
    //_ework = malloc(esize*sizeof(_Complex double )+ALIGN_BASE);
    _ework = calloc(esize+ALIGN_BASE,sizeof(_Complex double ));
    ework=(_Complex double  *)(((unsigned long int)(_ework)+ALIGN_BASE)&~ALIGN_BASE);

    //_initwork = malloc(ldh*sizeof(_Complex double )+ALIGN_BASE);
    _initwork = calloc(ldh+ALIGN_BASE,sizeof(_Complex double ));
    initwork = (_Complex double  *)(((unsigned long int)(_initwork)+ALIGN_BASE)&~ALIGN_BASE);

    //_work = malloc(lwork*sizeof(_Complex double )+ALIGN_BASE);
    _work = calloc(lwork+ALIGN_BASE,sizeof(_Complex double ));
    work = (_Complex double  *)(((unsigned long int)(_work)+ALIGN_BASE)&~ALIGN_BASE);

    //_rwork = malloc(3*ldh*sizeof(double)+ALIGN_BASE);
    _rwork = calloc(3*ldh+ALIGN_BASE,sizeof(double));
    rwork = (double *)(((unsigned long int)(_rwork)+ALIGN_BASE)&~ALIGN_BASE);

    
    //_IPIV = malloc(ldh*sizeof(int)+ALIGN_BASE);
    _IPIV = calloc(ldh+ALIGN_BASE,sizeof(int));
    IPIV = (int *)(((unsigned long int)(_IPIV)+ALIGN_BASE)&~ALIGN_BASE);

    //_evals = malloc(ldh*sizeof(double)+ALIGN_BASE);
    _evals = calloc(ldh+ALIGN_BASE,sizeof(double)); 
    evals = (double *)(((unsigned long int)(_evals)+ALIGN_BASE)&~ALIGN_BASE);


    #else

    V = (spinor *) calloc(LDN*v_max,sizeof(spinor));
    H = calloc(ldh*ldh, sizeof(_Complex double ));
    HU= calloc(ldh*ldh, sizeof(_Complex double ));
    initwork = calloc(ldh, sizeof(_Complex double ));
    ework = calloc(esize, sizeof(_Complex double ));
    work = calloc(lwork,sizeof(_Complex double ));
    rwork= calloc(3*ldh,sizeof(double));
    IPIV = calloc(ldh, sizeof(int));
    evals = (double *) calloc(ldh, sizeof(double));

    #endif
    
  } /*if(ncurRHS==1)*/

  
  if(g_proc_id == g_stdio_proc && g_debug_level > 0) {
    fprintf(stdout, "System %d, eps_sq %e\n",ncurRHS,eps_sq_used); 
    fflush(stdout);
  } 
  
     /*---------------------------------------------------------------*/
     /* Call eigCG until this right-hand side converges               */
     /*---------------------------------------------------------------*/
  wE = 0.0; wI = 0.0;     /* Start accumulator timers */
  flag = -1;    	   /* First time through. Run eigCG regularly */
  maxit_remain = maxit;   /* Initialize Max and current # of iters   */
  numIts = 0;  

  while( flag == -1 || flag == 3)
  {
    //if(g_proc_id==g_stdio_proc)
      //printf("flag= %d, ncurEvals= %d\n",flag,ncurEvals);
    
    if(ncurEvals > 0)
    {
      /* --------------------------------------------------------- */
      /* Perform init-CG with evecs vectors                        */
      /* xinit = xinit + evecs*Hinv*evec'*(b-Ax0) 		     */
      /* --------------------------------------------------------- */

      wt1 = gettime();

      /*r0=b-Ax0*/
      normsq = square_norm(x,N,parallel);
      if(normsq>0.0)
      {
	f(solver_field[0],x); /* solver_field[0]= A*x */
	diff(solver_field[1],b,solver_field[0],N);  /* solver_filed[1]=b-A*x */		
      }
      else
	assign(solver_field[1],b,N); /* solver_field[1]=b */
	
      /* apply the deflation using init-CG */
      /* evecs'*(b-Ax) */
      for(i=0; i<ncurEvals; i++)
      {
        initwork[i]= scalar_prod(evecs[i],solver_field[1],N,parallel);
      }
    
      /* solve the linear system H y = c */
      tmpsize=ldh*ncurEvals;
      _FT(zcopy) (&tmpsize,H,&ONE,HU,&ONE); /* copy H into HU */
      _FT(zgesv) (&ncurEvals,&ONE,HU,&ldh,IPIV,initwork,&ldh,&info);

      if(info != 0)
      {
         if(g_proc_id == g_stdio_proc) {
            fprintf(stderr, "Error in ZGESV:, info =  %d\n",info); 
            fflush(stderr);
         }
         exit(1);
      }
    
      /* x = x + evecs*inv(H)*evecs'*r */
      for(i=0; i<ncurEvals; i++)
      {
        assign_add_mul(x,evecs[i],initwork[i],N);
      }
      
      /* compute elapsed time and add to accumulator */

      wt2 = gettime();
      
      wI = wI + wt2-wt1;
      
    }/* if(ncurEvals > 0) */


    /* ------------------------------------------------------------ */
    /* Adjust nev for eigcg according to available ldh/restart      */
    /* ------------------------------------------------------------ */
	  
    if (flag == 3) { /* restart with the same rhs, set nev_used = 0 */
      nev_used = 0;
      /* if convergence seems before next restart do not restart again */
      if(rel_prec)
      {
	       if (cur_res*(restart_eps_sq) < eps_sq*normb*normb) 
	           restart_eps_sq=0.0;
      }
      else
      {
	       if (cur_res*(restart_eps_sq) < eps_sq) 
	          restart_eps_sq=0.0;
      } /* if(rel_prec) */
	  
    }
    else
    {    
      /* First time through this rhs. Find nev evecs */
      /* limited by the ldh evecs we can store in total */
      if (ldh-ncurEvals < nev)
	       nev = ldh - ncurEvals;
      nev_used = nev;
      
    }

    /* ------------------------------------------------------------ */
    /* Solve Ax = b with x initial guess                            */
    /* ------------------------------------------------------------ */

    wt1 = gettime();

    eigcg( N, LDN, x, b, &normb, eps_sq_used, restart_eps_sq, rel_prec, maxit_remain, 
	     &numIts, &cur_res, &flag, solver_field, f, 
	     nev_used, v_max, V, esize, ework);
     
    //if(g_proc_id == g_stdio_proc) 
        //printf("eigcg flag= %d \n",flag); 
      
    wt2 = gettime();

    wE = wE + wt2-wt1;
    
    /* if flag == 3 update the remain max number of iterations */
    maxit_remain = maxit - numIts;
    
  }
  /* end while (flag ==-1 || flag == 3)               */
  /* ------------------------------------------------ */

  /* ---------- */
  /* Reporting  */
  /* ---------- */
  /* compute the exact residual */
  f(solver_field[0],x); /* solver_field[0]= A*x */
  diff(solver_field[1],b,solver_field[0],N);  /* solver_filed[1]=b-A*x */	
  normsq=square_norm(solver_field[1],N,parallel);
  if(g_debug_level > 0 && g_proc_id == g_stdio_proc)
  {
    fprintf(stdout, "For this rhs:\n");
    fprintf(stdout, "Total initCG Wallclock : %-f\n", wI);
    fprintf(stdout, "Total eigpcg Wallclock : %-f\n", wE);
    fprintf(stdout, "Iterations: %-d\n", numIts); 
    fprintf(stdout, "Residual: %e, Actual Resid of LinSys  : %e\n", cur_res,normsq);
    if (flag != 0) {
      fprintf(stderr, "Error: eigcg returned with nonzero exit status\n");
      return flag;
      fflush(stderr);
    }
    fflush(stdout);
  }
  /* ------------------------------------------------------------------- */
  /* ------------------------------------------------------------------- */
  /* Update the evecs and the factorization of evecs'*A*evecs            */
  /* ------------------------------------------------------------------- */
  if (nev > 0) 
  {

    wt1 = gettime();

    /* Append new Ritz vectors to the basis and orthogonalize them to evecs */
    for(i=0; i<nev_used; i++)
      assign(evecs[i+ncurEvals],&V[i*LDN],N);
    
    nAdded = ortho_new_vectors(evecs,N,ncurEvals,nev_used,machEps);

    /* expand H */
    for(j=ncurEvals; j< (ncurEvals+nAdded); j++)
    {
      f(solver_field[0],evecs[j]);
      
      for(i=0; i<=j; i++)
      {
	       H[i+j*ldh] = scalar_prod(evecs[i],solver_field[0],N,parallel);
	       H[j+i*ldh]=  conj(H[i+j*ldh]);
	       //H[j+i*ldh].re =  H[i+j*ldh].re;
	       //H[j+i*ldh].im = -H[i+j*ldh].im;
      }
      
    }
    
    /* update the number of vectors in the basis */
    ncurEvals = ncurEvals + nAdded;

    /* ---------- */
    /* Reporting  */
    /* ---------- */

    wt2 = gettime();

    
    if(g_proc_id == g_stdio_proc && g_debug_level > 0)
    {
      fprintf(stdout,"ncurRHS %d\n",ncurRHS);
      fprintf(stdout,"ncurEvals %d \n",ncurEvals);
      fprintf(stdout,"Update\n");
      fprintf(stdout,"Added %d vecs\n",nAdded);
      fprintf(stdout,"U Wallclock : %-f\n", wt2-wt1);
      fprintf(stdout,"Note: Update Wall time doesn't include time for computing eigenvalues and their residuals.\n"); 
      fflush(stdout);     
    }
    
    if(g_debug_level > 3)  /*compute eigenvalues and their residuals if requested*/
    {
      /* copy H into HU */
      tmpsize=ldh*ncurEvals;
      _FT(zcopy) (&tmpsize,H,&ONE,HU,&ONE);

      /* compute eigenvalues and eigenvectors of HU (using V and spinor fields as tmp work spaces)*/
      _FT(zheev)(&cV, &cU, &ncurEvals, HU, &ldh, evals, work, &lwork, rwork, &info,1,1);

      if(info != 0)
      {
	if(g_proc_id == g_stdio_proc) 
	{
	  fprintf(stderr,"Error in ZHEEV:, info =  %d\n",info); 
          fflush(stderr);
	}
	exit(1);
      }

      /* compute residuals and print out results */
      for(i=0; i<ncurEvals; i++)
      {
	    tmpi=12*N;
            tmpsize=12*LDN;

	    
            _FT(zgemv)(&cN,&tmpi,&ncurEvals,&tpone,(_Complex double  *)evecs[0],&tmpsize,
			                 &HU[i*ldh], &ONE,&tzero,(_Complex double  *) solver_field[0],&ONE,1);

            normsq=square_norm(solver_field[0],N,parallel);
            
            f(solver_field[1],solver_field[0]);

            tempz = scalar_prod(solver_field[0],solver_field[1],N,parallel);

            evals[i] = creal(tempz)/normsq;

            mul_r(solver_field[2],evals[i],solver_field[0],N);

            diff(solver_field[3],solver_field[1],solver_field[2], N);
	    
	    tmpd2= square_norm(solver_field[3],N,parallel);

            tmpd= sqrt(tmpd2/normsq);
	    
	    if(g_proc_id == g_stdio_proc)
	    {fprintf(stdout,"RR Eval[%d]: %22.15E rnorm: %22.15E\n", i+1, evals[i], tmpd); fflush(stdout);}
	
      } 
       
    }/*if(plvl >= 2)*/
  } /* if(nev>0) */

  /*--------------------------------------*/
  /*free memory that is no longer needed  */
  /* and reset ncurRHS and ncurEvals      */
  /*--------------------------------------*/

  if(ncurRHS == nrhs) /*this was the last system to be solved */
  {
     ncurRHS=0;
     ncurEvals=0;
     finalize_solver(solver_field,nrsf);
  }

  if( (ncurRHS == nrhs) && (nev >0) )/*this was the last system to be solved and there were allocated memory for eigenvector computation*/
  {
     finalize_solver(evecs,ldh);
     #if (defined SSE || defined SSE2 || defined SSE3)
     free(_v);
     free(_h);
     free(_hu);
     free(_ework);
     free(_initwork);
     free(_IPIV);
     free(_evals);
     free(_rwork);
     free(_work);
     #else
     free(V);
     free(H);
     free(HU);
     free(ework);
     free(initwork);
     free(IPIV);
     free(evals);
     free(rwork);
     free(work);
     #endif
  }

  return numIts;
}

Example #11

Show file

File: gmres_dr.c Project: palao/tmLQCD

int gmres_dr(spinor * const P,spinor * const Q, 
	  const int m, const int nr_ev, const int max_restarts,
	  const double eps_sq, const int rel_prec,
	  const int N, matrix_mult f){

  int restart=0, i, j, k, l;
  double beta, eps, norm, beta2=0.;
  complex *lswork = NULL;
  int lwork;
  complex tmp1, tmp2;
  int info=0;
  int _m = m, mp1 = m+1, np1 = nr_ev+1, ne = nr_ev, V2 = 12*(VOLUMEPLUSRAND)/2, _N = 12*N;
  spinor ** solver_field = NULL;
  const int nr_sf = 3;

  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }
  double err=0.;
  spinor * r0, * x0;

  cmone.re = -1.; cmone.im=0.;
  cpone.re = 1.; cpone.im=0.;
  czero.re = 0.; czero.im = 0.;
  
  r0 = solver_field[0];
  x0 = solver_field[2];
  eps=sqrt(eps_sq);  
  init_gmres_dr(m, (VOLUMEPLUSRAND));
  norm = sqrt(square_norm(Q, N, 1));

  assign(x0, P, N);

  /* first normal GMRES cycle */
  /* r_0=Q-AP  (b=Q, x+0=P) */
  f(r0, x0);
  diff(r0, Q, r0, N);
  
  /* v_0=r_0/||r_0|| */
  alpha[0].re=sqrt(square_norm(r0, N, 1));
  err = alpha[0].re;
  
  if(g_proc_id == g_stdio_proc && g_debug_level > 0){
    printf("%d\t%e true residue\n", restart*m, alpha[0].re*alpha[0].re); 
    fflush(stdout);
  }
  
  if(alpha[0].re==0.){
    assign(P, x0, N);
    finalize_solver(solver_field, nr_sf);
    return(restart*m);
  }
  
  mul_r(V[0], 1./alpha[0].re, r0, N);
  
  for(j = 0; j < m; j++){
    /* solver_field[0]=A*v_j */

    /* Set h_ij and omega_j */
    /* solver_field[1] <- omega_j */    
    f(solver_field[1], V[j]);
/*     assign(solver_field[1], solver_field[0], N); */
    for(i = 0; i <= j; i++){
      H[i][j] = scalar_prod(V[i], solver_field[1], N, 1);
      /* G, work and work2 are in Fortran storage: columns first */
      G[j][i] = H[i][j];
      work2[j][i] = H[i][j];
      work[i][j].re = H[i][j].re;
      work[i][j].im = -H[i][j].im;
      assign_diff_mul(solver_field[1], V[i], H[i][j], N);
    }
    
    _complex_set(H[j+1][j], sqrt(square_norm(solver_field[1], N, 1)), 0.);
    G[j][j+1] = H[j+1][j];
    work2[j][j+1] = H[j+1][j];
    work[j+1][j].re =  H[j+1][j].re;
    work[j+1][j].im =  -H[j+1][j].im;
    beta2 = H[j+1][j].re*H[j+1][j].re; 
    for(i = 0; i < j; i++){
      tmp1 = H[i][j];
      tmp2 = H[i+1][j];
      _mult_real(H[i][j], tmp2, s[i]);
      _add_assign_complex_conj(H[i][j], c[i], tmp1);
      _mult_real(H[i+1][j], tmp1, s[i]);
      _diff_assign_complex(H[i+1][j], c[i], tmp2);
    }
    
    /* Set beta, s, c, alpha[j],[j+1] */
    beta = sqrt(_complex_square_norm(H[j][j]) + _complex_square_norm(H[j+1][j]));
    s[j] = H[j+1][j].re / beta;
    _mult_real(c[j], H[j][j], 1./beta);
    _complex_set(H[j][j], beta, 0.);
    _mult_real(alpha[j+1], alpha[j], s[j]);
    tmp1 = alpha[j];
    _mult_assign_complex_conj(alpha[j], c[j], tmp1);
    
    /* precision reached? */
    if(g_proc_id == g_stdio_proc && g_debug_level > 0){
      printf("%d\t%e residue\n", restart*m+j, alpha[j+1].re*alpha[j+1].re); 
      fflush(stdout);
    }
    if(((alpha[j+1].re <= eps) && (rel_prec == 0)) || ((alpha[j+1].re <= eps*norm) && (rel_prec == 1))){
      _mult_real(alpha[j], alpha[j], 1./H[j][j].re);
      assign_add_mul(x0, V[j], alpha[j], N);
      for(i = j-1; i >= 0; i--){
	for(k = i+1; k <= j; k++){
	  _mult_assign_complex(tmp1, H[i][k], alpha[k]); 
	  /* alpha[i] -= tmp1 */
	  _diff_complex(alpha[i], tmp1);
	}
	_mult_real(alpha[i], alpha[i], 1./H[i][i].re);
	assign_add_mul(x0, V[i], alpha[i], N);
      }
      for(i = 0; i < m; i++){
	alpha[i].im = 0.;
      }
      assign(P, x0, N);
      finalize_solver(solver_field, nr_sf);
      return(restart*m+j);
    }
    /* if not */
    else {
      mul_r(V[(j+1)], 1./H[j+1][j].re, solver_field[1], N); 
    }
    
  }
  j=m-1;
  /* prepare for restart */
  _mult_real(alpha[j], alpha[j], 1./H[j][j].re);
  assign_add_mul(x0, V[j], alpha[j], N);
  if(g_proc_id == 0 && g_debug_level > 3) {
    printf("alpha: %e %e\n", alpha[j].re, alpha[j].im);
  }
  for(i = j-1; i >= 0; i--){
    for(k = i+1; k <= j; k++){
      _mult_assign_complex(tmp1, H[i][k], alpha[k]);
      _diff_complex(alpha[i], tmp1);
    }
    _mult_real(alpha[i], alpha[i], 1./H[i][i].re);
    if(g_proc_id == 0 && g_debug_level > 3) {
      printf("alpha: %e %e\n", alpha[i].re, alpha[i].im);
    }
    assign_add_mul(x0, V[i], alpha[i], N);
  }

  /* This produces c=V_m+1*r0 */
  for(i = 0; i < mp1; i++) {
    c[i] = scalar_prod(V[i], r0, N, 1); 
    if(g_proc_id == 0 && g_debug_level > 3) {
      printf("c: %e %e err = %e\n", c[i].re, c[i].im, err);
    }
  }

  for(restart = 1; restart < max_restarts; restart++) {  

    /* compute c-\bar H \alpha */
    _FT(zgemv) ("N", &mp1, &_m, &cmone, G[0], &mp1, alpha, &one, &cpone, c, &one, 1);
    err = sqrt(short_scalar_prod(c, c, mp1).re);
    if(g_proc_id == 0 && g_debug_level > 0) {
      printf("%d\t %e short residue\n", m*restart, err*err);
    } 
    
    /* Compute new residual r0 */
    /* r_0=Q-AP  (b=Q, x+0=P) */
    if(g_debug_level > 0) {
      f(r0, x0);
      diff(r0, Q, r0, N);
      tmp1.im=sqrt(square_norm(r0, N, 1));
      if(g_proc_id == g_stdio_proc){
	printf("%d\t%e true residue\n", m*restart, tmp1.im*tmp1.im); 
	fflush(stdout);
      }
    }
    mul(r0, c[0], V[0], N);
    for(i = 1; i < mp1; i++) {
      assign_add_mul(r0, V[i], c[i], N);
    } 
    if(g_debug_level > 3) {
      tmp1.im=sqrt(square_norm(r0, N, 1));
      if(g_proc_id == g_stdio_proc){
	printf("%d\t%e residue\n", m*restart, tmp1.im*tmp1.im); 
	fflush(stdout);
      }
    }
    /* Stop if satisfied */
    if(err < eps){
      assign(P, x0, N);
      finalize_solver(solver_field, nr_sf);
      return(restart*m);
    }

    /* Prepare to compute harmonic Ritz pairs */
    for(i = 0; i < m-1; i++){
      alpha[i].re = 0.;
      alpha[i].im = 0.;
    }
    alpha[m-1].re = 1.;
    alpha[m-1].im = 0.;
    _FT(zgesv) (&_m, &one, work[0], &mp1, idx, alpha, &_m, &info); 
    for(i = 0; i < m; i++) {
      G[m-1][i].re += (beta2*alpha[idx[i]-1].re);
      G[m-1][i].im += (beta2*alpha[idx[i]-1].im);
    }
    if(g_proc_id == 0 && g_debug_level > 3){
      printf("zgesv returned info = %d, c[m-1]= %e, %e , idx[m-1]=%d\n", 
	     info, alpha[idx[m-1]-1].re, alpha[idx[m-1]-1].im, idx[m-1]);
    }
    /* c - \bar H * d -> c */
    /* G contains H + \beta^2 H^-He_n e_n^H */

    /* Compute harmonic Ritz pairs */
    diagonalise_general_matrix(m, G[0], mp1, alpha, evalues);
    for(i = 0; i < m; i++) {
      sortarray[i] = _complex_square_norm(evalues[i]);
      idx[i] = i;
    }
    quicksort(m, sortarray, idx);
    if(g_proc_id == g_stdio_proc && g_debug_level > 1) {
      for(i = 0; i < m; i++) {
	printf("# Evalues %d %e  %e \n", i, evalues[idx[i]].re, evalues[idx[i]].im);
      }
      fflush(stdout);
    }
    
    /* Copy the first nr_ev eigenvectors to work */
    for(i = 0; i < ne; i++) {
      for(l = 0; l < m; l++) {
	work[i][l] = G[idx[i]][l];
      }
    }
    /* Orthonormalize them */
    for(i = 0; i < ne; i++) {
      work[i][m].re = 0.;
      work[i][m].im = 0.;
      short_ModifiedGS(work[i], m, i, work[0], mp1); 
    }
    /* Orthonormalize c - \bar H d to work */
    short_ModifiedGS(c, m+1, ne, work[0], mp1);
    for(i = 0; i < mp1; i++) {
      work[nr_ev][i] = c[i];
    }
    /* Now compute \bar H = P^T_k+1 \bar H_m P_k */
    for(i = 0; i < mp1; i++) {
      for(l = 0; l < mp1; l++) {
	H[i][l].re = 0.;
	H[i][l].im = 0.;
      }
    }    

    _FT(zgemm) ("N", "N", &mp1, &ne, &_m, &cpone, work2[0], &mp1, work[0], &mp1, &czero, G[0], &mp1, 1, 1); 
    _FT(zgemm) ("C", "N", &np1, &ne , &mp1, &cpone, work[0], &mp1, G[0], &mp1, &czero, H[0], &mp1, 1, 1);

    if(g_debug_level > 3) {
      for(i = 0; i < ne+1; i++) {
	for(l = 0; l < ne+1; l++) {
	  if(g_proc_id == 0) {
	    printf("(g[%d], g[%d]) = %e, %e\n", i, l, short_scalar_prod(work[i], work[l], m+1).re, 
		   short_scalar_prod(work[i], work[l], m+1).im);
	    printf("(g[%d], g[%d]) = %e, %e\n", l, i, short_scalar_prod(work[l], work[i], m+1).re, 
		   short_scalar_prod(work[l], work[i], m+1).im);
	  }
	}
      }
    }
    /* V_k+1 = V_m+1 P_k+1 */
/*     _FT(zgemm) ("N", "N", &_N, &np1, &mp1, &cpone, (complex*)V[0], &V2, work[0], &mp1, &czero, (complex*)Z[0], &V2, 1, 1);  */
    for(l = 0; l < np1; l++) {
      mul(Z[l], work[l][0], V[0], N);
      for(i = 1; i < mp1; i++) {
	assign_add_mul(Z[l], V[i], work[l][i], N);
      }
    }
    /* copy back to V */
    for(i = 0; i < np1; i++) {
      assign(V[i], Z[i], N); 
    }
    /* Reorthogonalise v_nr_ev */
    ModifiedGS((complex*)V[nr_ev], _N, nr_ev, (complex*)V[0], V2);  
    if(g_debug_level > 3) {
      for(i = 0; i < np1; i++) {
	for(l = 0; l < np1; l++) {
	  tmp1 = scalar_prod(V[l], V[i], N, 1);
	  if(g_proc_id == 0) {
	    printf("(V[%d], V[%d]) = %e %e %d %d %d %d %d %d %e %e\n", l, i, tmp1.re, tmp1.im, np1, mp1, ne, _m, _N, V2, H[l][i].re, H[l][i].im);
	  }
	}
      }
    }
    /* Copy the content of H to work, work2 and G */
    for(i=0; i < mp1; i++) { 
      for(l = 0; l < mp1; l++) { 
 	G[i][l] = H[i][l];
	work2[i][l] = H[i][l];
	work[l][i].re = H[i][l].re;
	work[l][i].im = -H[i][l].im;
      }
    }

    for(j = ne; j < m; j++) {
      /* solver_field[0]=A*v_j */
      f(solver_field[1], V[j]);
      
      /* Set h_ij and omega_j */
      /* solver_field[1] <- omega_j */
/*       assign(solver_field[1], solver_field[0], N); */
      for(i = 0; i <= j; i++){
	H[j][i] = scalar_prod(V[i], solver_field[1], N, 1);  
	/* H, G, work and work2 are now all in Fortran storage: columns first */
	G[j][i] = H[j][i];
	work2[j][i] = H[j][i];
	work[i][j].re = H[j][i].re;
	work[i][j].im = -H[j][i].im;
	assign_diff_mul(solver_field[1], V[i], H[j][i], N);
      }
      beta2 = square_norm(solver_field[1], N, 1);
      _complex_set(H[j][j+1], sqrt(beta2), 0.);
      G[j][j+1] = H[j][j+1];
      work2[j][j+1] = H[j][j+1];
      work[j+1][j].re =  H[j][j+1].re;
      work[j+1][j].im =  -H[j][j+1].im;
      mul_r(V[(j+1)], 1./H[j][j+1].re, solver_field[1], N);
    }

    /* Solve the least square problem for alpha*/
    /* This produces c=V_m+1*r0 */
    for(i = 0; i < mp1; i++) {      
      c[i] = scalar_prod(V[i], r0, N, 1);  
      alpha[i] = c[i];
      if(g_proc_id == 0 && g_debug_level > 3) {
	printf("c: %e %e err = %e\n", c[i].re, c[i].im, err);
      }
    }
    if(lswork == NULL) {
      lwork = -1;
      _FT(zgels) ("N", &mp1, &_m, &one, H[0], &mp1, alpha, &mp1, &tmp1, &lwork, &info, 1);
      lwork = (int)tmp1.re;
      lswork = (complex*)malloc(lwork*sizeof(complex));
    }
    _FT(zgels) ("N", &mp1, &_m, &one, H[0], &mp1, alpha, &mp1, lswork, &lwork, &info, 1);
    if(g_proc_id == 0 && g_debug_level > 3) {
      printf("zgels returned info = %d\n", info);
      fflush(stdout);
    }
    /* Compute the new solution vector */
    for(i = 0; i < m; i++){
      if(g_proc_id == 0 && g_debug_level > 3) {
	printf("alpha: %e %e\n", alpha[i].re, alpha[i].im);
      }
      assign_add_mul(x0, V[i], alpha[i], N);
    }
  }


  /* If maximal number of restart is reached */
  assign(P, x0, N);
  finalize_solver(solver_field, nr_sf);
  return(-1);
}

Example #12

Show file

File: eigcg.c Project: Marcogarofalo/tmLQCD

void eigcg(int n, int lde, spinor * const x, spinor * const b, double *normb, 
           const double eps_sq, double restart_eps_sq, const int rel_prec, int maxit, int *iter, 
           double *reshist, int *flag, spinor **work, matrix_mult f, 
           int nev, int v_max, spinor *V, int esize, _Complex double *ework)
{
  double tolb;        
  double alpha, beta; /* CG scalars */
  double rho, rhoprev;
  double pAp;
  int it;   /* current iteration number */
  int i, j; /* loop variables */
  int zs,ds,tmpsize;
  spinor *r, *p, *Ap;   /* ptrs in work for CG vectors */
  _Complex double tempz;        /* double precision complex temp var */
  double tempd;         /* double temp var */
  int tempi;            /* int temp var */
  int ONE = 1;          /* var for passing 1 into BLAS routines */
  /*----------------------------------------------------------------------
         Eigen variables and setup    
    ----------------------------------------------------------------------*/
  /* Some constants */
  char cR = 'R'; char cL = 'L'; char cN ='N'; 
  char cV = 'V'; char cU = 'U'; char cC ='C';
  double betaprev, alphaprev;     /* remember the previous iterations scalars */
  int v_size;                     /* tracks the size of V */
  int lwork = 3*v_max;            /* the size of zwork */
  spinor *Ap_prev;
  void *_h;     
  _Complex double *H;         /* the V'AV projection matrix */
  void *_hevecs;
  _Complex double *Hevecs;    /* the eigenvectors of H */
  void *_hevecsold;
  _Complex double *Hevecsold; /* the eigenvectors of H(v_max-1,v_max-1) */
  void *_hevals;
  double    *Hevals;    /* the eigenvalues of H */
  void *_hevalsold;
  double    *Hevalsold; /* the eigenvalues of H(m-1,m-1) */
  void *_tau;
  _Complex double *TAU;	         
  void *_zwork;
  _Complex double *zwork;        /* double complex work array needed by zheev */
  void *_rwork;
  double *rwork;        /* double work array needed by zheev */

  int parallel;
  
  double tmpd;
  _Complex double tmpz;

  zs = sizeof(_Complex double);  
  ds = sizeof(double);

  int info, allelems = v_max*v_max;
  
#ifdef MPI
  parallel=1;
#else
  parallel=0;
#endif

  if(nev > 0)   /*allocate memory only if eigenvalues will be used */
  {
    #if (defined SSE || defined SSE2 || defined SSE3)
    if ((_h = calloc(v_max*v_max+ALIGN_BASE,zs)) == NULL)
    {
      if( g_proc_id == g_stdio_proc) 
      {fprintf(stderr,"ERROR Could not allocate H\n"); exit(1);}  
    }
    else
      H = (_Complex double *)(((unsigned long int)(_h)+ALIGN_BASE)&~ALIGN_BASE);
  
  
    if ((_hevecs = calloc(v_max*v_max+ALIGN_BASE,zs)) == NULL)
    {
      if( g_proc_id == g_stdio_proc ) 
      {fprintf(stderr, "ERROR Could not allocate Hevecs\n"); exit(1);}
    }else
      Hevecs = (_Complex double *)(((unsigned long int)(_hevecs)+ALIGN_BASE)&~ALIGN_BASE);
  
    if ((_hevecsold = calloc(v_max*v_max+ALIGN_BASE,zs)) == NULL)
    {
      if( g_proc_id == g_stdio_proc ) 
        {fprintf(stderr, "ERROR Could not allocate Hevecsold\n"); exit(1);}  
    }else
      Hevecsold = (_Complex double *)(((unsigned long int)(_hevecsold)+ALIGN_BASE)&~ALIGN_BASE);
  
    if ((_hevals = calloc(v_max+ALIGN_BASE,ds)) == NULL)
    {
      if( g_proc_id == g_stdio_proc) 
        {fprintf(stderr, "ERROR Could not allocate Hevals\n"); exit(1);}
    
    }else
      Hevals = (double *)(((unsigned long int)(_hevals)+ALIGN_BASE)&~ALIGN_BASE);
  
    if ((_hevalsold = calloc(v_max+ALIGN_BASE,ds)) == NULL) 
    {
      if( g_proc_id == g_stdio_proc)
        {fprintf(stderr, "ERROR Could not allocate Hevalsold\n"); exit(1); }
    
    }else
      Hevalsold = (double *)(((unsigned long int)(_hevalsold)+ALIGN_BASE)&~ALIGN_BASE);
  
    if ((_tau = calloc(2*nev+ALIGN_BASE,zs)) == NULL)  
    {
      if( g_proc_id == g_stdio_proc ) 
        {fprintf(stderr, "ERROR Could not allocate TAU\n"); exit(1); }
    
    }else
      TAU = (_Complex double *)(((unsigned long int)(_tau)+ALIGN_BASE)&~ALIGN_BASE);
  
    if ((_zwork = calloc(lwork+ALIGN_BASE,zs)) == NULL)   
    {
      if( g_proc_id == g_stdio_proc)
      {fprintf(stderr, "ERROR Could not allocate zwork\n"); exit(1);}
    
    }else
      zwork = (_Complex double *)(((unsigned long int)(_zwork)+ALIGN_BASE)&~ALIGN_BASE);
  
    if ((_rwork = calloc(3*v_max+ALIGN_BASE,ds)) == NULL) 
    {
      if( g_proc_id == g_stdio_proc)
        {fprintf(stderr, "ERROR Could not allocate rwork\n"); exit(1);}
    
    }else
      rwork = (double *)(((unsigned long int)(_rwork)+ALIGN_BASE)&~ALIGN_BASE);
  
    #else
  
    if ((H = (_Complex double *) calloc(v_max*v_max, zs)) == NULL)
    {
      if( g_proc_id == g_stdio_proc) 
        {fprintf(stderr, "ERROR Could not allocate H\n"); exit(1);}
    }

    if ((Hevecs = (_Complex double *) calloc(v_max*v_max, zs)) == NULL)
    {
      if( g_proc_id == g_stdio_proc ) 
        {fprintf(stderr, "ERROR Could not allocate Hevecs\n"); exit(1);}
    }

    if ((Hevecsold = (_Complex double *) calloc(v_max*v_max, zs)) == NULL)
    {
      if( g_proc_id == g_stdio_proc ) 
      {fprintf(stderr, "ERROR Could not allocate Hevecsold\n"); exit(1);}
    }

    if ((Hevals = (double *) calloc(v_max, ds)) == NULL)
    {
      if( g_proc_id == g_stdio_proc) 
        {fprintf(stderr, "ERROR Could not allocate Hevals\n"); exit(1);}
    }
     

    if ((Hevalsold = (double *) calloc(v_max, ds)) == NULL) 
    {
      if( g_proc_id == g_stdio_proc)
        {fprintf(stderr, "ERROR Could not allocate Hevalsold\n"); exit(1); }
    }


    if ((TAU = (_Complex double *) calloc(2*nev, zs)) == NULL)
    {
      if( g_proc_id == g_stdio_proc ) 
       {fprintf(stderr, "ERROR Could not allocate TAU\n"); exit(1); }
    
    }
  
  
    if ((zwork = (_Complex double *) calloc(lwork, zs)) == NULL) 
    {
      if( g_proc_id == g_stdio_proc)
      {fprintf(stderr, "ERROR Could not allocate zwork\n"); exit(1);}
    
    }
  
    if ((rwork = (double *) calloc(3*v_max, ds)) == NULL) 
    {
      if( g_proc_id == g_stdio_proc)
      {fprintf(stderr, "ERROR Could not allocate rwork\n"); exit(1);}
    
    }

    #endif 
  } /* end if (nev > 0) */  

  /*----------------------------------------------------------------------*/

  /* setup pointers into work */
  r = work[0];
  p = work[1];
  Ap = work[2];
  Ap_prev = work[3];
  


  /*--------------------------------------------------------------------
     Initialization phase 
    --------------------------------------------------------------------*/
  
  if (*flag != 3) 
  {
    
    /* If flag == 3, the eigCG is called after restart with the same b 
     * whose norm is already known in normb, so no need for these    */
    
    tempd = square_norm(b,n,parallel); /* Norm of rhs, b */
    *normb = sqrt(tempd);

    /* If right hand side is zero return zero solution. ITER stays the same */
    if (*normb == 0.0) 
    {
      for (i=0; i<n; i++) 
      {
	_vector_null(x[i].s0);
        _vector_null(x[i].s1);
        _vector_null(x[i].s2);
        _vector_null(x[i].s3);
      }       
    
      *flag = 0;		
      *reshist = 0.0;
      if( g_debug_level > 0 && g_proc_id == g_stdio_proc)
        displayInfo(eps_sq,maxit,*flag,*iter,*reshist);
      return;
     }
     
  }
  
  /* Set up for the method */
  *flag = 1;
  tolb = eps_sq * (*normb)*(*normb);	/* Relative to b tolerance */

  /* Zero-th residual: r = b - A*x  */
  f(r,x);
  diff(r,b,r,n);
  
  rho = 0.0;
  alpha = 1.0;
  beta = 0.0;
  v_size = 0;

  double reshist_init=square_norm(r,n,parallel);

  //if( g_proc_id == g_stdio_proc )
    //fprintf(stdout, "reshist init %f\n", reshist_init);
  
  /*--------------------------------------------------------------------
     main CG loop
    --------------------------------------------------------------------*/
  for (it = 0; it < maxit; it++) {
   
    rhoprev = rho;
    rho=square_norm(r,n,parallel);
    *reshist = rho;
    if ( (g_debug_level > 2) && (g_proc_id == g_stdio_proc) )
    { fprintf(stdout, " Linsys res( %d ): %g\n",*iter+it,*reshist); fflush(stdout); }

    /* Convergence test */
    if ( ( (*reshist < eps_sq) && (rel_prec==0) ) || ( (*reshist < eps_sq*(*normb)*(*normb)) && (rel_prec ==1 ) )   ) 
    { 
       *flag = 0;
       break;  /* break do not return */
    }
    
    /* Restart test */
    if(nev==0)
    {
       if (*reshist < (restart_eps_sq*reshist_init) ) 
       {  
           *flag = 3;
            break;  /* break do not return */
       }
    }

    if (it == 0)
      assign(p,r,n);
    else {
      betaprev = beta;
      beta = rho / rhoprev;
      if (beta == 0.0) {
	       *flag = 2;
	       break;
      }
      assign_mul_add_r(p,beta,r,n); /* p = beta*p + r */
    }

    /*----- eigCG specific code -------------------------------------------*/
    /* Remember Ap from previous iteration to be used at restart */
    if (nev > 0 && v_size == v_max)
      assign(Ap_prev,Ap,n); 
    /*---------------------------------------------------------------------*/

    f(Ap,p);

    /*----- eigCG specific code -------------------------------------------*/
    if (nev > 0) {
      /* record the diagonal vAv for the previous vector */
      if (it > 0) {
	H[(v_size-1)*v_max+v_size-1]= 1.0/alpha + betaprev/alphaprev;
	//H[(v_size-1)*v_max+v_size-1].im = 0.0;
      }
      
      /* Restarting V */
      if (v_size == v_max) {
	/* Solve (v_max) and (v_max-1) eigenproblems */
	tempi = v_max;
	allelems=v_max*v_max;
	_FT(zcopy)(&allelems, H, &ONE, Hevecs, &ONE);
	_FT(zheev)(&cV,&cU,&tempi,Hevecs,&v_max,Hevals,zwork,&lwork,rwork,&info,1,1);
	if( (info != 0 ) && (g_proc_id==g_stdio_proc))
	{fprintf(stderr, "Error: ZHEEV in eigcg at v_max step, info %d\n",info); exit(1);}
	
	tempi = v_max-1;
	_FT(zcopy)(&allelems, H, &ONE, Hevecsold, &ONE);
	_FT(zheev)(&cV,&cU,&tempi,Hevecsold,&v_max,Hevalsold,zwork,&lwork,rwork,&info,1,1);
	       
	if( (info != 0 ) && (g_proc_id==g_stdio_proc))
	{fprintf(stderr, "Error: ZHEEV in eigcg at (v_max-1) step, info %d\n",info); exit(1);}
	       
	
	/* fill 0s in vmax-th elem of oldevecs to match Hevecs */
	for(i=1; i <= v_max ; i++)
	{Hevecsold[i*v_max-1] = 0.0 ;}

	/* Attach the first nev oldevecs at the end of the nev latest ones */
	tempi = nev*v_max;
	_FT(zcopy)(&tempi,Hevecsold,&ONE,&Hevecs[tempi],&ONE);

        /* Orthogonalize the 2*nev (new+old) vectors Hevecs=QR */
	v_size = 2*nev; 
	_FT(zgeqrf)(&v_max,&v_size,Hevecs,&v_max,TAU,zwork,&lwork,&info) ;
 
	if( (info != 0 ) && (g_proc_id==g_stdio_proc))
	{fprintf(stderr, "Error: ZGEQRF in eigcg info %d\n",info); exit(1);}
	
	/* use as a temp space Hevecsold = Q^THQ */
	_FT(zcopy)(&allelems,H,&ONE,Hevecsold,&ONE); 
	_FT(zunmqr)(&cR,&cN,&v_max,&v_max,&v_size,Hevecs,&v_max,
		               TAU,Hevecsold,&v_max,zwork,&lwork,&info);
	
	if( (info != 0 ) && (g_proc_id==g_stdio_proc))
	{fprintf(stderr, "Error: ZGEQRF call 1 in eigcg info %d\n",info); exit(1);}
	
	_FT(zunmqr)(&cL,&cC,&v_max,&v_size,&v_size,Hevecs,&v_max,
		               TAU,Hevecsold,&v_max,zwork,&lwork,&info);
	
	if( (info != 0 ) && (g_proc_id==g_stdio_proc))
	{fprintf(stderr, "Error: ZGEQRF call 2 in eigcg info %d\n",info); exit(1);}

        /* solve the small Hevecsold v_size x v_size eigenproblem */
	_FT(zheev)(&cV,&cU,&v_size,Hevecsold,&v_max,Hevals, zwork,&lwork,rwork,&info,1,1);
	if( (info != 0 ) && (g_proc_id==g_stdio_proc))
	{fprintf(stderr, "Error: ZHEEV in eigcg info %d\n",info); exit(1);}



	/* zero out unused part of eigenectors in Hevecsold */
	tempi = 0;
	for(i = 0; i < v_size; i++ ) 
	{
	  for(j = v_size; j < v_max; j++)
	  {Hevecsold[tempi + j]=0.0;}
	  tempi += v_max;
	  
	}


	/* Compute the Hevecsold = Hevecs*Hevecsold */
	_FT(zunmqr)(&cL,&cN,&v_max,&v_size,&v_size,Hevecs,&v_max,
		               TAU,Hevecsold,&v_max,zwork,&lwork,&info);

	           
	if( (info != 0 ) && (g_proc_id==g_stdio_proc))
	{fprintf(stderr, "Error: ZUNMQR, info %d\n",info); exit(1);}   
	      
	  
	/* Restart V = V(n,v_max)*Hevecsold(v_max,v_size) */
	Zrestart_X((_Complex double *) V, 12*lde, Hevecsold, 12*n, v_max, v_size, ework, esize); 
	
	/* Restart H = diag(Hevals) plus a column and a row */
	for (i = 0; i < allelems; i++ )  {H[i] = 0.0; }
    	for (i = 0; i < v_size; i++) H[i*(v_max+1)]= Hevals[i];

	 
	  
        /* The next residual to be added (v = r/sqrt(rho)) 
     	 * needs the (nev+1)-th column and row, through V(:,1:vs)'*A*v. 
	 * Instead of a matvec, we use the Ap and Ap_prev to obtain this:
	 * V(:,1:vs)'*A*V(:,vs+1) = V(:,1:vs)'*A*r/sqrt(rho) = 
	 * V'(A(p-beta*p_prev))/sqrt(rho) = V'(Ap - beta*Ap_prev)/sqrt(rho)*/
	  
	tmpd=-beta;
	assign_mul_add_r(Ap_prev,tmpd,Ap,n);   /* Ap_prev=Ap-beta*Ap_prev */
	  
	tempi=v_size*v_max;
	for (i=0; i<v_size; i++){
	  tmpz=scalar_prod(&V[i*lde],Ap_prev,n,parallel);
	  H[v_size+i*v_max]=tmpz/sqrt(rho);
	  H[i+tempi]=conj(tmpz)/sqrt(rho);
	}
	
      } /* end of if v_size == v_max */
      else 
      {
	/* update (vs+1,vs),(vs,vs+1) elements of tridigonal which are real*/
        if ( it > 0) 
	{
	  H[(v_size-1)*v_max + v_size]= -sqrt(beta)/alpha;
	  H[v_size*v_max + v_size-1] = creal(H[(v_size-1)*v_max + v_size]);
	}
	
      } /* of else */
      /* Augment V with the current CG residual r normalized by sqrt(rho) */

      tmpd=1.0/sqrt(rho);
      mul_r(&V[v_size*lde],tmpd,r,n);
      v_size++;
    } /* end of if nev >0 , ie., the eigCG specific code */
    /*---------------------------------------------------------------------*/

    /* pAp = p' * Ap */
    tempz=scalar_prod(p,Ap,n,parallel);
    pAp = creal(tempz);
    if (pAp == 0.0) {
      *flag = 2;
      break;
    } 

    alphaprev = alpha;
    alpha = rho / pAp;
    
    assign_add_mul_r(x,p,alpha,n);  /*update x*/
    tmpd=-alpha;
    assign_add_mul_r(r,Ap,tmpd,n);   /*update r*/
    
    //next line useful for debugging
    //printf("%d beta, alpha, rho, pAp %le %le %le %le\n",it,beta,alpha,rho,pAp);
  } /* for it = 0 : maxit-1 */
  
  *iter = *iter + it+1; /* record the number of CG iterations plus any older */
  if( g_proc_id == g_stdio_proc && g_debug_level > 0)
    displayInfo(eps_sq,maxit,*flag,*iter-1,*reshist);

  
  if(nev > 0 )
  {
    #if (defined SSE || defined SSE2 || defined SSE3)
    H= NULL;
    free(_h);
    Hevecs=NULL;
    free(_hevecs);
    Hevecsold=NULL;
    free(_hevecsold);
    Hevals=NULL;
    free(_hevals);
    Hevalsold=NULL;
    free(_hevalsold);
    TAU=NULL;
    free(_tau);
    zwork=NULL;
    free(_zwork);
    rwork=NULL;
    free(_rwork);
    #else
    free(H);
    free(Hevecs);
    free(Hevecsold);
    free(Hevals);
    free(Hevalsold);
    free(TAU);
    free(zwork);
    free(rwork);
    #endif
  }

 return;
}

Example #13

Show file

File: prepare_source.c Project: Marcogarofalo/tmLQCD

void prepare_source(const int nstore, const int isample, const int ix, const int op_id, 
                    const int read_source_flag,
                    const int source_location) {

  FILE * ifs = NULL;
  int is = ix / 3, ic = ix %3, err = 0, rstat=0, t = 0;
  operator * optr = &operator_list[op_id];
  char source_filename[100];
  int source_type = SourceInfo.type;
  static int nstore_ = -1;
  static int isample_ = -1;
  static int ix_ = -1;
  static int op_id_ = -1;

  SourceInfo.nstore = nstore;
  SourceInfo.sample = isample;
  SourceInfo.ix = ix;

  if(optr->type != DBTMWILSON && optr->type != DBCLOVER && optr->type != BSM && optr->type != BSM2b && optr->type != BSM2m ) {
    SourceInfo.no_flavours = 1;
    /* no volume sources */
    if(source_type != 1) {
      /* either "Don't read inversion source from file" or                    */
      /* "Don't read inversion source from file, but save the one generated" */
      if (read_source_flag == 0 || read_source_flag == 2) {
        if (source_location == 0) {
          source_spinor_field(g_spinor_field[0], g_spinor_field[1], is, ic);
        }
        else {
          source_spinor_field_point_from_file(g_spinor_field[0], g_spinor_field[1], is, ic, source_location);
        }
      }
      /* "Read inversion source from file" */
      else {
        if (SourceInfo.splitted) {
	  /* timeslice needs to be put into filename */
	  if(SourceInfo.automaticTS) {
	    /* automatic timeslice detection */
	    if(g_proc_id == 0) {
	      for(t = 0; t < g_nproc_t*T; t++) {
		if(T_global > 99) sprintf(source_filename, "%s.%.4d.%.3d.%.2d", SourceInfo.basename, nstore, t, ix);
                else sprintf(source_filename, "%s.%.4d.%.2d.%.2d", SourceInfo.basename, nstore, t, ix);
		if( (ifs = fopen(source_filename, "r")) != NULL) {
		  fclose(ifs);
		  break;
		}
	      }
	    }
#ifdef MPI
	    MPI_Bcast(&t, 1, MPI_INT, 0, MPI_COMM_WORLD);
#endif
	    SourceInfo.t = t;
	  }
          if(T_global > 99) sprintf(source_filename, "%s.%.4d.%.3d.%.2d", SourceInfo.basename, nstore, SourceInfo.t, ix);
          else sprintf(source_filename, "%s.%.4d.%.2d.%.2d", SourceInfo.basename, nstore, SourceInfo.t, ix);
          if (g_cart_id == 0) {
            printf("# Trying to read source from %s\n", source_filename);
          }
          rstat = read_spinor(g_spinor_field[0], g_spinor_field[1], source_filename, 0);
        }
        else {
          sprintf(source_filename, "%s", SourceInfo.basename);
          if (g_cart_id == 0) {
            printf("# Trying to read source no %d from %s\n", ix, source_filename);
          }
          rstat = read_spinor(g_spinor_field[0], g_spinor_field[1], source_filename, ix);
        }
        if(rstat) {
          fprintf(stderr, "Error reading file %s in prepare_source.c\nUnable to proceed, aborting....\n", source_filename);
          exit(-1);
        }
      }
      if (PropInfo.splitted) {
        if(T_global > 99) sprintf(source_filename, "%s.%.4d.%.3d.%.2d.inverted", PropInfo.basename, nstore, SourceInfo.t, ix);
        else sprintf(source_filename, "%s.%.4d.%.2d.%.2d.inverted", PropInfo.basename, nstore, SourceInfo.t, ix);
      }
      else {
        if(T_global > 99) sprintf(source_filename, "%s.%.4d.%.3d.inverted", PropInfo.basename, nstore, SourceInfo.t);
        else sprintf(source_filename, "%s.%.4d.%.2d.inverted", PropInfo.basename, nstore, SourceInfo.t);
      }
    }
    else if(source_type == 1) {
      /* Volume sources */
      if(read_source_flag == 0 || read_source_flag == 2) {
        if(g_proc_id == 0 && g_debug_level > 0) {
          printf("# Preparing 1 flavour volume source\n");
        }
        gaussian_volume_source(g_spinor_field[0], g_spinor_field[1], isample, nstore, 0);
      }
      else {
        sprintf(source_filename, "%s.%.4d.%.5d", SourceInfo.basename, nstore, isample);
        if (g_cart_id == 0) {
          printf("# Trying to read source from %s\n", source_filename);
        }
        rstat = read_spinor(g_spinor_field[0], g_spinor_field[1], source_filename, 0);
        if(rstat) {
          fprintf(stderr, "Error reading file %s in prepare_source.c.\nUnable to proceed, aborting....\n", source_filename);
          exit(-1);
        }
      }
      sprintf(source_filename, "%s.%.4d.%.5d.inverted", PropInfo.basename, nstore, isample);
    }
    optr->sr0 = g_spinor_field[0];
    optr->sr1 = g_spinor_field[1];
    optr->prop0 = g_spinor_field[2];
    optr->prop1 = g_spinor_field[3];


    /* If the solver is _not_ CG we might read in */
    /* here some better guess                     */
    /* This also works for re-iteration           */
    if (optr->solver != CG && optr->solver != PCG && optr->solver != MIXEDCG && optr->solver != RGMIXEDCG) {
      ifs = fopen(source_filename, "r");
      if (ifs != NULL) {
        if (g_cart_id == 0) {
          printf("# Trying to read guess from file %s\n", source_filename);
          fflush(stdout);
        }
        fclose(ifs);
        err = 0;
        /* iter = get_propagator_type(source_filename); */
        rstat = read_spinor(optr->prop0, optr->prop1, source_filename, (PropInfo.splitted ? 0 : ix));
        if(rstat) {
          fprintf(stderr, "Error reading file %s in prepare_source.c, rstat = %d\n", source_filename, rstat);
          exit(-1);
        }
        if (g_kappa != 0.) {
          mul_r(optr->prop1, 1. / (2*optr->kappa), optr->prop1, VOLUME / 2);
          mul_r(optr->prop0, 1. / (2*optr->kappa), optr->prop0, VOLUME / 2);
        }

        if (err != 0) {
          zero_spinor_field(optr->prop0, VOLUME / 2);
          zero_spinor_field(optr->prop1, VOLUME / 2);
        }
      }
      else {
        zero_spinor_field(optr->prop0, VOLUME / 2);
        zero_spinor_field(optr->prop1, VOLUME / 2);
      }
    }
    else {
      zero_spinor_field(optr->prop0, VOLUME / 2);
      zero_spinor_field(optr->prop1, VOLUME / 2);
    }
    /*     if(optr->even_odd_flag) { */
    /*       assign(optr->sr0, g_spinor_field[0], VOLUME/2); */
    /*       assign(optr->sr1, g_spinor_field[1], VOLUME/2); */
    /*     } */
    /*     else { */
    /*       convert_eo_to_lexic(optr->sr0, g_spinor_field[0], g_spinor_field[1]); */
    /*     } */
  }
  else { /* for the ND 2 flavour twisted operator and BSM(2) */
    SourceInfo.no_flavours = 2;
    zero_spinor_field(g_spinor_field[0], VOLUME/2);
    zero_spinor_field(g_spinor_field[1], VOLUME/2);
    if(source_type != 1) {
      if(read_source_flag == 0 || read_source_flag == 2) {
        if(source_location == 0) {
          source_spinor_field(g_spinor_field[2], g_spinor_field[3], is, ic);
        }
        else {
          source_spinor_field_point_from_file(g_spinor_field[2], g_spinor_field[3], 
					      is, ic, source_location);
        }
      }
      else {
        if(SourceInfo.splitted) {
          if(T_global > 99) sprintf(source_filename, "%s.%.4d.%.3d.%.2d", SourceInfo.basename, nstore, SourceInfo.t, ix);
          else sprintf(source_filename, "%s.%.4d.%.2d.%.2d", SourceInfo.basename, nstore, SourceInfo.t, ix);
        }
        else {
          sprintf(source_filename,"%s", SourceInfo.basename);
        }
        if(g_proc_id == 0) {
          printf("# Trying to read source from %s\n", source_filename);
        }
        if(read_spinor(g_spinor_field[2], g_spinor_field[3], source_filename, 0) != 0) {
          fprintf(stderr, "Error reading source! Aborting...\n");
#ifdef MPI
          MPI_Abort(MPI_COMM_WORLD, 1);
          MPI_Finalize();
#endif
          exit(-1);
        }
      }
    }
    else if(source_type == 1) {
      /* Volume sources */
      if(g_proc_id == 0 && g_debug_level > 0) {
        printf("# Preparing 2 flavour volume source\n");
      }
      gaussian_volume_source(g_spinor_field[0], g_spinor_field[1],
                             isample, nstore, 1);
      gaussian_volume_source(g_spinor_field[2], g_spinor_field[3],
                             isample, nstore, 2);
    }
    if( optr->type != BSM && optr->type != BSM2b && optr->type != BSM2m ) {
      mul_one_pm_itau2(g_spinor_field[4], g_spinor_field[6], g_spinor_field[0], g_spinor_field[2], +1., VOLUME/2);
      mul_one_pm_itau2(g_spinor_field[5], g_spinor_field[7], g_spinor_field[1], g_spinor_field[3], +1., VOLUME/2);
      assign(g_spinor_field[0], g_spinor_field[4], VOLUME/2);
      assign(g_spinor_field[1], g_spinor_field[5], VOLUME/2);
      assign(g_spinor_field[2], g_spinor_field[6], VOLUME/2);
      assign(g_spinor_field[3], g_spinor_field[7], VOLUME/2);
    }
    
    optr->sr0 = g_spinor_field[0];
    optr->sr1 = g_spinor_field[1];
    optr->sr2 = g_spinor_field[2];
    optr->sr3 = g_spinor_field[3];
    optr->prop0 = g_spinor_field[4];
    optr->prop1 = g_spinor_field[5];
    optr->prop2 = g_spinor_field[6];
    optr->prop3 = g_spinor_field[7];
  }
  nstore_ = nstore;
  isample_ = isample;
  ix_ = ix;
  op_id_ = op_id;
  return;
}

Example #14

Show file

File: gmres.c Project: annube/tmLQCD

int gmres(spinor * const P,spinor * const Q, 
	  const int m, const int max_restarts,
	  const double eps_sq, const int rel_prec,
	  const int N, const int parallel, matrix_mult f){

  int restart, i, j, k;
  double beta, eps, norm;
  complex tmp1, tmp2;
  spinor ** solver_field = NULL;
  const int nr_sf = 3;

  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }

  eps=sqrt(eps_sq);
  init_gmres(m, VOLUMEPLUSRAND);

  norm = sqrt(square_norm(Q, N, parallel));

  assign(solver_field[2], P, N);
  for(restart = 0; restart < max_restarts; restart++){
    /* r_0=Q-AP  (b=Q, x+0=P) */
    f(solver_field[0], solver_field[2]);
    diff(solver_field[0], Q, solver_field[0], N);

    /* v_0=r_0/||r_0|| */
    alpha[0].re=sqrt(square_norm(solver_field[0], N, parallel));

    if(g_proc_id == g_stdio_proc && g_debug_level > 1){
      printf("%d\t%g true residue\n", restart*m, alpha[0].re*alpha[0].re); 
      fflush(stdout);
    }

    if(alpha[0].re==0.){
      assign(P, solver_field[2], N);
      finalize_solver(solver_field, nr_sf);
      return(restart*m);
    }

    mul_r(V[0], 1./alpha[0].re, solver_field[0], N);

    for(j = 0; j < m; j++){
      /* solver_field[0]=A*v_j */

      f(solver_field[0], V[j]);

      /* Set h_ij and omega_j */
      /* solver_field[1] <- omega_j */
      assign(solver_field[1], solver_field[0], N);
      for(i = 0; i <= j; i++){
	H[i][j] = scalar_prod(V[i], solver_field[1], N, parallel);
	assign_diff_mul(solver_field[1], V[i], H[i][j], N);
      }

      _complex_set(H[j+1][j], sqrt(square_norm(solver_field[1], N, parallel)), 0.);
      for(i = 0; i < j; i++){
	tmp1 = H[i][j];
	tmp2 = H[i+1][j];
	_mult_real(H[i][j], tmp2, s[i]);
	_add_assign_complex_conj(H[i][j], c[i], tmp1);
	_mult_real(H[i+1][j], tmp1, s[i]);
	_diff_assign_complex(H[i+1][j], c[i], tmp2);
      }

      /* Set beta, s, c, alpha[j],[j+1] */
      beta = sqrt(_complex_square_norm(H[j][j]) + _complex_square_norm(H[j+1][j]));
      s[j] = H[j+1][j].re / beta;
      _mult_real(c[j], H[j][j], 1./beta);
      _complex_set(H[j][j], beta, 0.);
      _mult_real(alpha[j+1], alpha[j], s[j]);
      tmp1 = alpha[j];
      _mult_assign_complex_conj(alpha[j], c[j], tmp1);

      /* precision reached? */
      if(g_proc_id == g_stdio_proc && g_debug_level > 1){
	printf("%d\t%g residue\n", restart*m+j, alpha[j+1].re*alpha[j+1].re); 
	fflush(stdout);
      }
      if(((alpha[j+1].re <= eps) && (rel_prec == 0)) || ((alpha[j+1].re <= eps*norm) && (rel_prec == 1))){
	_mult_real(alpha[j], alpha[j], 1./H[j][j].re);
	assign_add_mul(solver_field[2], V[j], alpha[j], N);
	for(i = j-1; i >= 0; i--){
	  for(k = i+1; k <= j; k++){
 	    _mult_assign_complex(tmp1, H[i][k], alpha[k]); 
	    _diff_complex(alpha[i], tmp1);
	  }
	  _mult_real(alpha[i], alpha[i], 1./H[i][i].re);
	  assign_add_mul(solver_field[2], V[i], alpha[i], N);
	}
	for(i = 0; i < m; i++){
	  alpha[i].im = 0.;
	}
	assign(P, solver_field[2], N);
	finalize_solver(solver_field, nr_sf);
	return(restart*m+j);
      }
      /* if not */
      else{
	if(j != m-1){
	  mul_r(V[(j+1)], 1./H[j+1][j].re, solver_field[1], N);
	}
      }

    }
    j=m-1;
    /* prepare for restart */
    _mult_real(alpha[j], alpha[j], 1./H[j][j].re);
    assign_add_mul(solver_field[2], V[j], alpha[j], N);
    for(i = j-1; i >= 0; i--){
      for(k = i+1; k <= j; k++){
	_mult_assign_complex(tmp1, H[i][k], alpha[k]);
	_diff_complex(alpha[i], tmp1);
      }
      _mult_real(alpha[i], alpha[i], 1./H[i][i].re);
      assign_add_mul(solver_field[2], V[i], alpha[i], N);
    }
    for(i = 0; i < m; i++){
      alpha[i].im = 0.;
    }
  }

  /* If maximal number of restarts is reached */
  assign(P, solver_field[2], N);
  finalize_solver(solver_field, nr_sf);
  return(-1);
}

Example #15

Show file

File: poly_precon.c Project: palao/tmLQCD

void poly_nonherm_precon(spinor * const R, spinor * const S, 
			 const double e, const double d, const int n, const int N) {
  int j;
  double a1, a2, dtmp;
  static spinor *work, *work_;
  static int initpnH = 0;
  spinor * psi, * chi, *tmp0, *tmp1, *cptmp;

  
  if(initpnH == 0) {
    work_  = calloc(4*VOLUMEPLUSRAND+1, sizeof(spinor));
#if (defined SSE || defined SSE2 || defined SSE3)
    work   = (spinor *)(((unsigned long int)(work_)+ALIGN_BASE)&~ALIGN_BASE);
#else 
    work = work_;
#endif
    initpnH = 1;
  }
  psi = work;
  chi = &work[VOLUMEPLUSRAND];
  tmp0 = &work[2*VOLUMEPLUSRAND];
  tmp1 = &work[3*VOLUMEPLUSRAND];

  /* signs to be clarified!! */
  /* P_0 * S */
  mul_r(psi, 1./d, S, N);
  /* P_1 * S = a_1(1+kappa*H) * S */
  a1 = d/(d*d-e*e/2.);
  boundary(g_kappa/d);
  dtmp = g_mu;
  g_mu = g_mu/d;
  D_psi(chi, S);
  mul_r(chi, a1, chi, N);
  boundary(g_kappa);
  g_mu = dtmp;
/*   boundary(-g_kappa); */
/*   g_mu = -g_mu; */
/*   D_psi(aux, chi); */
/*   diff(aux, aux, S, N); */
/*   dtmp = square_norm(aux, N, 1); */
/*   printf("1 %1.3e\n", dtmp); */
/*   boundary(-g_kappa); */
/*   g_mu = -g_mu; */

/*   assign(chi, d, N); */
  for(j = 2; j < n+1; j++) {
    /* a_n */
    a2 = 1./(d-a1*e*e/4.);
    /* 1-a_n */
    a1 = 1.-d*a2;
    /* aux = a_n*S + (1-a_n) psi */
    mul_add_mul_r(tmp0, S, psi, a2, a1, N);
    /* sv = kappa H chi = (D_psi(-kappa, -2kappamu) - 1) chi */
    D_psi(tmp1, chi);
    /* why is the following sign like this? */
    diff(tmp1, chi, tmp1, N);
    /* psi = aux + a_n * sv */
    mul_add_mul_r(psi, tmp0, tmp1, 1., a2, N);
    cptmp = psi;
    psi = chi;
    chi = cptmp;

/*     boundary(-g_kappa); */
/*     g_mu = -g_mu; */
    if(g_debug_level>4) {
      D_psi(tmp0, chi);
      diff(tmp0, tmp0, S, N);
      dtmp = square_norm(tmp0, N, 1);
      if(g_proc_id == 0) printf("poly %d %1.3e\n", j, dtmp);
    }
/*     boundary(-g_kappa); */
/*     g_mu = -g_mu; */
    a1 = a2;
  }
  assign(R, chi, N);
  boundary(g_kappa);
  g_mu = dtmp;

 
  return;
}