コード例 #1
0
ファイル: cgne4complex.c プロジェクト: Finkenrath/tmLQCD
/*
 * Iterative CG solver (tagged "lCG" in the log output) for f(x) = Q on
 * vectors of N complex numbers.
 *
 * P         in/out: initial guess on entry (f is applied to it to form the
 *           first residual); updated in place with the search-direction
 *           corrections.
 * Q         right-hand side.
 * max_iter  maximum number of iterations.
 * eps_sq    tolerance on the squared residual norm.
 * rel_prec  0: stop when res^2 <= eps_sq;
 *           1: stop when res^2 <= eps_sq * |Q|^2;
 *           any other value never satisfies the stopping test.
 * N         number of elements processed per vector.
 * lda       element stride between the three workspace vectors
 *           (presumably lda >= N is required -- confirm against callers).
 * f         operator application, called as f(out, in).
 *
 * Returns the number of iterations used on convergence (0 if the initial
 * residual is exactly zero), or -1 if the tolerance was not reached within
 * max_iter iterations or the workspace could not be allocated.
 */
int cgne4complex(_Complex double * const P, _Complex double * const Q, 
		 const int max_iter, const double eps_sq, const int rel_prec,
		 const int N, const int lda, c_matrix_mult f) {
  
  double normsq, pro, err, alpha_cg, beta_cg, squarenorm;
  _Complex double *w_f[3], * _w_f, *stmp;
  double atime, etime;
  int iter;
  int converged = 0;
  
  /* One allocation holds all three work vectors. The size is computed in
     size_t so that 3*lda cannot overflow int arithmetic. */
  _w_f = malloc(3 * (size_t)lda * sizeof *_w_f);
  if(_w_f == NULL) {
    /* NOTE(review): if the helpers below perform collective communication,
       all processes should take this path together -- confirm. */
    fprintf(stderr, "lCG: could not allocate workspace (lda = %d)\n", lda);
    return(-1);
  }
  w_f[0] = _w_f; w_f[1] = _w_f+lda; w_f[2] = _w_f+2*lda;
  
  /* initialize residue r (w_f[1]) and search vector p (w_f[2]) */
  atime = gettime();
  squarenorm = lsquare_norm(Q, N, 1);

  f(w_f[0], P);  

  ldiff(w_f[1], Q, w_f[0], N);
  memcpy(w_f[2], w_f[1], N*sizeof(_Complex double));
  normsq=lsquare_norm(w_f[1], N, 1);

  if(normsq == 0.) {
    /* The initial guess already has zero residual. Entering the loop would
       compute alpha_cg = 0/0 and write NaNs into P, so stop here. */
    iter = 0;
    converged = 1;
  }
  else {
    /* main loop */
    for(iter = 1; iter <= max_iter; iter++) {
      /* w_f[0] = f(p); step length from <p, f(p)> */
      f(w_f[0], w_f[2]);
      pro = lscalar_prod_r(w_f[2], w_f[0], N, 1);
      alpha_cg = normsq / pro;
      lassign_add_mul_r(P, w_f[2], alpha_cg, N);

      /* w_f[0] becomes the new residual, built from f(p) and the old
         residual in w_f[1] */
      lassign_mul_add_r(w_f[0], -alpha_cg, w_f[1], N);
      err = lsquare_norm(w_f[0], N, 1);
      if(g_proc_id == g_stdio_proc && g_debug_level > 2) {
        printf("lCG: iterations: %d res^2 %e\n", iter, err);
        fflush(stdout);
      }

      if (((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*squarenorm) && (rel_prec == 1))) {
        converged = 1;
        break;
      }

      /* new search direction, then swap the roles of the two residual
         buffers so that w_f[1] holds the current residual again */
      beta_cg = err / normsq;
      lassign_mul_add_r(w_f[2], beta_cg, w_f[0], N);
      stmp = w_f[0];
      w_f[0] = w_f[1];
      w_f[1] = stmp;
      normsq = err;
    }
  }
  etime = gettime();
  if(g_debug_level > 0 && g_proc_id == 0) {
    printf("# lCG: iter: %d eps_sq: %1.4e t/s: %1.4e\n", iter, eps_sq, etime-atime); 
  }
  free(_w_f);
  if(!converged) return(-1);
  return(iter);

}
コード例 #2
0
ファイル: gcr4complex.c プロジェクト: annube/tmLQCD
/*
 * Restarted GCR-type iteration (tagged "lGCR" in the log output) for
 * f(x) = Q on vectors of N complex numbers.
 *
 * P             in/out: initial guess on entry, updated at the end of every
 *               restart cycle.
 * Q             right-hand side.
 * m             maximum number of inner iterations per restart cycle.
 * max_restarts  maximum number of restart cycles.
 * eps_sq        tolerance on the squared residual norm.
 * rel_prec      0: absolute test, err <= eps_sq;
 *               1: relative test, err <= eps_sq * |Q|^2
 *               (|Q|^2 is replaced by 1 when it is below 1e-20).
 * N             number of elements processed per vector.
 * parallel      passed through to the norm / scalar-product helpers.
 * lda           passed to init_lgcr together with m.
 * f             operator application, called as f(out, in).
 *
 * The work arrays tmp, rho, xi, chi, a, b and c are not declared in this
 * function; presumably they are file-scope buffers set up by init_lgcr().
 *
 * Returns the total number of inner iterations performed once the true
 * residual, recomputed at the start of a restart cycle, meets the
 * tolerance. Returns -1 when all max_restarts cycles have been used; the
 * solution is not re-checked after the final cycle, so -1 is returned even
 * if the iterated residual met the tolerance in that last cycle.
 */
int gcr4complex(complex * const P, complex * const Q, 
		const int m, const int max_restarts,
		const double eps_sq, const int rel_prec,
		const int N, const int parallel, 
		const int lda, c_matrix_mult f) {
  
  int k, l, restart, i, p=0;
  /* err is initialised because the final debug printf reads it even when
     the restart loop never runs (max_restarts <= 0). */
  double norm_sq, err = 0.;
  complex ctmp;

  init_lgcr(m, lda);

  norm_sq = lsquare_norm(Q, N, parallel);
  if(norm_sq < 1.e-20) {
    /* avoid a vanishing reference norm in the relative test */
    norm_sq = 1.;
  }
  for(restart = 0; restart < max_restarts; restart++) {
    /* true residual rho = Q - f(P) for the current solution */
    f(tmp, P);
    ldiff(rho, Q, tmp, N);
    err = lsquare_norm(rho, N, parallel);
    if(g_proc_id == g_stdio_proc && g_debug_level > 1){/*CT: was "g_debug_level > 0" */
      printf("lGCR: %d\t%g true residue %1.3e\n", restart * m, err, norm_sq); 
      fflush(stdout);
    }
    if(((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq * norm_sq) && (rel_prec == 1))) {
      if(g_proc_id == 0 && g_debug_level > 1) printf("lgcr: %d %e %e %e %e\n", p, err, norm_sq, err/norm_sq, eps_sq);
      return (p);
    }
    /* inner loop: exits via break with k holding the last index used */
    for(k = 0; ; k++) {
      memcpy(xi[k], rho, N*sizeof(complex));
      /* here we could put in a preconditioner */
      f(tmp, xi[k]); 
      /* tmp will become chi[k]: first remove its components along the
         previous chi[l], storing the coefficients in a[l][k] */
      for(l = 0; l < k; l++) {
        a[l][k] = lscalar_prod(chi[l], tmp, N, parallel);
        lassign_diff_mul(tmp, chi[l], a[l][k], N);
      }
      /* normalise: b[k] is the norm of the remainder */
      b[k] = sqrt(lsquare_norm(tmp, N, parallel));
      lmul_r(chi[k], 1./b[k], tmp, N);
      /* project the residual onto chi[k] and remove that component */
      c[k] = lscalar_prod(chi[k], rho, N, parallel);
      lassign_diff_mul(rho, chi[k], c[k], N);
      err = lsquare_norm(rho, N, parallel);
      if(g_proc_id == g_stdio_proc && g_debug_level > 1){
        printf("lGCR: %d\t%g iterated residue\n", restart*m+k, err); 
        fflush(stdout);
      }
      p++;
      /* Cycle full, or precision reached? */
      if((k == m-1) || ((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*norm_sq) && (rel_prec == 1))) {
	break;
      }
    }
    /* prepare for restart: back-substitute from index k down to 0 to get
       the coefficients of the xi[l], accumulating the update into P */
    _mult_real(c[k], c[k], 1./b[k]);
    lassign_add_mul(P, xi[k], c[k], N);
    for(l = k-1; l >= 0; l--) {
      for(i = l+1; i <= k; i++) {
        /* presumably ctmp = a[l][i] * c[i] */
        _mult_assign_complex(ctmp, a[l][i], c[i]);
        /* c[l] -= ctmp */
        _diff_complex(c[l], ctmp);
      }
      _mult_real(c[l], c[l], 1./b[l]);
      lassign_add_mul(P, xi[l], c[l], N);
    }
  }
  if(g_proc_id == 0 && g_debug_level > 1) printf("lgcr: for -1 %d %e %e %e %e\n", p, err, norm_sq, err/norm_sq, eps_sq);
  return(-1);
}