/**
 * Conjugate-gradient style iteration on vectors of _Complex double.
 *
 * Starting from the guess stored in P, the routine repeatedly applies the
 * operator f and updates P in place until the squared norm of the iterated
 * residual satisfies the requested tolerance or max_iter iterations have
 * been performed.
 *
 * @param P         in: initial guess, out: updated iterate (modified in place)
 * @param Q         right-hand side
 * @param max_iter  maximal number of iterations
 * @param eps_sq    tolerance on the squared residual norm
 * @param rel_prec  0: stop when err <= eps_sq,
 *                  1: stop when err <= eps_sq * |Q|^2;
 *                  any other value disables the stopping test
 * @param N         element count handed to the linear algebra helpers and
 *                  to memcpy
 * @param lda       number of elements reserved for each of the three work
 *                  vectors (presumably lda >= N and large enough for
 *                  whatever f writes -- TODO confirm against the callers)
 * @param f         operator, called as f(out, in)
 *
 * @return number of iterations performed when the tolerance was reached
 *         (0 if the initial guess already satisfies it), -1 if the
 *         tolerance was not reached within max_iter iterations or if the
 *         work space could not be allocated.
 */
int cgne4complex(_Complex double * const P, _Complex double * const Q,
                 const int max_iter, const double eps_sq, const int rel_prec,
                 const int N, const int lda, c_matrix_mult f) {
  double normsq, pro, err, alpha_cg, beta_cg, squarenorm;
  _Complex double *w_f[3], *_w_f, *stmp;
  double atime, etime;
  int iter = 0;
  int converged = 0;

  /* one contiguous allocation holding the three work vectors;
     the size is computed in size_t to avoid int overflow for large lda */
  _w_f = malloc((size_t)3 * (size_t)lda * sizeof *_w_f);
  if(_w_f == NULL) {
    fprintf(stderr, "cgne4complex: could not allocate work space\n");
    return(-1);
  }
  w_f[0] = _w_f;
  w_f[1] = _w_f + (size_t)lda;
  w_f[2] = _w_f + (size_t)2 * (size_t)lda;

  /* initialize residue r (w_f[1]) and search vector p (w_f[2]) */
  atime = gettime();
  squarenorm = lsquare_norm(Q, N, 1);

  f(w_f[0], P);
  ldiff(w_f[1], Q, w_f[0], N);
  memcpy(w_f[2], w_f[1], (size_t)N * sizeof(_Complex double));
  normsq = lsquare_norm(w_f[1], N, 1);

  /* If the initial guess already fulfils the stopping criterion there is
     nothing to do. Without this test a vanishing residual would lead to
     alpha_cg = 0/0 in the first iteration and NaNs would be written to P. */
  if(((normsq <= eps_sq) && (rel_prec == 0)) ||
     ((normsq <= eps_sq*squarenorm) && (rel_prec == 1))) {
    converged = 1;
  }
  else {
    /* main loop */
    for(iter = 1; iter <= max_iter; iter++) {
      /* w_f[0] = f(p) */
      f(w_f[0], w_f[2]);
      pro = lscalar_prod_r(w_f[2], w_f[0], N, 1);
      alpha_cg = normsq / pro;
      /* update the iterate along the search direction */
      lassign_add_mul_r(P, w_f[2], alpha_cg, N);

      /* new residual, stored in w_f[0]; the previous one stays in w_f[1] */
      lassign_mul_add_r(w_f[0], -alpha_cg, w_f[1], N);
      err = lsquare_norm(w_f[0], N, 1);

      if(g_proc_id == g_stdio_proc && g_debug_level > 2) {
        printf("lCG: iterations: %d res^2 %e\n", iter, err);
        fflush(stdout);
      }

      if(((err <= eps_sq) && (rel_prec == 0)) ||
         ((err <= eps_sq*squarenorm) && (rel_prec == 1))) {
        converged = 1;
        break;
      }

      /* new search vector from the old one and the new residual */
      beta_cg = err / normsq;
      lassign_mul_add_r(w_f[2], beta_cg, w_f[0], N);

      /* swap the pointers so that w_f[1] holds the current residual again */
      stmp = w_f[0];
      w_f[0] = w_f[1];
      w_f[1] = stmp;
      normsq = err;
    }
  }

  etime = gettime();
  if(g_debug_level > 0 && g_proc_id == 0) {
    printf("# lCG: iter: %d eps_sq: %1.4e t/s: %1.4e\n", iter, eps_sq, etime-atime);
  }
  free(_w_f);

  if(!converged) {
    return(-1);
  }
  return(iter);
}
/**
 * Restarted GCR-type iteration on vectors of type complex.
 *
 * The work vectors tmp, rho, xi[], chi[] and the coefficient arrays a, b, c
 * are not declared here; they are presumably file-scope objects set up by
 * init_lgcr(m, lda) -- TODO confirm.
 *
 * Every restart cycle first recomputes the true residual rho from Q and
 * f applied to P and tests it against the tolerance. If the test fails, up
 * to m directions are built: xi[k] is the current residual, chi[k] is f
 * applied to xi[k], made orthogonal to the earlier chi[l] and normalised,
 * and rho is reduced by its component c[k] along chi[k]. Afterwards the
 * coefficients are back-substituted and P is updated from the xi[l].
 *
 * @param P             in: initial guess, out: updated iterate (in place)
 * @param Q             right-hand side
 * @param m             maximal number of directions per restart cycle;
 *                      for m < 1 no direction is built
 * @param max_restarts  maximal number of restart cycles
 * @param eps_sq        tolerance on the squared residual norm
 * @param rel_prec      0: stop when err <= eps_sq,
 *                      1: stop when err <= eps_sq * |Q|^2 (with |Q|^2
 *                      replaced by 1 if it is below 1.e-20);
 *                      any other value disables the stopping test
 * @param N             element count handed to the linear algebra helpers
 * @param parallel      flag forwarded to lsquare_norm and lscalar_prod
 * @param lda           forwarded to init_lgcr
 * @param f             operator, called as f(out, in)
 *
 * @return total number of inner iterations performed so far if the true
 *         residual checked at the start of a restart cycle meets the
 *         tolerance, -1 otherwise.
 *
 * NOTE(review): the update of P made in the last restart cycle is never
 * re-checked, so -1 is returned even if that cycle reached the tolerance.
 */
int gcr4complex(complex * const P, complex * const Q,
                const int m, const int max_restarts,
                const double eps_sq, const int rel_prec,
                const int N, const int parallel,
                const int lda, c_matrix_mult f) {
  int k, l, restart, i, p = 0;
  double norm_sq;
  /* initialised because it is printed below even if no restart cycle ran */
  double err = 0.;
  complex ctmp;

  init_lgcr(m, lda);

  norm_sq = lsquare_norm(Q, N, parallel);
  /* avoid a relative tolerance measured against a (numerically) zero source */
  if(norm_sq < 1.e-20) {
    norm_sq = 1.;
  }

  for(restart = 0; restart < max_restarts; restart++) {
    /* true residual rho from Q and f applied to P */
    f(tmp, P);
    ldiff(rho, Q, tmp, N);
    err = lsquare_norm(rho, N, parallel);
    if(g_proc_id == g_stdio_proc && g_debug_level > 1) {
      printf("lGCR: %d\t%g true residue %1.3e\n", restart * m, err, norm_sq);
      fflush(stdout);
    }
    if(((err <= eps_sq) && (rel_prec == 0)) ||
       ((err <= eps_sq * norm_sq) && (rel_prec == 1))) {
      if(g_proc_id == 0 && g_debug_level > 1) {
        printf("lgcr: %d %e %e %e %e\n", p, err, norm_sq, err/norm_sq, eps_sq);
      }
      return (p);
    }

    /* With m < 1 the inner loop below would never hit its k == m-1 bound
       and k would run past the m entries requested from init_lgcr. */
    if(m < 1) {
      break;
    }

    for(k = 0; ; k++) {
      memcpy(xi[k], rho, N*sizeof(complex));
      /* here we could put in a preconditioner */
      f(tmp, xi[k]);
      /* tmp will become chi[k]: remove the components along earlier chi[l] */
      for(l = 0; l < k; l++) {
        a[l][k] = lscalar_prod(chi[l], tmp, N, parallel);
        lassign_diff_mul(tmp, chi[l], a[l][k], N);
      }
      /* normalise */
      b[k] = sqrt(lsquare_norm(tmp, N, parallel));
      lmul_r(chi[k], 1./b[k], tmp, N);
      /* take the chi[k] component out of the residual */
      c[k] = lscalar_prod(chi[k], rho, N, parallel);
      lassign_diff_mul(rho, chi[k], c[k], N);
      err = lsquare_norm(rho, N, parallel);
      if(g_proc_id == g_stdio_proc && g_debug_level > 1) {
        printf("lGCR: %d\t%g iterated residue\n", restart*m+k, err);
        fflush(stdout);
      }
      p++;
      /* Precision reached, or all m directions used? */
      if((k == m-1) ||
         ((err <= eps_sq) && (rel_prec == 0)) ||
         ((err <= eps_sq*norm_sq) && (rel_prec == 1))) {
        break;
      }
    }

    /* prepare for restart: back substitution for the coefficients of the
       xi[l], from the last direction k down to 0, accumulating into P */
    _mult_real(c[k], c[k], 1./b[k]);
    lassign_add_mul(P, xi[k], c[k], N);
    for(l = k-1; l >= 0; l--) {
      for(i = l+1; i <= k; i++) {
        _mult_assign_complex(ctmp, a[l][i], c[i]);
        /* c[l] -= ctmp */
        _diff_complex(c[l], ctmp);
      }
      _mult_real(c[l], c[l], 1./b[l]);
      lassign_add_mul(P, xi[l], c[l], N);
    }
  }

  if(g_proc_id == 0 && g_debug_level > 1) {
    printf("lgcr: for -1 %d %e %e %e %e\n", p, err, norm_sq, err/norm_sq, eps_sq);
  }
  return(-1);
}