/*
 * Conjugate residual (CR) iteration for the linear system f(P) = Q.
 *
 *  P             in/out: start vector on entry, current iterate on exit
 *  Q             right hand side
 *  m             maximal number of CR iterations within one cycle
 *  max_restarts  maximal number of cycles; at least one cycle is always run
 *  eps_sq        tolerance for the squared residual norm
 *  rel_prec      0: stop when err <= eps_sq
 *                1: stop when err <= eps_sq * ||Q||^2
 *                (||Q||^2 is replaced by 1 if it is smaller than 1.e-32)
 *  N             length argument handed to all linear algebra routines
 *  precon        not used by this routine
 *  f             operator, called as f(out, in)
 *
 * At the beginning of every cycle, and once more after the last one, the
 * residual Q - f(P) is recomputed from the current iterate.  If it meets the
 * tolerance, the number of iterations performed so far is returned,
 * otherwise -1 is returned after the last cycle.
 *
 * g_sloppy_precision is reset to the value it had on entry before every
 * residual recomputation, hence also on every return path.
 */
int cr(spinor * const P, spinor * const Q,
       const int m, const int max_restarts,
       const double eps_sq, const int rel_prec,
       const int N, const int precon, matrix_mult f) {
  int k, restart, iter = 0;
  double norm_sq, err;
  spinor *xi, *Axi, *chi, *Achi, *tmp;
  _Complex double alpha, beta;
  static _Complex double one = 1.0;
  double norm, rAr, newrAr;
  double atime, etime;
  spinor **solver_field = NULL;
  const int nr_sf = 5;
  const int save_sloppy = g_sloppy_precision;
  /* run at least one cycle, also for max_restarts <= 0 */
  const int nr_cycles = (max_restarts > 0) ? max_restarts : 1;

  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }

  atime = gettime();
  xi = solver_field[0];
  Axi = solver_field[1];
  chi = solver_field[2];
  Achi = solver_field[3];
  tmp = solver_field[4];

  norm_sq = square_norm(Q, N, 1);
  if(norm_sq < 1.e-32) {
    norm_sq = 1.;
  }

  for(restart = 0; ; restart++) {
    /* residual chi = Q - f(P), recomputed with the caller's precision setting */
    g_sloppy_precision = save_sloppy;
    dfl_sloppy_prec = 0;
    f(tmp, P);
    diff(chi, Q, tmp, N);
    err = square_norm(chi, N, 1);
    if(((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*norm_sq) && (rel_prec == 1))) {
      finalize_solver(solver_field, nr_sf);
      return(iter);
    }
    if(restart == nr_cycles) {
      /* all cycles used and the recomputed residual is still too large */
      break;
    }

    /* start a new cycle with search direction xi = chi */
    assign(xi, chi, N);
    f(Axi, xi);
    f(Achi, chi);
    rAr = scalar_prod(chi, Achi, N, 1);

    for(k = 0; k < m; k++) {
      dfl_sloppy_prec = 1;
      norm = square_norm(Axi, N, 1);
      alpha = rAr/norm;
      assign_add_mul(P, xi, alpha, N);
      /* get the new residual */
      assign_diff_mul(chi, Axi, alpha, N);

      err = square_norm(chi, N, 1);
      iter ++;
      etime = gettime();
      if(g_proc_id == g_stdio_proc && g_debug_level > 3){
        printf("# CR: %d\t%g iterated residue, time spent %f s\n", iter, err, (etime - atime));
        fflush(stdout);
      }
      /* Precision reached? The decision is taken on the recomputed residual above. */
      if((k == m-1) || ((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*norm_sq) && (rel_prec == 1))) {
        break;
      }
#ifdef _USE_HALFSPINOR
      if(((err*err <= eps_sq) && (rel_prec == 0)) || ((err*err <= eps_sq*norm_sq) && (rel_prec == 1))) {
        if (g_sloppy_precision_flag == 1) {
          g_sloppy_precision = 1;
          if(g_debug_level > 2 && g_proc_id == g_stdio_proc) {
            printf("sloppy precision on\n");
            fflush( stdout);
          }
        }
      }
#endif

      /* update the search direction xi and f(xi) */
      f(Achi, chi);
      newrAr = scalar_prod(chi, Achi, N, 1);
      beta = newrAr/rAr;
      assign_mul_add_mul(xi, beta, chi, one, N);
      assign_mul_add_mul(Axi,beta, Achi, one, N);
      rAr = newrAr;
    }
  }

  finalize_solver(solver_field, nr_sf);
  return(-1);
}
int update() //Basic HMC update step { double squnrm; int i, acc; double exphdiff; /* the new impulses and the 'generator' of the arbitrary pseudofield */ /* calculate the hamiltonian of this state: new impulses + action */ /* g_X is ab-used a bit - here it is \xi = (gamma5 D)^{-1} \phi */ ham_old = s_g_old; for(i=0; i<GRIDPOINTS; i++) { gp1[i] = gauss(); gp2[i] = gauss(); ham_old += 0.5*(gp1[i]*gp1[i] + gp2[i]*gp2[i]); } /* Now create the field and calculate its contributions to the action (end of the 'misuse') */ /* squnrm is the fermion part of the action : */ /* S = R^dagger * R = g_fermion^dag * D^{-1 dag} * D^{-1} * g_fermion = g_fermion Q^-1 g_fermion */ /* PF1 det(1/(Q^2 + mu^2)) */ for(i=0; i<GRIDPOINTS; i++) { g_X[i].s1 = (gauss() + I*gauss())/sqrt(2); //Gaussian fields R g_X[i].s2 = (gauss() + I*gauss())/sqrt(2); } squnrm = square_norm(g_X); // step iv): g_fermion = \phi = K^dag * g_X = K^dag * \xi gam5D_wilson(g_fermion, g_X); assign_diff_mul(g_fermion, g_X, 0.+I*sqrt(g_musqr)); ham_old += squnrm; /* PF2 det((Q^2 + mu^2)/Q^2) */ if(no_timescales > 2) { for(i=0; i<GRIDPOINTS; i++) { g_X[i].s1 = (gauss() + I*gauss())/sqrt(2); //Gaussian fields R g_X[i].s2 = (gauss() + I*gauss())/sqrt(2); } squnrm = square_norm(g_X); cg(g_fermion2, g_X, ITER_MAX, DELTACG, &gam5D_SQR_musqr_wilson); gam5D_wilson(g_gam5DX, g_fermion2); assign_add_mul(g_gam5DX, g_fermion2, 0.+I*sqrt(g_musqr)); gam5D_wilson(g_fermion2, g_gam5DX); ham_old += squnrm; } // Add the part for the fermion fields // Do the molecular dynamic chain /* the simple LF scheme */ /* the second order minimal norm multi-timescale integrator*/ /* MN2_integrator(g_steps, 2, g_steps*g_stepsize, 0.2); */ /* This is the recursive implementation */ /* in can be found in rec_lf_integrator.c|h */ if (no_timescales == 1) leapfrog(n_steps[0], tau/n_steps[0]); else integrate_leap_frog(tau/n_steps[no_timescales-1], no_timescales-1, no_timescales, n_steps, 1, up_momenta); // Calculate the new action and hamiltonian ham = 0; 
s_g = 0; for (i=0; i<GRIDPOINTS; i++) { s_g += S_G(i); ham += 0.5*(gp1[i]*gp1[i] + gp2[i]*gp2[i]); } /* Sum_ij [(g_fermion^*)_i (Q^-1)_ij (g_fermion)_j] = Sum_ij [(g_fermion^*)_i (g_X)_i] */ ham += s_g; // add in the part for the fermion fields. cg(g_X, g_fermion, ITER_MAX, DELTACG, &gam5D_SQR_musqr_wilson); ham += scalar_prod_r(g_fermion, g_X); if(no_timescales > 2) { cg(g_gam5DX, g_fermion2, ITER_MAX, DELTACG, &gam5D_SQR_wilson); gam5D_SQR_musqr_wilson(g_X, g_temp, g_gam5DX); ham += scalar_prod_r(g_fermion2, g_X); } exphdiff = exp(ham_old-ham); acc = accept(exphdiff); for(i=0; i<GRIDPOINTS; i++) { gauge1_old[i]=gauge1[i]; gauge2_old[i]=gauge2[i]; } s_g_old = s_g; return(acc); }
/*
 * Builds a trial vector for the solution of f(x) = phi from the n stored
 * vectors v[index_array[0..n-1]] ("CSG" in the debug output).
 *
 *  trial        output: the trial vector
 *  phi          right hand side
 *  v            stored vectors; v[index_array[i]] is modified for i < n-1 by
 *               the orthogonalisation against v[index_array[n-1]]
 *  index_array  maps position 0..n-1 to an index into v
 *  _N           leading dimension of the small matrix G
 *  _n           number of stored vectors to use
 *  V            length argument handed to all linear algebra routines
 *  f            operator, called as f(out, in); assumed to be hermitian
 *
 * The small system G y = bn with G = V^dagger f V and bn = V^dagger phi is
 * solved with LUSolve and trial = sum_i y_i v[index_array[i]] is formed.
 * If _N <= 0 or _n <= 0 there is nothing to project on and trial is set to
 * zero.  _n is limited to _N because G has leading dimension _N.
 *
 * The work space for G and bn is kept in static storage and grown when _N
 * exceeds its current size.  The program is terminated with a message if the
 * work space cannot be allocated.
 *
 * Returns info, which is 0 on every path at present.
 */
int chrono_guess(spinor * const trial, spinor * const phi, spinor ** const v, int index_array[],
                 const int _N, const int _n, const int V, matrix_mult f) {
  int info = 0;
  int i, j, N=_N, n=_n;
  _Complex double s;
  static int csg_capacity = 0;        /* G holds csg_capacity^2, bn csg_capacity entries */
  static _Complex double *bn = NULL;
  static _Complex double *G = NULL;
  const int max_N = 20;               /* minimal size of the work space */

  if(N <= 0 || n <= 0) {
    /* no stored vectors: bn[n-1] and index_array[n-1] below would be out of range */
    if(g_proc_id == 0 && g_debug_level > 1) {
      printf("CSG: using zero trial vector \n");
      fflush(stdout);
    }
    zero_spinor_field(trial, V);
    return(info);
  }
  if(n > N) {
    /* G is addressed as G[i*N + j], so at most N vectors fit */
    n = N;
  }

  if(g_proc_id == 0 && g_debug_level > 1) {
    printf("CSG: preparing trial vector \n");
    fflush(stdout);
  }

  if(csg_capacity < N) {
    const size_t cap = (size_t)((N > max_N) ? N : max_N);
    free(bn);
    free(G);
    bn = malloc(cap*sizeof(*bn));
    G = malloc(cap*cap*sizeof(*G));
    if(bn == NULL || G == NULL) {
      fprintf(stderr, "CSG: not enough memory for the work space! Aborting...\n");
      exit(1);
    }
    csg_capacity = (int)cap;
  }

  /* Construct an orthogonal basis */
  /* only the newest vector, position n-1, is projected out of the older ones */
  for(j = n-1; j > n-2; j--) {
    for(i = j-1; i > -1; i--) {
      s = scalar_prod(v[index_array[j]], v[index_array[i]], V, 1);
      assign_diff_mul(v[index_array[i]], v[index_array[j]], s, V);
      if(g_debug_level > 2) {
        s = scalar_prod(v[index_array[i]], v[index_array[j]], V, 1);
        if(g_proc_id == 0) {
          printf("CSG: <%d,%d> = %e +i %e \n", i, j, creal(s), cimag(s));fflush(stdout);
        }
      }
    }
  }

  /* Generate "interaction matrix" V^\dagger f V */
  /* We assume that f is hermitian               */
  /* Generate also the right hand side           */
  for (j = 0; j < n; j++){
    /* trial serves as scratch space for f v_j here */
    f(trial, v[index_array[j]]);

    /* Only the upper triangular part is computed, the lower one is its conjugate */
    for(i = 0; i < j+1; i++){
      G[i*N + j] = scalar_prod(v[index_array[i]], trial, V, 1);
      if(j != i) {
        (G[j*N + i]) = conj(G[i*N + j]);
      }
      if(g_proc_id == 0 && g_debug_level > 2) {
        printf("CSG: G[%d*N + %d]= %e + i %e  \n", i, j, creal(G[i*N + j]), cimag(G[i*N + j]));
        fflush(stdout);
      }
    }
    /* The right hand side */
    bn[j] = scalar_prod(v[index_array[j]], phi, V, 1);
  }

  /* Solve G y = bn for y and store it in bn */
  LUSolve(n, G, N, bn);

  /* Construct the new guess vector */
  /* NOTE(review): info is never modified above, so the else branch is not reached at present */
  if(info == 0) {
    mul(trial, bn[n-1], v[index_array[n-1]], V);
    if(g_proc_id == 0 && g_debug_level > 2) {
      /* print the coefficient that is actually used, i.e. position n-1 */
      printf("CSG: bn[%d] = %f %f\n", n-1, creal(bn[n-1]), cimag(bn[n-1]));
    }
    for(i = n-2; i > -1; i--) {
      assign_add_mul(trial, v[index_array[i]], bn[i], V);
      if(g_proc_id == 0 && g_debug_level > 2) {
        printf("CSG: bn[%d] = %f %f\n", i, creal(bn[i]), cimag(bn[i]));
      }
    }
  }
  else {
    assign(trial, phi, V);
  }

  if(g_proc_id == 0 && g_debug_level > 1) {
    printf("CSG: done! n= %d N=%d \n", n, N);fflush(stdout);
  }

  return(info);
}
/*
 * BiCGstab iteration on the fields g_spinor_field[DUM_SOLVER .. DUM_SOLVER+5]
 * with the operator Mtm_plus_psi.
 *
 *  k         in/out: start vector on entry, result on exit
 *  l         input
 *  eps_sq    tolerance for the squared residual norm
 *  rel_prec  0: absolute tolerance, 1: tolerance relative to ||l||^2
 *
 * At most ITER_MAX_BCG iterations are done.  If the tolerance is not reached
 * within them (or if ITER_MAX_BCG <= 0, in which case k is set to gamma5 l
 * first), solve_cg is called and Qtm_minus_psi is applied to its result.
 *
 * Returns the BiCGstab iteration count, or the return value of solve_cg if
 * the fall back was used.
 */
int bicg(spinor * const k, spinor * const l, double eps_sq, const int rel_prec) {
  const int half_vol = VOLUME/2;
  double res_sq, t_sq, rhs_sq = 0.;
  complex rho_old, rho_new, omega, alpha, beta, nom, denom;
  int iter;
  spinor *r, *p, *v, *hatr, *s, *t;

  if(ITER_MAX_BCG <= 0) {
    /* BiCGstab switched off: go directly to the CG fall back below */
    iter = ITER_MAX_BCG;
    gamma5(k, l, half_vol);
  }
  else {
    hatr = g_spinor_field[DUM_SOLVER];
    r    = g_spinor_field[DUM_SOLVER+1];
    v    = g_spinor_field[DUM_SOLVER+2];
    p    = g_spinor_field[DUM_SOLVER+3];
    s    = g_spinor_field[DUM_SOLVER+4];
    t    = g_spinor_field[DUM_SOLVER+5];

    rhs_sq = square_norm(l, half_vol, 1);

    /* start residual: gamma5 l - Mtm_plus_psi k, stored in p, r and hatr */
    Mtm_plus_psi(r, k);
    gamma5(hatr, l, half_vol);
    diff(p, hatr, r, half_vol);
    assign(r, p, half_vol);
    assign(hatr, p, half_vol);
    rho_old = scalar_prod(hatr, r, half_vol, 1);

    iter = 0;
    while(iter < ITER_MAX_BCG) {
      res_sq = square_norm(r, half_vol, 1);
      if(g_proc_id == g_stdio_proc && g_debug_level > 1) {
        printf("BiCGstab: iterations: %d res^2 %e\n", iter, res_sq);
        fflush(stdout);
      }
      if(((rel_prec == 0) && (res_sq <= eps_sq)) || ((rel_prec == 1) && (res_sq <= eps_sq*rhs_sq))) {
        break;
      }

      /* alpha = rho_old / <hatr, A p>,  s = r - alpha A p */
      Mtm_plus_psi(v, p);
      denom = scalar_prod(hatr, v, half_vol, 1);
      _div_complex(alpha, rho_old, denom);
      assign(s, r, half_vol);
      assign_diff_mul(s, v, alpha, half_vol);

      /* omega = <t, s> / <t, t> with t = A s */
      Mtm_plus_psi(t, s);
      omega = scalar_prod(t, s, half_vol, 1);
      t_sq = square_norm(t, half_vol, 1);
      omega.re /= t_sq;
      omega.im /= t_sq;

      /* new iterate and residual */
      assign_add_mul_add_mul(k, p, s, alpha, omega, half_vol);
      assign(r, s, half_vol);
      assign_diff_mul(r, t, omega, half_vol);

      /* beta = (alpha rho_new) / (omega rho_old) */
      rho_new = scalar_prod(hatr, r, half_vol, 1);
      _mult_assign_complex(nom, alpha, rho_new);
      _mult_assign_complex(denom, omega, rho_old);
      _div_complex(beta, nom, denom);

      /* new search direction; omega enters with flipped sign */
      omega.re = -omega.re;
      omega.im = -omega.im;
      assign_mul_bra_add_mul_ket_add(p, v, r, omega, beta, half_vol);

      rho_old = rho_new;
      iter++;
    }

    if(g_proc_id==0 && g_debug_level > 0) {
      printf("BiCGstab: iterations: %d eps_sq: %1.4e\n", iter, eps_sq);
    }
  }

  /* if bicg fails, redo with conjugate gradient */
  if(iter >= ITER_MAX_BCG) {
    iter = solve_cg(k, l, eps_sq, rel_prec);
    /* Saving the solution for reuse is not needed since the chronological inverter is there */
    Qtm_minus_psi(k, k);
  }
  return iter;
}
int main(int argc,char *argv[]) { FILE *parameterfile = NULL; char datafilename[206]; char parameterfilename[206]; char conf_filename[50]; char scalar_filename[50]; char * input_filename = NULL; char * filename = NULL; double plaquette_energy; #ifdef _USE_HALFSPINOR #undef _USE_HALFSPINOR printf("# WARNING: USE_HALFSPINOR will be ignored (not supported here).\n"); #endif if(even_odd_flag) { even_odd_flag=0; printf("# WARNING: even_odd_flag will be ignored (not supported here).\n"); } int j,j_max,k,k_max = 2; _Complex double * drvsc; #ifdef HAVE_LIBLEMON paramsXlfInfo *xlfInfo; #endif int status = 0; static double t1,t2,dt,sdt,dts,qdt,sqdt; double antioptaway=0.0; #ifdef MPI static double dt2; DUM_DERI = 6; DUM_SOLVER = DUM_DERI+2; DUM_MATRIX = DUM_SOLVER+6; NO_OF_SPINORFIELDS = DUM_MATRIX+2; #ifdef OMP int mpi_thread_provided; MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_thread_provided); #else MPI_Init(&argc, &argv); #endif MPI_Comm_rank(MPI_COMM_WORLD, &g_proc_id); #else g_proc_id = 0; #endif g_rgi_C1 = 1.; process_args(argc,argv,&input_filename,&filename); set_default_filenames(&input_filename, &filename); /* Read the input file */ if( (j = read_input(input_filename)) != 0) { fprintf(stderr, "Could not find input file: %s\nAborting...\n", input_filename); exit(-1); } if(g_proc_id==0) { printf("parameter rho_BSM set to %f\n", rho_BSM); printf("parameter eta_BSM set to %f\n", eta_BSM); printf("parameter m0_BSM set to %f\n", m0_BSM); } #ifdef OMP init_openmp(); #endif tmlqcd_mpi_init(argc, argv); if(g_proc_id==0) { #ifdef SSE printf("# The code was compiled with SSE instructions\n"); #endif #ifdef SSE2 printf("# The code was compiled with SSE2 instructions\n"); #endif #ifdef SSE3 printf("# The code was compiled with SSE3 instructions\n"); #endif #ifdef P4 printf("# The code was compiled for Pentium4\n"); #endif #ifdef OPTERON printf("# The code was compiled for AMD Opteron\n"); #endif #ifdef _GAUGE_COPY printf("# The code was compiled with 
-D_GAUGE_COPY\n"); #endif #ifdef BGL printf("# The code was compiled for Blue Gene/L\n"); #endif #ifdef BGP printf("# The code was compiled for Blue Gene/P\n"); #endif #ifdef _USE_HALFSPINOR printf("# The code was compiled with -D_USE_HALFSPINOR\n"); #endif #ifdef _USE_SHMEM printf("# The code was compiled with -D_USE_SHMEM\n"); #ifdef _PERSISTENT printf("# The code was compiled for persistent MPI calls (halfspinor only)\n"); #endif #endif #ifdef MPI #ifdef _NON_BLOCKING printf("# The code was compiled for non-blocking MPI calls (spinor and gauge)\n"); #endif #endif printf("\n"); fflush(stdout); } #ifdef _GAUGE_COPY init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1); #else init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0); #endif init_geometry_indices(VOLUMEPLUSRAND + g_dbw2rand); j = init_bispinor_field(VOLUMEPLUSRAND, 12); if ( j!= 0) { fprintf(stderr, "Not enough memory for bispinor fields! Aborting...\n"); exit(0); } j = init_spinor_field(VOLUMEPLUSRAND, 12); if ( j!= 0) { fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n"); exit(0); } int numbScalarFields = 4; j = init_scalar_field(VOLUMEPLUSRAND, numbScalarFields); if ( j!= 0) { fprintf(stderr, "Not enough memory for scalar fields! Aborting...\n"); exit(0); } drvsc = malloc(18*VOLUMEPLUSRAND*sizeof(_Complex double)); if(g_proc_id == 0) { fprintf(stdout,"# The number of processes is %d \n",g_nproc); printf("# The lattice size is %d x %d x %d x %d\n", (int)(T*g_nproc_t), (int)(LX*g_nproc_x), (int)(LY*g_nproc_y), (int)(g_nproc_z*LZ)); printf("# The local lattice size is %d x %d x %d x %d\n", (int)(T), (int)(LX), (int)(LY),(int) LZ); fflush(stdout); } /* define the geometry */ geometry(); j = init_bsm_2hop_lookup(VOLUME); if ( j!= 0) { // this should not be reached since the init function calls fatal_error anyway fprintf(stderr, "Not enough memory for BSM2b 2hop lookup table! 
Aborting...\n"); exit(0); } /* define the boundary conditions for the fermion fields */ /* for the actual inversion, this is done in invert.c as the operators are iterated through */ // // For the BSM operator we don't use kappa normalisation, // as a result, when twisted boundary conditions are applied this needs to be unity. // In addition, unlike in the Wilson case, the hopping term comes with a plus sign. // However, in boundary(), the minus sign for the Wilson case is implicitly included. // We therefore use -1.0 here. boundary(-1.0); status = check_geometry(); if (status != 0) { fprintf(stderr, "Checking of geometry failed. Unable to proceed.\nAborting....\n"); exit(1); } #if (defined MPI && !(defined _USE_SHMEM)) // fails, we're not using spinor fields // check_xchange(); #endif start_ranlux(1, 123456); // read gauge field if( strcmp(gauge_input_filename, "create_random_gaugefield") == 0 ) { random_gauge_field(reproduce_randomnumber_flag, g_gauge_field); } else { sprintf(conf_filename, "%s.%.4d", gauge_input_filename, nstore); if (g_cart_id == 0) { printf("#\n# Trying to read gauge field from file %s in %s precision.\n", conf_filename, (gauge_precision_read_flag == 32 ? "single" : "double")); fflush(stdout); } int i; if( (i = read_gauge_field(conf_filename,g_gauge_field)) !=0) { fprintf(stderr, "Error %d while reading gauge field from %s\n Aborting...\n", i, conf_filename); exit(-2); } if (g_cart_id == 0) { printf("# Finished reading gauge field.\n"); fflush(stdout); } } // read scalar field if( strcmp(scalar_input_filename, "create_random_scalarfield") == 0 ) { for( int s=0; s<numbScalarFields; s++ ) ranlxd(g_scalar_field[s], VOLUME); } else { sprintf(scalar_filename, "%s.%d", scalar_input_filename, nscalar); if (g_cart_id == 0) { printf("#\n# Trying to read scalar field from file %s in %s precision.\n", scalar_filename, (scalar_precision_read_flag == 32 ? 
"single" : "double")); fflush(stdout); } int i; if( (i = read_scalar_field(scalar_filename,g_scalar_field)) !=0) { fprintf(stderr, "Error %d while reading scalar field from %s\n Aborting...\n", i, scalar_filename); exit(-2); } if (g_cart_id == 0) { printf("# Finished reading scalar field.\n"); fflush(stdout); } } #ifdef MPI xchange_gauge(g_gauge_field); #endif /*compute the energy of the gauge field*/ plaquette_energy = measure_plaquette( (const su3**) g_gauge_field); if (g_cart_id == 0) { printf("# The computed plaquette value is %e.\n", plaquette_energy / (6.*VOLUME*g_nproc)); fflush(stdout); } #ifdef MPI for( int s=0; s<numbScalarFields; s++ ) generic_exchange(g_scalar_field[s], sizeof(scalar)); #endif /*initialize the bispinor fields*/ j_max=1; sdt=0.; // w random_spinor_field_lexic( (spinor*)(g_bispinor_field[4]), reproduce_randomnumber_flag, RN_GAUSS); random_spinor_field_lexic( (spinor*)(g_bispinor_field[4])+VOLUME, reproduce_randomnumber_flag, RN_GAUSS); // for the D^\dagger test: // v random_spinor_field_lexic( (spinor*)(g_bispinor_field[5]), reproduce_randomnumber_flag, RN_GAUSS); random_spinor_field_lexic( (spinor*)(g_bispinor_field[5])+VOLUME, reproduce_randomnumber_flag, RN_GAUSS); #if defined MPI generic_exchange(g_bispinor_field[4], sizeof(bispinor)); #endif // print L2-norm of source: double squarenorm = square_norm((spinor*)g_bispinor_field[4], 2*VOLUME, 1); if(g_proc_id==0) { printf("\n# square norm of the source: ||w||^2 = %e\n\n", squarenorm); fflush(stdout); } double t_MG, t_BK; /* inversion needs to be done first because it uses loads of the g_bispinor_fields internally */ #if TEST_INVERSION if(g_proc_id==1) printf("Testing inversion\n"); // Bartek's operator t1 = gettime(); cg_her_bi(g_bispinor_field[9], g_bispinor_field[4], 25000, 1.0e-14, 0, VOLUME, &Q2_psi_BSM2b); t_BK = gettime() - t1; // Marco's operator t1 = gettime(); cg_her_bi(g_bispinor_field[8], g_bispinor_field[4], 25000, 1.0e-14, 0, VOLUME, &Q2_psi_BSM2m); t_MG = gettime() - t1; 
if(g_proc_id==0) printf("Operator inversion time: t_MG = %f sec \t t_BK = %f sec\n\n", t_MG, t_BK); #endif /* now apply the operators to the same bispinor field and do various comparisons */ // Marco's operator #ifdef MPI MPI_Barrier(MPI_COMM_WORLD); #endif t_MG = 0.0; t1 = gettime(); D_psi_BSM2m(g_bispinor_field[0], g_bispinor_field[4]); t1 = gettime() - t1; #ifdef MPI MPI_Allreduce (&t1, &t_MG, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); #else t_MG = t1; #endif // Bartek's operator #ifdef MPI MPI_Barrier(MPI_COMM_WORLD); #endif t_BK = 0.0; t1 = gettime(); D_psi_BSM2b(g_bispinor_field[1], g_bispinor_field[4]); t1 = gettime() - t1; #ifdef MPI MPI_Allreduce (&t1, &t_BK, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); #else t_BK = t1; #endif if(g_proc_id==0) printf("Operator application time: t_MG = %f sec \t t_BK = %f sec\n\n", t_MG, t_BK); squarenorm = square_norm((spinor*)g_bispinor_field[0], 2*VOLUME, 1); if(g_proc_id==0) { printf("# || D_MG w ||^2 = %.16e\n", squarenorm); fflush(stdout); } squarenorm = square_norm((spinor*)g_bispinor_field[1], 2*VOLUME, 1); if(g_proc_id==0) { printf("# || D_BK w ||^2 = %.16e\n\n\n", squarenorm); fflush(stdout); } diff( (spinor*)g_bispinor_field[3], (spinor*)g_bispinor_field[0], (spinor*)g_bispinor_field[1], 2*VOLUME); printf("element-wise difference between (D_BK w) and (D_MG w)\n"); printf("( D_MG w - M_BK w )->sp_up.s0.c0= %.16e + I*(%.16e)\n\n", creal(g_bispinor_field[3][0].sp_up.s0.c0), cimag(g_bispinor_field[3][0].sp_up.s0.c0) ); double diffnorm = square_norm( (spinor*) g_bispinor_field[3], 2*VOLUME, 1 ); if(g_proc_id==0){ printf("Square norm of the difference\n"); printf("|| D_MG w - D_BK w ||^2 = %.16e \n\n\n", diffnorm); } // < D w, v > printf("Check consistency of D and D^dagger\n"); _Complex double prod1_MG = scalar_prod( (spinor*)g_bispinor_field[0], (spinor*)g_bispinor_field[5], 2*VOLUME, 1 ); if(g_proc_id==0) printf("< D_MG w, v > = %.16e + I*(%.16e)\n", creal(prod1_MG), cimag(prod1_MG)); _Complex double prod1_BK = 
scalar_prod( (spinor*)g_bispinor_field[1], (spinor*)g_bispinor_field[5], 2*VOLUME, 1 ); if(g_proc_id==0) printf("< D_BK w, v > = %.16e + I*(%.16e)\n\n", creal(prod1_BK), cimag(prod1_BK)); // < w, D^\dagger v > t_MG = gettime(); D_psi_dagger_BSM2m(g_bispinor_field[6], g_bispinor_field[5]); t_MG = gettime()-t_MG; t_BK = gettime(); D_psi_dagger_BSM2b(g_bispinor_field[7], g_bispinor_field[5]); t_BK = gettime() - t_BK; if(g_proc_id==0) printf("Operator dagger application time: t_MG = %f sec \t t_BK = %f sec\n\n", t_MG, t_BK); _Complex double prod2_MG = scalar_prod((spinor*)g_bispinor_field[4], (spinor*)g_bispinor_field[6], 2*VOLUME, 1); _Complex double prod2_BK = scalar_prod((spinor*)g_bispinor_field[4], (spinor*)g_bispinor_field[7], 2*VOLUME, 1); if( g_proc_id == 0 ){ printf("< w, D_MG^dagger v > = %.16e + I*(%.16e)\n", creal(prod2_MG), cimag(prod2_MG)); printf("< w, D_BK^dagger v > = %.16e + I*(%.16e)\n", creal(prod2_BK), cimag(prod2_BK)); printf("\n| < D_MG w, v > - < w, D_MG^dagger v > | = %.16e\n",cabs(prod2_MG-prod1_MG)); printf("| < D_BK w, v > - < w, D_BK^dagger v > | = %.16e\n\n",cabs(prod2_BK-prod1_BK)); } #if TEST_INVERSION // check result of inversion Q2_psi_BSM2m(g_bispinor_field[10], g_bispinor_field[8]); Q2_psi_BSM2b(g_bispinor_field[11], g_bispinor_field[8]); assign_diff_mul((spinor*)g_bispinor_field[10], (spinor*)g_bispinor_field[4], 1.0, 2*VOLUME); assign_diff_mul((spinor*)g_bispinor_field[11], (spinor*)g_bispinor_field[4], 1.0, 2*VOLUME); double squarenorm_MGMG = square_norm((spinor*)g_bispinor_field[10], 2*VOLUME, 1); double squarenorm_BKMG = square_norm((spinor*)g_bispinor_field[11], 2*VOLUME, 1); if(g_proc_id==0) { printf("# ||Q2_MG*(Q2_MG)^-1*(b)-b||^2 = %.16e\n\n", squarenorm_MGMG); printf("# ||Q2_BK*(Q2_MG)^-1*(b)-b||^2 = %.16e\n\n", squarenorm_BKMG); fflush(stdout); } Q2_psi_BSM2b(g_bispinor_field[10], g_bispinor_field[9]); Q2_psi_BSM2m(g_bispinor_field[11], g_bispinor_field[9]); assign_diff_mul((spinor*)g_bispinor_field[10], 
(spinor*)g_bispinor_field[4], 1.0, 2*VOLUME); assign_diff_mul((spinor*)g_bispinor_field[11], (spinor*)g_bispinor_field[4], 1.0, 2*VOLUME); double squarenorm_BKBK = square_norm((spinor*)g_bispinor_field[10], 2*VOLUME, 1); double squarenorm_MGBK = square_norm((spinor*)g_bispinor_field[11], 2*VOLUME, 1); if(g_proc_id==0) { printf("# ||Q2_BK*(Q2_BK)^-1*(b)-b||^2 = %.16e\n\n", squarenorm_BKBK); printf("# ||Q2_MG*(Q2_BK)^-1*(b)-b||^2 = %.16e\n\n", squarenorm_MGBK); fflush(stdout); } #endif #ifdef OMP free_omp_accumulators(); #endif free_gauge_field(); free_geometry_indices(); free_bispinor_field(); free_scalar_field(); #ifdef MPI MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); #endif return(0); }
/*
 * Restarted GCR iteration for f(P) = Q, working on the fields xi[], chi[]
 * and the coefficients a[][], b[], c[] set up by init_gcr.
 *
 *  P             in/out: start vector on entry, current iterate on exit
 *  Q             right hand side
 *  m             maximal number of iterations between two restarts
 *  max_restarts  maximal number of restarts
 *  eps_sq        tolerance for the squared residual norm
 *  rel_prec      0: stop when err <= eps_sq
 *                1: stop when err <= eps_sq * ||Q||^2
 *                (||Q||^2 is replaced by 1 if it is smaller than 1.e-32)
 *  N             length argument handed to all linear algebra routines
 *  precon        0: xi[k] is the residual itself, otherwise Msap_eo is applied
 *  f             operator, called as f(out, in)
 *
 * Returns the number of iterations if the residual recomputed at the start
 * of a restart meets the tolerance, -1 if max_restarts is exhausted.
 */
int gcr(spinor * const P, spinor * const Q,
        const int m, const int max_restarts,
        const double eps_sq, const int rel_prec,
        const int N, const int precon, matrix_mult f) {
  int col, row, j, cycle, iter = 0;
  int tol_reached;
  double rhs_sq, res_sq;
  spinor *rho, *tmp;
  complex prod;
  spinor **solver_field = NULL;
  const int nr_sf = 2;

  if(N != VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  rho = solver_field[0];
  tmp = solver_field[1];

  init_gcr(m, N+RAND);

  rhs_sq = square_norm(Q, N, 1);
  if(rhs_sq < 1.e-32) {
    rhs_sq = 1.;
  }

  for(cycle = 0; cycle < max_restarts; cycle++) {
    /* residual rho = Q - f(P), recomputed from the current iterate */
    dfl_sloppy_prec = 0;
    f(tmp, P);
    diff(rho, Q, tmp, N);
    res_sq = square_norm(rho, N, 1);
    if(g_proc_id == g_stdio_proc && g_debug_level > 2){
      printf("GCR: iteration number: %d, true residue: %g\n", iter, res_sq);
      fflush(stdout);
    }
    tol_reached = ((rel_prec == 0) && (res_sq <= eps_sq)) ||
                  ((rel_prec == 1) && (res_sq <= eps_sq*rhs_sq));
    if(tol_reached) {
      finalize_solver(solver_field, nr_sf);
      return(iter);
    }

    for(col = 0; col < m; col++) {
      /* new direction xi[col]: the residual, optionally preconditioned */
      if(precon != 0) {
        zero_spinor_field(xi[col], N);
        Msap_eo(xi[col], rho, 6);
      }
      else {
        assign(xi[col], rho, N);
      }

      dfl_sloppy_prec = 1;
      dfl_little_D_prec = 1.e-12;
      f(tmp, xi[col]);

      /* orthogonalise tmp against chi[0..col-1]; tmp will become chi[col] */
      for(row = 0; row < col; row++) {
        a[row][col] = scalar_prod(chi[row], tmp, N, 1);
        assign_diff_mul(tmp, chi[row], a[row][col], N);
      }
      b[col] = sqrt(square_norm(tmp, N, 1));
      mul_r(chi[col], 1./b[col], tmp, N);

      /* remove the chi[col] component from the residual */
      c[col] = scalar_prod(chi[col], rho, N, 1);
      assign_diff_mul(rho, chi[col], c[col], N);
      res_sq = square_norm(rho, N, 1);
      iter ++;

      if(g_proc_id == g_stdio_proc && g_debug_level > 0){
        if(rel_prec == 1) {
          printf("# GCR: %d\t%g >= %g iterated residue\n", iter, res_sq, eps_sq*rhs_sq);
        }
        else {
          printf("# GCR: %d\t%g >= %giterated residue\n", iter, res_sq, eps_sq);
        }
        fflush(stdout);
      }

      /* Precision reached? */
      tol_reached = ((rel_prec == 0) && (res_sq <= eps_sq)) ||
                    ((rel_prec == 1) && (res_sq <= eps_sq*rhs_sq));
      if((col == m-1) || tol_reached) {
        break;
      }
    }

    /* prepare for restart: back substitution for c and update of P, */
    /* starting at the last direction used                           */
    _mult_real(c[col], c[col], 1./b[col]);
    assign_add_mul(P, xi[col], c[col], N);
    for(row = col-1; row >= 0; row--) {
      for(j = row+1; j <= col; j++) {
        _mult_assign_complex(prod, a[row][j], c[j]);
        /* c[row] -= prod */
        _diff_complex(c[row], prod);
      }
      _mult_real(c[row], c[row], 1./b[row]);
      assign_add_mul(P, xi[row], c[row], N);
    }
  }

  finalize_solver(solver_field, nr_sf);
  return(-1);
}
/*
 * GMRES with restarts for f(P) = Q in which, according to the comments in
 * the code, nr_ev harmonic Ritz vectors are carried over from one cycle to
 * the next.  Works on the arrays V, Z, H, G, work, work2, alpha, c, s, idx,
 * evalues and sortarray, which are not declared here (init_gmres_dr is
 * called first).
 *
 *  P             in/out: start vector on entry, current iterate on exit
 *  Q             right hand side
 *  m             number of basis vectors per cycle
 *  nr_ev         number of vectors kept at a restart
 *  max_restarts  maximal number of cycles (the first, plain GMRES cycle included)
 *  eps_sq        squared tolerance; the tests use eps = sqrt(eps_sq)
 *  rel_prec      0: absolute test, 1: test relative to ||Q||; only used in
 *                the first cycle, the restart cycles test err < eps
 *  N             length argument handed to all linear algebra routines
 *  f             operator, called as f(out, in)
 *
 * Returns restart*m (+j within the first cycle) when a stopping test is met
 * and -1 if max_restarts is exhausted.  P holds the current iterate in all
 * cases.
 *
 * The work space for zgels is allocated in the first restart cycle and
 * released on every return path that follows.  The program is terminated
 * with a message if it cannot be allocated.
 */
int gmres_dr(spinor * const P,spinor * const Q,
             const int m, const int nr_ev, const int max_restarts,
             const double eps_sq, const int rel_prec,
             const int N, matrix_mult f){

  int restart=0, i, j, k, l;
  double beta, eps, norm, beta2=0.;
  complex *lswork = NULL;
  int lwork;
  complex tmp1, tmp2;
  int info=0;
  int _m = m, mp1 = m+1, np1 = nr_ev+1, ne = nr_ev, V2 = 12*(VOLUMEPLUSRAND)/2, _N = 12*N;
  spinor ** solver_field = NULL;
  const int nr_sf = 3;

  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }
  double err=0.;
  spinor * r0, * x0;

  cmone.re = -1.; cmone.im=0.;
  cpone.re = 1.; cpone.im=0.;
  czero.re = 0.; czero.im = 0.;

  r0 = solver_field[0];
  x0 = solver_field[2];
  eps=sqrt(eps_sq);
  init_gmres_dr(m, (VOLUMEPLUSRAND));
  norm = sqrt(square_norm(Q, N, 1));

  assign(x0, P, N);

  /* first normal GMRES cycle */
  /* r_0=Q-AP  (b=Q, x+0=P) */
  f(r0, x0);
  diff(r0, Q, r0, N);

  /* v_0=r_0/||r_0|| */
  alpha[0].re=sqrt(square_norm(r0, N, 1));
  err = alpha[0].re;

  if(g_proc_id == g_stdio_proc && g_debug_level > 0){
    printf("%d\t%e true residue\n", restart*m, alpha[0].re*alpha[0].re);
    fflush(stdout);
  }

  if(alpha[0].re==0.){
    /* the start vector is already exact */
    assign(P, x0, N);
    finalize_solver(solver_field, nr_sf);
    return(restart*m);
  }

  mul_r(V[0], 1./alpha[0].re, r0, N);

  for(j = 0; j < m; j++){
    /* solver_field[1] = A*v_j */
    /* Set h_ij and omega_j */
    /* solver_field[1] <- omega_j */
    f(solver_field[1], V[j]);
    for(i = 0; i <= j; i++){
      H[i][j] = scalar_prod(V[i], solver_field[1], N, 1);
      /* G, work and work2 are in Fortran storage: columns first */
      G[j][i] = H[i][j];
      work2[j][i] = H[i][j];
      work[i][j].re = H[i][j].re;
      work[i][j].im = -H[i][j].im;
      assign_diff_mul(solver_field[1], V[i], H[i][j], N);
    }

    _complex_set(H[j+1][j], sqrt(square_norm(solver_field[1], N, 1)), 0.);
    G[j][j+1] = H[j+1][j];
    work2[j][j+1] = H[j+1][j];
    work[j+1][j].re =  H[j+1][j].re;
    work[j+1][j].im =  -H[j+1][j].im;
    beta2 = H[j+1][j].re*H[j+1][j].re;
    /* apply the rotations (c[i], s[i]) stored so far to the new column of H */
    for(i = 0; i < j; i++){
      tmp1 = H[i][j];
      tmp2 = H[i+1][j];
      _mult_real(H[i][j], tmp2, s[i]);
      _add_assign_complex_conj(H[i][j], c[i], tmp1);
      _mult_real(H[i+1][j], tmp1, s[i]);
      _diff_assign_complex(H[i+1][j], c[i], tmp2);
    }

    /* Set beta, s, c, alpha[j],[j+1] */
    beta = sqrt(_complex_square_norm(H[j][j]) + _complex_square_norm(H[j+1][j]));
    s[j] = H[j+1][j].re / beta;
    _mult_real(c[j], H[j][j], 1./beta);
    _complex_set(H[j][j], beta, 0.);
    _mult_real(alpha[j+1], alpha[j], s[j]);
    tmp1 = alpha[j];
    _mult_assign_complex_conj(alpha[j], c[j], tmp1);

    /* precision reached? */
    if(g_proc_id == g_stdio_proc && g_debug_level > 0){
      printf("%d\t%e residue\n", restart*m+j, alpha[j+1].re*alpha[j+1].re);
      fflush(stdout);
    }
    if(((alpha[j+1].re <= eps) && (rel_prec == 0)) || ((alpha[j+1].re <= eps*norm) && (rel_prec == 1))){
      /* back substitution with the triangular H and update of x0 */
      _mult_real(alpha[j], alpha[j], 1./H[j][j].re);
      assign_add_mul(x0, V[j], alpha[j], N);
      for(i = j-1; i >= 0; i--){
        for(k = i+1; k <= j; k++){
          _mult_assign_complex(tmp1, H[i][k], alpha[k]);
          /* alpha[i] -= tmp1 */
          _diff_complex(alpha[i], tmp1);
        }
        _mult_real(alpha[i], alpha[i], 1./H[i][i].re);
        assign_add_mul(x0, V[i], alpha[i], N);
      }
      for(i = 0; i < m; i++){
        alpha[i].im = 0.;
      }
      assign(P, x0, N);
      finalize_solver(solver_field, nr_sf);
      return(restart*m+j);
    }
    /* if not */
    else {
      mul_r(V[(j+1)], 1./H[j+1][j].re, solver_field[1], N);
    }
  }
  j=m-1;
  /* prepare for restart */
  _mult_real(alpha[j], alpha[j], 1./H[j][j].re);
  assign_add_mul(x0, V[j], alpha[j], N);
  if(g_proc_id == 0 && g_debug_level > 3) {
    printf("alpha: %e %e\n", alpha[j].re, alpha[j].im);
  }
  for(i = j-1; i >= 0; i--){
    for(k = i+1; k <= j; k++){
      _mult_assign_complex(tmp1, H[i][k], alpha[k]);
      _diff_complex(alpha[i], tmp1);
    }
    _mult_real(alpha[i], alpha[i], 1./H[i][i].re);
    if(g_proc_id == 0 && g_debug_level > 3) {
      printf("alpha: %e %e\n", alpha[i].re, alpha[i].im);
    }
    assign_add_mul(x0, V[i], alpha[i], N);
  }

  /* This produces c=V_m+1*r0 */
  for(i = 0; i < mp1; i++) {
    c[i] = scalar_prod(V[i], r0, N, 1);
    if(g_proc_id == 0 && g_debug_level > 3) {
      printf("c: %e %e err = %e\n", c[i].re, c[i].im, err);
    }
  }

  for(restart = 1; restart < max_restarts; restart++) {

    /* compute c-\bar H \alpha */
    _FT(zgemv) ("N", &mp1, &_m, &cmone, G[0], &mp1, alpha, &one, &cpone, c, &one, 1);
    err = sqrt(short_scalar_prod(c, c, mp1).re);
    if(g_proc_id == 0 && g_debug_level > 0) {
      printf("%d\t %e short residue\n", m*restart, err*err);
    }

    /* Compute new residual r0 */
    /* r_0=Q-AP  (b=Q, x+0=P) */
    if(g_debug_level > 0) {
      /* only for the debug output; r0 is overwritten right below */
      f(r0, x0);
      diff(r0, Q, r0, N);
      tmp1.im=sqrt(square_norm(r0, N, 1));
      if(g_proc_id == g_stdio_proc){
        printf("%d\t%e true residue\n", m*restart, tmp1.im*tmp1.im);
        fflush(stdout);
      }
    }
    mul(r0, c[0], V[0], N);
    for(i = 1; i < mp1; i++) {
      assign_add_mul(r0, V[i], c[i], N);
    }
    if(g_debug_level > 3) {
      tmp1.im=sqrt(square_norm(r0, N, 1));
      if(g_proc_id == g_stdio_proc){
        printf("%d\t%e residue\n", m*restart, tmp1.im*tmp1.im);
        fflush(stdout);
      }
    }

    /* Stop if satisfied */
    if(err < eps){
      assign(P, x0, N);
      free(lswork);   /* NULL during the first restart cycle, which is fine for free */
      finalize_solver(solver_field, nr_sf);
      return(restart*m);
    }

    /* Prepare to compute harmonic Ritz pairs */
    for(i = 0; i < m-1; i++){
      alpha[i].re = 0.;
      alpha[i].im = 0.;
    }
    alpha[m-1].re = 1.;
    alpha[m-1].im = 0.;
    _FT(zgesv) (&_m, &one, work[0], &mp1, idx, alpha, &_m, &info);
    for(i = 0; i < m; i++) {
      G[m-1][i].re += (beta2*alpha[idx[i]-1].re);
      G[m-1][i].im += (beta2*alpha[idx[i]-1].im);
    }
    if(g_proc_id == 0 && g_debug_level > 3){
      printf("zgesv returned info = %d, c[m-1]= %e, %e , idx[m-1]=%d\n",
             info, alpha[idx[m-1]-1].re, alpha[idx[m-1]-1].im, idx[m-1]);
    }
    /* c - \bar H * d -> c */
    /* G contains H + \beta^2 H^-He_n e_n^H */

    /* Compute harmonic Ritz pairs */
    diagonalise_general_matrix(m, G[0], mp1, alpha, evalues);
    for(i = 0; i < m; i++) {
      sortarray[i] = _complex_square_norm(evalues[i]);
      idx[i] = i;
    }
    quicksort(m, sortarray, idx);
    if(g_proc_id == g_stdio_proc && g_debug_level > 1) {
      for(i = 0; i < m; i++) {
        printf("# Evalues %d %e  %e \n", i, evalues[idx[i]].re, evalues[idx[i]].im);
      }
      fflush(stdout);
    }

    /* Copy the first nr_ev eigenvectors to work */
    for(i = 0; i < ne; i++) {
      for(l = 0; l < m; l++) {
        work[i][l] = G[idx[i]][l];
      }
    }
    /* Orthonormalize them */
    for(i = 0; i < ne; i++) {
      work[i][m].re = 0.;
      work[i][m].im = 0.;
      short_ModifiedGS(work[i], m, i, work[0], mp1);
    }
    /* Orthonormalize c - \bar H d to work */
    short_ModifiedGS(c, m+1, ne, work[0], mp1);
    for(i = 0; i < mp1; i++) {
      work[nr_ev][i] = c[i];
    }
    /* Now compute \bar H = P^T_k+1 \bar H_m P_k */
    for(i = 0; i < mp1; i++) {
      for(l = 0; l < mp1; l++) {
        H[i][l].re = 0.;
        H[i][l].im = 0.;
      }
    }

    _FT(zgemm) ("N", "N", &mp1, &ne, &_m, &cpone, work2[0], &mp1, work[0], &mp1, &czero, G[0], &mp1, 1, 1);
    _FT(zgemm) ("C", "N", &np1, &ne , &mp1, &cpone, work[0], &mp1, G[0], &mp1, &czero, H[0], &mp1, 1, 1);

    if(g_debug_level > 3) {
      for(i = 0; i < ne+1; i++) {
        for(l = 0; l < ne+1; l++) {
          if(g_proc_id == 0) {
            printf("(g[%d], g[%d]) = %e, %e\n", i, l, short_scalar_prod(work[i], work[l], m+1).re,
                   short_scalar_prod(work[i], work[l], m+1).im);
            printf("(g[%d], g[%d]) = %e, %e\n", l, i, short_scalar_prod(work[l], work[i], m+1).re,
                   short_scalar_prod(work[l], work[i], m+1).im);
          }
        }
      }
    }
    /* V_k+1 = V_m+1 P_k+1 */
    /* done with explicit loops; the equivalent call would be            */
    /* _FT(zgemm) ("N", "N", &_N, &np1, &mp1, &cpone, (complex*)V[0], &V2, work[0], &mp1, &czero, (complex*)Z[0], &V2, 1, 1); */
    for(l = 0; l < np1; l++) {
      mul(Z[l], work[l][0], V[0], N);
      for(i = 1; i < mp1; i++) {
        assign_add_mul(Z[l], V[i], work[l][i], N);
      }
    }
    /* copy back to V */
    for(i = 0; i < np1; i++) {
      assign(V[i], Z[i], N);
    }
    /* Reorthogonalise v_nr_ev */
    ModifiedGS((complex*)V[nr_ev], _N, nr_ev, (complex*)V[0], V2);
    if(g_debug_level > 3) {
      for(i = 0; i < np1; i++) {
        for(l = 0; l < np1; l++) {
          tmp1 = scalar_prod(V[l], V[i], N, 1);
          if(g_proc_id == 0) {
            printf("(V[%d], V[%d]) = %e %e %d %d %d %d %d %d %e %e\n", l, i, tmp1.re, tmp1.im, np1, mp1, ne, _m, _N, V2, H[l][i].re, H[l][i].im);
          }
        }
      }
    }
    /* Copy the content of H to work, work2 and G */
    for(i=0; i < mp1; i++) {
      for(l = 0; l < mp1; l++) {
        G[i][l] = H[i][l];
        work2[i][l] = H[i][l];
        work[l][i].re = H[i][l].re;
        work[l][i].im = -H[i][l].im;
      }
    }

    /* extend the basis from nr_ev to m vectors */
    for(j = ne; j < m; j++) {
      /* solver_field[1] = A*v_j */
      f(solver_field[1], V[j]);

      /* Set h_ij and omega_j */
      /* solver_field[1] <- omega_j */
      for(i = 0; i <= j; i++){
        H[j][i] = scalar_prod(V[i], solver_field[1], N, 1);
        /* H, G, work and work2 are now all in Fortran storage: columns first */
        G[j][i] = H[j][i];
        work2[j][i] = H[j][i];
        work[i][j].re = H[j][i].re;
        work[i][j].im = -H[j][i].im;
        assign_diff_mul(solver_field[1], V[i], H[j][i], N);
      }
      beta2 = square_norm(solver_field[1], N, 1);
      _complex_set(H[j][j+1], sqrt(beta2), 0.);
      G[j][j+1] = H[j][j+1];
      work2[j][j+1] = H[j][j+1];
      work[j+1][j].re =  H[j][j+1].re;
      work[j+1][j].im =  -H[j][j+1].im;
      mul_r(V[(j+1)], 1./H[j][j+1].re, solver_field[1], N);
    }

    /* Solve the least square problem for alpha*/
    /* This produces c=V_m+1*r0 */
    for(i = 0; i < mp1; i++) {
      c[i] = scalar_prod(V[i], r0, N, 1);
      alpha[i] = c[i];
      if(g_proc_id == 0 && g_debug_level > 3) {
        printf("c: %e %e err = %e\n", c[i].re, c[i].im, err);
      }
    }
    if(lswork == NULL) {
      /* work space query (lwork = -1): the size comes back in tmp1.re */
      lwork = -1;
      _FT(zgels) ("N", &mp1, &_m, &one, H[0], &mp1, alpha, &mp1, &tmp1, &lwork, &info, 1);
      lwork = (int)tmp1.re;
      lswork = (complex*)malloc(lwork*sizeof(complex));
      if(lswork == NULL) {
        fprintf(stderr, "gmres_dr: not enough memory for the zgels work space! Aborting...\n");
        exit(1);
      }
    }
    _FT(zgels) ("N", &mp1, &_m, &one, H[0], &mp1, alpha, &mp1, lswork, &lwork, &info, 1);
    if(g_proc_id == 0 && g_debug_level > 3) {
      printf("zgels returned info = %d\n", info);
      fflush(stdout);
    }
    /* Compute the new solution vector */
    for(i = 0; i < m; i++){
      if(g_proc_id == 0 && g_debug_level > 3) {
        printf("alpha: %e %e\n", alpha[i].re, alpha[i].im);
      }
      assign_add_mul(x0, V[i], alpha[i], N);
    }
  }

  /* If maximal number of restart is reached */
  assign(P, x0, N);
  free(lswork);
  finalize_solver(solver_field, nr_sf);
  return(-1);
}
/*
 * BiCGstab iteration for f(P) = Q.
 *
 *  P         in/out: guess for the solution on entry, iterate on exit
 *  Q         input: right hand side
 *  max_iter  maximal number of iterations
 *  eps_sq    tolerance for the squared residual norm
 *  rel_prec  0: absolute tolerance, 1: tolerance relative to ||Q||^2
 *  N         length argument handed to all linear algebra routines
 *  f         operator, called as f(out, in)
 *
 * Returns the iteration count at which the tolerance was found to be met
 * (this is only tested from the second pass on, so at least one iteration is
 * always done), and -1 if max_iter is exhausted or if both components of
 * <hatr, r> are below 1.e-25 in modulus.
 */
int bicgstab_complex(spinor * const P,spinor * const Q, const int max_iter,
                     double eps_sq, const int rel_prec,
                     const int N, matrix_mult f){
  double res_sq, t_sq, rhs_sq;
  complex rho_old, rho_new, omega, alpha, beta, nom, denom;
  int iter;
  int tol_reached;
  spinor *r, *p, *v, *hatr, *s, *t;
  spinor **solver_field = NULL;
  const int nr_sf = 6;

  if(N != VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }

  hatr = solver_field[0];
  r    = solver_field[1];
  v    = solver_field[2];
  p    = solver_field[3];
  s    = solver_field[4];
  t    = solver_field[5];

  /* start residual Q - f(P), stored in p, r and hatr */
  f(r, P);
  diff(p, Q, r, N);
  assign(r, p, N);
  assign(hatr, p, N);
  rho_old = scalar_prod(hatr, r, N, 1);
  rhs_sq = square_norm(Q, N, 1);

  for(iter = 0; iter < max_iter; iter++) {
    res_sq = square_norm(r, N, 1);
    if(g_proc_id == g_stdio_proc && g_debug_level > 1) {
      printf("%d %e\n", iter, res_sq);
      fflush(stdout);
    }

    tol_reached = ((rel_prec == 0) && (res_sq <= eps_sq)) ||
                  ((rel_prec == 1) && (res_sq <= eps_sq*rhs_sq));
    if(tol_reached && iter > 0) {
      finalize_solver(solver_field, nr_sf);
      return(iter);
    }

    /* alpha = rho_old / <hatr, f p>,  s = r - alpha f p */
    f(v, p);
    denom = scalar_prod(hatr, v, N, 1);
    _div_complex(alpha, rho_old, denom);
    assign(s, r, N);
    assign_diff_mul(s, v, alpha, N);

    /* omega = <t, s> / <t, t> with t = f s */
    f(t, s);
    omega = scalar_prod(t, s, N, 1);
    t_sq = square_norm(t, N, 1);
    omega.re /= t_sq;
    omega.im /= t_sq;

    /* new iterate and residual */
    assign_add_mul_add_mul(P, p, s, alpha, omega, N);
    assign(r, s, N);
    assign_diff_mul(r, t, omega, N);

    rho_new = scalar_prod(hatr, r, N, 1);
    if(fabs(rho_new.re) < 1.e-25 && fabs(rho_new.im) < 1.e-25) {
      /* <hatr, r> has vanished: give up */
      finalize_solver(solver_field, nr_sf);
      return(-1);
    }

    /* beta = (alpha rho_new) / (omega rho_old) */
    _mult_assign_complex(nom, alpha, rho_new);
    _mult_assign_complex(denom, omega, rho_old);
    _div_complex(beta, nom, denom);

    /* new search direction; omega enters with flipped sign */
    omega.re = -omega.re;
    omega.im = -omega.im;
    assign_mul_bra_add_mul_ket_add(p, v, r, omega, beta, N);

    rho_old = rho_new;
  }

  finalize_solver(solver_field, nr_sf);
  return -1;
}
/*
 * Shared tail of a gmres() cycle: back-substitution on the file-scope
 * arrays H and alpha for the indices j, j-1, ..., 0, adding alpha[i]*V[i]
 * to x as each coefficient becomes available, and finally clearing the
 * imaginary parts of alpha[0..m-1] for the next cycle.
 *
 * Used both when the precision is reached at step j and when a cycle ends
 * at j = m-1 without convergence.
 */
static void gmres_update_solution(spinor * const x, const int j,
                                  const int m, const int N) {
  int i, k;
  complex tmp1;

  _mult_real(alpha[j], alpha[j], 1./H[j][j].re);
  assign_add_mul(x, V[j], alpha[j], N);
  for(i = j-1; i >= 0; i--){
    for(k = i+1; k <= j; k++){
      _mult_assign_complex(tmp1, H[i][k], alpha[k]);
      _diff_complex(alpha[i], tmp1);
    }
    _mult_real(alpha[i], alpha[i], 1./H[i][i].re);
    assign_add_mul(x, V[i], alpha[i], N);
  }
  for(i = 0; i < m; i++){
    alpha[i].im = 0.;
  }
}

/*
 * Restarted GMRES(m) for the linear system  f * P = Q.
 *
 *   P             in/out: initial guess on entry, last iterate on exit
 *   Q             input:  right-hand side
 *   m             number of steps per restart cycle
 *   max_restarts  maximal number of restart cycles
 *   eps_sq        squared precision; its square root is compared with the
 *                 residual estimate alpha[j+1].re
 *   rel_prec      0: stop when the estimate is <= sqrt(eps_sq)
 *                 1: stop when the estimate is <= sqrt(eps_sq) * |Q|
 *   N             length argument handed to all linear algebra routines;
 *                 N == VOLUME selects full-size work fields, anything else
 *                 half-size ones
 *   parallel      passed through to square_norm() and scalar_prod()
 *   f             operator application, called as f(out, in)
 *
 * Works on the file-scope arrays H, V, alpha, c and s set up by
 * init_gmres().
 *
 * Returns restart*m + j when the precision is reached at step j of a
 * cycle, restart*m when the true residual at the start of a cycle is
 * exactly zero, and -1 when max_restarts cycles did not converge. In all
 * cases the current iterate is copied to P before returning.
 */
int gmres(spinor * const P,spinor * const Q,
          const int m, const int max_restarts,
          const double eps_sq, const int rel_prec,
          const int N, const int parallel, matrix_mult f){
  int restart, i, j;
  double beta, eps, norm;
  complex tmp1, tmp2;
  spinor ** solver_field = NULL;
  const int nr_sf = 3;

  if(N == VOLUME) {
    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
  }
  else {
    init_solver_field(&solver_field, VOLUMEPLUSRAND/2, nr_sf);
  }

  eps=sqrt(eps_sq);
  init_gmres(m, VOLUMEPLUSRAND);

  norm = sqrt(square_norm(Q, N, parallel));

  /* solver_field[2] holds the current iterate throughout */
  assign(solver_field[2], P, N);
  for(restart = 0; restart < max_restarts; restart++){
    /* r_0=Q-AP  (b=Q, x+0=P) */
    f(solver_field[0], solver_field[2]);
    diff(solver_field[0], Q, solver_field[0], N);

    /* v_0=r_0/||r_0|| */
    alpha[0].re=sqrt(square_norm(solver_field[0], N, parallel));

    if(g_proc_id == g_stdio_proc && g_debug_level > 1){
      printf("%d\t%g true residue\n", restart*m, alpha[0].re*alpha[0].re);
      fflush(stdout);
    }

    /* exactly vanishing residual: nothing left to do (and no division by
     * zero in the normalisation below) */
    if(alpha[0].re==0.){
      assign(P, solver_field[2], N);
      finalize_solver(solver_field, nr_sf);
      return(restart*m);
    }

    mul_r(V[0], 1./alpha[0].re, solver_field[0], N);

    for(j = 0; j < m; j++){
      /* solver_field[0]=A*v_j */
      f(solver_field[0], V[j]);

      /* Set h_ij and omega_j */
      /* solver_field[1] <- omega_j */
      assign(solver_field[1], solver_field[0], N);
      for(i = 0; i <= j; i++){
        H[i][j] = scalar_prod(V[i], solver_field[1], N, parallel);
        assign_diff_mul(solver_field[1], V[i], H[i][j], N);
      }

      _complex_set(H[j+1][j], sqrt(square_norm(solver_field[1], N, parallel)), 0.);

      /* apply the transformations stored in c[i], s[i] from the earlier
       * steps to the new column j of H */
      for(i = 0; i < j; i++){
        tmp1 = H[i][j];
        tmp2 = H[i+1][j];
        _mult_real(H[i][j], tmp2, s[i]);
        _add_assign_complex_conj(H[i][j], c[i], tmp1);
        _mult_real(H[i+1][j], tmp1, s[i]);
        _diff_assign_complex(H[i+1][j], c[i], tmp2);
      }

      /* Set beta, s, c, alpha[j],[j+1] */
      beta = sqrt(_complex_square_norm(H[j][j]) + _complex_square_norm(H[j+1][j]));
      s[j] = H[j+1][j].re / beta;
      _mult_real(c[j], H[j][j], 1./beta);
      _complex_set(H[j][j], beta, 0.);
      _mult_real(alpha[j+1], alpha[j], s[j]);
      tmp1 = alpha[j];
      _mult_assign_complex_conj(alpha[j], c[j], tmp1);

      /* precision reached? */
      if(g_proc_id == g_stdio_proc && g_debug_level > 1){
        printf("%d\t%g residue\n", restart*m+j, alpha[j+1].re*alpha[j+1].re);
        fflush(stdout);
      }
      if(((alpha[j+1].re <= eps) && (rel_prec == 0))
         || ((alpha[j+1].re <= eps*norm) && (rel_prec == 1))){
        gmres_update_solution(solver_field[2], j, m, N);
        assign(P, solver_field[2], N);
        finalize_solver(solver_field, nr_sf);
        return(restart*m+j);
      }
      /* if not */
      else{
        /* next basis vector; not needed after the last step of a cycle */
        if(j != m-1){
          mul_r(V[(j+1)], 1./H[j+1][j].re, solver_field[1], N);
        }
      }
    }

    /* prepare for restart: fold the full cycle (last index m-1) into the
     * iterate */
    gmres_update_solution(solver_field[2], m-1, m, N);
  }

  /* If maximal number of restarts is reached */
  assign(P, solver_field[2], N);
  finalize_solver(solver_field, nr_sf);
  return(-1);
}