/*
 * Runs the FDTD calculation twice -- first on a reference structure built from
 * `air`, then on the structure built from `air_glass_grating` -- and prints the
 * frequency-dependent transmission (T), reflection (R) and R+T, followed by the
 * output of the angle-resolved detectors.
 *
 *   c : field component driven by the line source and written to HDF5 at the end
 *   a : value forwarded to vol2d() and to the angleResolvedDetectors2D constructor
 *
 * All geometry, source and frequency settings (size_x, h_PML, freq_min, ...)
 * are read from variables declared outside this function.
 *
 * NOTE: this function delete[]s eps_file_name and flux_file_name, which are
 * not allocated here.
 */
void simulate_RT(component c, double a){
  /* Memory Allocation, Structures and Fields */
  grid_volume v = vol2d(size_x,size_y,a);          /* Grid volume for computations */
  structure s0(v,air,pml(h_PML,Y));                /* Reference case: no scatterers; PML termination in the y-direction */
  structure s(v,air_glass_grating,pml(h_PML,Y));   /* Structure to be simulated; PML termination in the y-direction */
  fields f0(&s0);                                  /* Fields for reference case */
  fields f(&s);                                    /* Fields for simulation structure */

  /* Outputting dielectric function as .h5 file */
  h5file *eps_file_ptr=f.open_h5file(eps_file_name);
  f.output_hdf5(Dielectric, v.surroundings(), eps_file_ptr, true);
  delete eps_file_ptr;  /* the handle returned by open_h5file() is ours to release; it was previously leaked */

  /* Flux Lines for Transmissions and Reflection Detectors */
  volume flux_line_trans(vec(0,h_PML+4*h_sep+d),vec(size_x,h_PML+4*h_sep+d));
  volume flux_line_refl(vec(0,h_PML+2*h_sep),vec(size_x,h_PML+2*h_sep));

  /* Appropriate Bloch Boundary Conditions */
  double k_x=n_air*freq_centre*sin(theta_degrees*const_pi/180.0);
  f0.use_bloch(vec(k_x,0.0));
  f.use_bloch(vec(k_x,0.0));

  /* Light Sources */
  gaussian_src_time src(freq_centre, 0.5/pw_freq_width, 0, 5/pw_freq_width); /* Time-domain definition of source */
  volume src_line(vec(0,h_PML+h_sep),vec(size_x,h_PML+h_sep));
  f0.add_volume_source(c,src,src_line,src_spatial_modulator,1.0);
  f.add_volume_source(c,src,src_line,src_spatial_modulator,1.0);
  master_printf("# Line source(s) added ...\n");

  /* Fluxes for Transmission, Reflection */
  dft_flux f_t0 = f0.add_dft_flux_plane(flux_line_trans,min2(freq_min,freq_max),max2(freq_min,freq_max),num_freqs);
  dft_flux f_t = f.add_dft_flux_plane(flux_line_trans,min2(freq_min,freq_max),max2(freq_min,freq_max),num_freqs);
  dft_flux f_r0 = f0.add_dft_flux_plane(flux_line_refl,min2(freq_min,freq_max),max2(freq_min,freq_max),num_freqs);
  dft_flux f_r = f.add_dft_flux_plane(flux_line_refl,min2(freq_min,freq_max),max2(freq_min,freq_max),num_freqs);
  angleResolvedDetectors2D *ard = new angleResolvedDetectors2D(h_PML+4*h_sep, h_PML+2*h_sep, size_x, a, angle_res_degrees, freq_min, freq_max, num_freqs, theta_degrees, n_air, n_air, degree);

  /* ---- Reference run ---- */
  master_printf("# Simulating reference structure ...\n");
  double t_final_src_0=f0.last_source_time(), t_final_sim_0=t_final_src_0+duration_factor*num_freqs/pw_freq_width/2;
  master_printf("\tparameter__user_inaccessible:\tt_final_src_0 = %f\n",t_final_src_0);
  master_printf("\tparameter__user_inaccessible:\tt_final_sim_0 = %f\n",t_final_sim_0);
  while(f0.time() < t_final_sim_0){ /* Time-stepping -- reference structure */
    f0.step();
    double t=f0.time();
    ard->update(t,f0,reference);
  }
  /* The reference reflection-plane data is saved so it can be subtracted from the test run below */
  f_r0.save_hdf5(f0, flux_file_name, "reflection");
  ard->finalize_update(reference);

  /* ---- Test run ---- */
  master_printf("# Simulating test structure ...\n");
  double t_final_src=f.last_source_time(), t_final_sim=t_final_src+duration_factor*num_freqs/pw_freq_width/2;
  master_printf("\tparameter__user_inaccessible:\tt_final_src = %f\n",t_final_src);
  master_printf("\tparameter__user_inaccessible:\tt_final_sim = %f\n",t_final_sim);
  /* Preload the reflection plane with minus the reference data */
  f_r.load_hdf5(f, flux_file_name, "reflection");
  f_r.scale_dfts(-1.0);
  while(f.time() < t_final_sim){ /* Time-stepping -- simulated structure */
    f.step();
    double t=f.time();
    ard->update(t,f,simulation);
  }
  f.output_hdf5(c, v.surroundings()); /* Outputting electric field as .h5 file */
  ard->finalize_update(simulation);

  /* Flux spectra; the returned arrays are released with delete[] below */
  double *flux_t = f_t.flux();
  double *flux_t0 = f_t0.flux();
  double *flux_r = f_r.flux();
  double *flux_r0 = f_r0.flux();

  double *T = new double[num_freqs]; /* Array to store transmission coefficients (frequency-dependent) */
  double *R = new double[num_freqs]; /* Array to store reflection coefficients (frequency-dependent) */
  for (int i=0; i<num_freqs; ++i){ /* Calculating transmission, reflection coefficients */
    T[i] = flux_t[i] / flux_t0[i];
    R[i] = -flux_r[i] / flux_r0[i];
  }

  /* Frequency step of the printed axis. With a single frequency there is no
     step; guard against dividing by zero (which produced NaN frequencies).
     NOTE(review): the flux planes are sampled between min2(freq_min,freq_max)
     and max2(freq_min,freq_max); this axis assumes pw_freq_width equals that
     span and that freq_min is its lower edge -- confirm. */
  double dfreq = (num_freqs > 1) ? pw_freq_width / (num_freqs-1) : 0.0;
  master_printf("transmission:, omega, T\n");
  master_printf("reflection:, omega, R\n");
  master_printf("addition_check:, omega, R+T\n");
  for (int l=0; l<num_freqs; ++l){ /* Printing transmission coefficient values */
    master_printf("transmission:, %f, %f\n",freq_min+l*dfreq,T[l]);
    master_printf("reflection:, %f, %f\n",freq_min+l*dfreq,R[l]);
    master_printf("addition_check:, %f, %f\n",freq_min+l*dfreq,T[l]+R[l]);
  }

  ard->print_angle_unresolved_T();
  ard->print_angle_unresolved_R();
  ard->print_angle_resolved_T(file_name_prefix);
  ard->print_angle_resolved_R(file_name_prefix);

  /* Cleanup at end of code execution */
  delete [] eps_file_name;
  delete [] flux_file_name;
  delete [] flux_t;
  delete [] flux_t0;
  delete [] flux_r;
  delete [] flux_r0;
  delete [] T;
  delete [] R;
  delete ard;
}
int test_spincolor_writing_and_reading() { master_printf("\nGenerating random source of +-1\n"); spincolor *source=nissa_malloc("source",loc_vol,spincolor); spincolor *source2=nissa_malloc("source2",loc_vol,spincolor); if(nissa_loc_rnd_gen_inited) stop_loc_rnd_gen(); start_loc_rnd_gen(2342); generate_undiluted_source(source,RND_Z4,-1); write_spincolor("test_wr",source,64); read_spincolor(source2,"test_wr"); int loc_ret=memcmp(source,source2,sizeof(spincolor)*loc_vol); int ret; MPI_Allreduce(&loc_ret,&ret,1,MPI_INT,MPI_SUM,MPI_COMM_WORLD); if(ret) master_printf("Erorr, read data differs from memory!\n"); else master_printf("Read the same data present in memory\n"); master_printf("Removing temporary file \"test_wr\"\n"); if(rank==0) system("rm -vf test_wr"); nissa_free(source); nissa_free(source2); return !ret; }
/* Destructor: prints the elapsed run time unless `quiet` is set and, in
   builds with HAVE_MPI defined, ends the parallel division and finalizes MPI. */
initialize::~initialize() {
  if (!quiet) {
    master_printf("\nElapsed run time = %g s\n", elapsed_time());
  }
#ifdef HAVE_MPI
  end_divide_parallel();
  MPI_Finalize();
#endif
}
int test_Q2tm_inversion() { //load the well known source master_printf("\nLoading conf\n"); quad_su3 *conf=nissa_malloc("conf",loc_vol+bord_vol+edge_vol,quad_su3); read_ildg_gauge_conf(conf,"../../data/L4T8conf"); //generate the classic source master_printf("Generating source\n"); spincolor *source=nissa_malloc("source",loc_vol+bord_vol,spincolor); if(nissa_loc_rnd_gen_inited) stop_loc_rnd_gen(); start_loc_rnd_gen(2342); generate_undiluted_source(source,RND_Z4,-1); //invert Q2 double kappa=0.177000; double mu=0.50; double prec=1.e-25; spincolor *inver=nissa_malloc("inver",loc_vol+bord_vol,spincolor); inv_Q2_cg(inver,source,NULL,conf,kappa,mu,1000000,5,prec); //now compare with saved data master_printf("Reading saved spincolor\n"); spincolor *comp=nissa_malloc("comp",loc_vol,spincolor); read_spincolor(comp,"../../data/Q2tm_inv"); //compare the weighted norm double loc_weighted_norm=0,weighted_norm; NISSA_LOC_VOL_LOOP(ivol) for(int id=0;id<4;id++) for(int ic=0;ic<3;ic++) for(int ri=0;ri<2;ri++) loc_weighted_norm+=sqr((comp[ivol][id][ic][ri]-inver[ivol][id][ic][ri])/ (comp[ivol][id][ic][ri]+inver[ivol][id][ic][ri])); MPI_Allreduce(&loc_weighted_norm,&weighted_norm,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD); weighted_norm=sqrt(weighted_norm/glb_vol/12); double tolerance=1.e-13; master_printf("Difference: %lg, tolerance: %lg\n",weighted_norm,tolerance); nissa_free(comp); nissa_free(inver); nissa_free(source); nissa_free(conf); return weighted_norm<=tolerance; }
//Read an ILDG gauge configuration from `path` into U, creating the
//storage first when it is not yet allocated, then reset theta and
//print the plaquette of the loaded configuration
void gauge_conf_t::read(const char *path)
{
  if(!is_allocated())
    create();
  
  read_ildg_gauge_conf(U,path);
  reset_theta();
  
  master_printf("plaq: %.18g\n",global_plaquette_lx_conf(U));
}
void prop_group_t::get_inverting(in_source_t &source,gauge_conf_t &gauge_conf,int rotate_to_physical_basis) { //get ntheta,nmass,nr int nmass,ntheta,nr; get_ntheta_mass_r(ntheta,nmass,nr); //allocate spincolor *temp_source=nissa_malloc("temp_source",loc_vol+bord_vol,spincolor); spincolor *temp_reco[2]={nissa_malloc("temp_reco",loc_vol+bord_vol,spincolor),nissa_malloc("temp_reco",loc_vol+bord_vol,spincolor)}; spincolor *cgm_solution[nmass]; for(int imass=0;imass<nmass;imass++) cgm_solution[imass]=nissa_malloc(combine("cgm_solution_%d",imass).c_str(),loc_vol+bord_vol,spincolor); for(int id=0;id<4;id++) { //extract index of the source get_spincolor_from_colorspinspin(temp_source,source.eta,id); //put the g5 safe_dirac_prod_spincolor(temp_source,base_gamma+5,temp_source); for(int itheta=0;itheta<ntheta;itheta++) { //adapt the border condition double th=theta->theta[itheta]; momentum_t mom={1,th,th,th}; gauge_conf.adapt_theta(mom); //invert int niter_max=100000; inv_tmQ2_cgm(cgm_solution,gauge_conf.U,gauge_conf.kappa,mass_res->mass,nmass,niter_max,mass_res->residues,temp_source); for(int imass=0;imass<nmass;imass++) { //reconstruct the doublet reconstruct_tm_doublet(temp_reco[0],temp_reco[1],gauge_conf.U,gauge_conf.kappa,mass_res->mass[imass],cgm_solution[imass]); master_printf("Mass %d (%g) reconstructed \n",imass,mass_res->mass[imass]); //convert the id-th spincolor into the colorspinspin for(int rdest=0;rdest<nr;rdest++) put_spincolor_into_colorspinspin(S[iprop(itheta,imass,rdest)],temp_reco[(nr==2)?rdest:which_r],id); } } } //rotate if needed if(rotate_to_physical_basis) for(int itheta=0;itheta<ntheta;itheta++) for(int imass=0;imass<nmass;imass++) for(int rdest=0;rdest<nr;rdest++) //rotate opposite of D rotate_vol_colorspinspin_to_physical_basis(S[iprop(itheta,imass,rdest)],!rdest,!rdest); //free for(int imass=0;imass<nmass;imass++) nissa_free(cgm_solution[imass]); nissa_free(temp_source); for(int r=0;r<2;r++) nissa_free(temp_reco[r]); }
/* Records `name` as the output directory and points the symbolic link
   `symlink_name` at it, removing a previously existing link first.
   The results of unlink()/symlink() are not checked (best effort). */
void structure::set_output_directory(const char *name) {
  char buf[300];
  if (!quiet) master_printf("Using output directory %s/\n", name);
  if (readlink(symlink_name, buf, sizeof(buf)) > 0) {
    // Link already exists.
    unlink(symlink_name);
  }
  symlink(name, symlink_name);
  // Stored once, after the link is updated (it was previously assigned twice).
  outdir = name;
}
const char *make_output_directory(const char *exename, const char *jobname) { const int buflen = 300; char basename[buflen]; const char * const evil_suffs[] = { ".dac", ".cpp", ".cc", ".cxx", ".C" }; char stripped_name[buflen]; const char *bnp = exename; // stripped_name holds the actual name of the executable (dirs removed). const char *t; for (t=exename;*t;t++) { if (*t == '/') bnp = t+1; } snprintf(stripped_name, buflen, "%s", bnp); for (int i = 0; i < (int)(sizeof(evil_suffs) / sizeof(evil_suffs[0])); ++i) { meep::integer sufflen = strlen(evil_suffs[i]); if (strcmp(stripped_name + strlen(stripped_name) - sufflen, evil_suffs[i]) == 0 && strlen(stripped_name) > size_t(sufflen)) { stripped_name[strlen(stripped_name) - sufflen] = (char)0; break; } } char sourcename[buflen]; // Holds the "example.cpp" filename. snprintf(sourcename, buflen, "%s.cpp", stripped_name); if (jobname != NULL) { snprintf(basename, buflen, "%s", jobname); } else { snprintf(basename, buflen, "%s", stripped_name); } static char outdirname[buflen]; snprintf(outdirname, buflen, "%s-out", basename); { int i = 0; while (!is_ok_dir(outdirname)) { if (!quiet) master_printf("Output directory %s already exists!\n", outdirname); snprintf(outdirname, buflen, "%s-out-%d", basename, i++); } } char outsrcname[buflen]; snprintf(outsrcname, buflen, "%s/%s", outdirname, sourcename); cp(sourcename, outsrcname); return outdirname; }
//Adapt the border condition void adapt_theta(quad_su3 *conf,double *old_theta,double *put_theta,int putonbords,int putonedges) { momentum_t diff_theta; int adapt=0; for(int idir=0;idir<NDIM;idir++) { adapt=adapt || (old_theta[idir]!=put_theta[idir]); diff_theta[idir]=put_theta[idir]-old_theta[idir]; old_theta[idir]=put_theta[idir]; } if(adapt) { master_printf("Necessary to add boundary condition: %f %f %f %f\n",diff_theta[0],diff_theta[1],diff_theta[2],diff_theta[3]); put_boundaries_conditions(conf,diff_theta,putonbords,putonedges); } }
THREADABLE_FUNCTION_END

//generate momenta using guassian hermitian matrix generator
//
//for each direction mu a gaussian hermitian vector is drawn, passed through
//summ_src_and_all_inv_MFACC_cgm with the rational approximation rat_exp_H
//and copied into H[.][mu]; a cross-check of the norm is printed
THREADABLE_FUNCTION_5ARG(generate_hmc_momenta_with_FACC, quad_su3*,H, quad_su3*,conf, rat_approx_t*,rat_exp_H, double,kappa, double,residue)
{
  GET_THREAD_ID();
  
  //temporary for inversion
  su3 *in=nissa_malloc("in",loc_vol+bord_vol,su3);
  su3 *out=nissa_malloc("out",loc_vol+bord_vol,su3);
  su3 *tmp=nissa_malloc("tmp",loc_vol+bord_vol,su3);
  
  for(int mu=0;mu<NDIM;mu++)
    {
      //fill the vector randomly
      NISSA_PARALLEL_LOOP(ivol,0,loc_vol)
        herm_put_to_gauss(in[ivol],&(loc_rnd_gen[ivol]),1);
      set_borders_invalid(in);
      
      //compute the norm
      double in_norm2;
      double_vector_glb_scalar_prod(&in_norm2,(double*)in,(double*)in,loc_vol*sizeof(su3)/sizeof(double));
      
      //invert
      summ_src_and_all_inv_MFACC_cgm(out,conf,kappa,rat_exp_H,1000000,residue,in);
      
      //try to compute the norm*D
      inv_MFACC_cg(tmp,NULL,conf,kappa,10000000,residue,out);
      double reco_norm2;
      double_vector_glb_scalar_prod(&reco_norm2,(double*)out,(double*)tmp,loc_vol*sizeof(su3)/sizeof(double));
      master_printf("Norm: %16.16lg, norm_reco: %16.16lg, relative error: %lg\n",sqrt(in_norm2),sqrt(reco_norm2),sqrt(in_norm2/reco_norm2)-1);
      
      //store the vector
      NISSA_PARALLEL_LOOP(ivol,0,loc_vol)
        su3_copy(H[ivol][mu],out[ivol]);
      set_borders_invalid(H);
    }
  
  nissa_free(in);
  nissa_free(out);
  nissa_free(tmp);
}
//check that the number of hopping to move in each direction is <=1 void check_all_lattice_neighbours_are_spi_first_neighbours() { int max_nhop=0; for(int mu=0;mu<4;mu++) if(paral_dir[mu]) for(int bf=0;bf<2;bf++) { //compute the number of hop according manhattan metric int nhop=0; for(int idir=0;idir<5;idir++) { int coord_neigh=spi_dest_coord[bf*4+mu][idir]; int off=abs(coord_neigh-spi_rank_coord[idir]); if(spi_dir_is_torus[idir]) off=std::min(off,std::min(spi_dir_size[idir]+coord_neigh-spi_rank_coord[idir], spi_dir_size[idir]+spi_rank_coord[idir]-coord_neigh)); nhop+=off; } max_nhop=std::max(max_nhop,nhop); } if(max_nhop>1) master_printf("WARNING not all lattice-nodes are first neighbours in SPI grid (non optimal communications)\n"); }
//benchmark memory THREADABLE_FUNCTION_1ARG(bench_memory_bandwidth, int,mem_size) { //allocate double double *a=nissa_malloc("a",mem_size/sizeof(double),double); double *b=nissa_malloc("b",mem_size/sizeof(double),double); //first call to warm up bench_memory_copy(a,b,mem_size); //exec 10 times int ntests=10; double bench_time=-take_time(); for(int i=0;i<ntests;i++) bench_memory_copy(a,b,mem_size); bench_time+=take_time(); bench_time/=ntests; nissa_free(a); nissa_free(b); master_printf("time to copy %d Mbytes: %lg, %lg Mbs\n",mem_size/1024/1024, bench_time,mem_size/1024/1024/bench_time); }
void prop_group_t::write(const char *ext_template_path,int save_reconstructing,int is_rotated,gauge_conf_t &gauge_conf) { char template_path[1024]; sprintf(template_path,"%s/%s",base_out_folder,ext_template_path); int ntheta,nmass,nr; get_ntheta_mass_r(ntheta,nmass,nr); for(int itheta=0;itheta<ntheta;itheta++) for(int imass=0;imass<nmass;imass++) for(int id=0;id<4;id++) { int ivol1=8,id1=2,ic1=1,ri1=1,mu1=1; int ip0=iprop(itheta,imass,0); int ip1=iprop(itheta,imass,1); } for(int itheta=0;itheta<ntheta;itheta++) for(int imass=0;imass<nmass;imass++) if(nr==2 && save_reconstructing) { int ip0=iprop(itheta,imass,0); int ip1=iprop(itheta,imass,1); double th=theta->theta[itheta]; momentum_t mom={1,th,th,th}; gauge_conf.adapt_theta(mom); master_printf("involved: %d %d\n",ip0,ip1); write_tm_colorspinspin_anti_reconstructing(combine(template_path,ip0).c_str(),S[ip0],S[ip1],is_rotated,mass_res->mass[imass],64,gauge_conf.U,gauge_conf.kappa,gauge_conf.theta); } else for(int r=0;r<2;r++) { int ip=iprop(itheta,imass,r); write_colorspinspin(combine(template_path,ip).c_str(),S[ip],64); } }
void close_bissa() { master_printf("Closing bissa\n"); //unset lx geometry if(lx_geom_inited) unset_lx_geometry(); //unset eo geometry if(eo_geom_inited) unset_eo_geometry(); //stop the random generator if(loc_rnd_gen_inited) stop_loc_rnd_gen(); //print information over the maximum amount of memory used master_printf("Maximal memory used during the run: %d bytes (",max_required_memory); if(rank==0) fprintf_friendly_filesize(stdout,max_required_memory); master_printf(") per rank\n\n"); //check wether there are still allocated vectors if(main_vect.next!=NULL && rank==0) { printf("Warning, there are still allocated vectors:\n"); print_all_vect_content(); printf("For a total of %d bytes\n",compute_vect_memory_usage()); } tot_time+=take_time(); master_printf("Total time: %lg s\n",tot_time); #ifdef COMM_BENCH master_printf("Total communication time: %lg s\n",tot_comm_time); #endif //free thread delays pattern #if THREAD_DEBUG>=2 free(delayed_thread_barrier); free(delay_rnd_gen); #endif MPI_Barrier(MPI_COMM_WORLD); master_printf(" Ciao!\n\n"); MPI_Finalize(); }
THREADABLE_FUNCTION_END

//same but with acceleration
//
//the force F is accumulated from the gluonic and the two MFACC terms and
//used to evolve H by dt (if evolve_SU3), then the FACC momenta pi are evolved
//(if evolve_FACC); when ext_F is NULL a temporary force is allocated and
//freed here. With DEBUG defined the force on link conf[0][0] is also
//estimated from finite differences of the action and printed
THREADABLE_FUNCTION_8ARG(evolve_momenta_and_FACC_momenta, quad_su3*,H, su3**,pi, quad_su3*,conf, su3**,phi, theory_pars_t*,theory_pars, pure_gauge_evol_pars_t*,simul, double,dt, quad_su3*,ext_F)
{
  verbosity_lv2_master_printf("Evolving momenta and FACC momenta, dt=%lg\n",dt);
  
  //use the external force if provided, otherwise allocate one
  quad_su3 *F=ext_F;
  if(ext_F==NULL) F=nissa_malloc("F",loc_vol,quad_su3);
  
#ifdef DEBUG
  vector_reset(F);
  double eps=1e-5;
  
  //store initial link and compute action
  su3 sto;
  su3_copy(sto,conf[0][0]);
  double act_ori=pure_gauge_action(conf,*theory_pars,*simul,H,phi,pi);
  
  //store derivative
  su3 nu_plus,nu_minus;
  su3_put_to_zero(nu_plus);
  su3_put_to_zero(nu_minus);
  
  for(int igen=0;igen<NCOL*NCOL-1;igen++)
    {
      //prepare increment and change
      su3 ba;
      su3_prod_double(ba,gell_mann_matr[igen],eps/2);
      
      su3 exp_mod;
      safe_hermitian_exact_i_exponentiate(exp_mod,ba);
      
      //change -, compute action
      unsafe_su3_dag_prod_su3(conf[0][0],exp_mod,sto);
      double act_minus=pure_gauge_action(conf,*theory_pars,*simul,H,phi,pi);
      
      //change +, compute action
      unsafe_su3_prod_su3(conf[0][0],exp_mod,sto);
      double act_plus=pure_gauge_action(conf,*theory_pars,*simul,H,phi,pi);
      
      //set back everything
      su3_copy(conf[0][0],sto);
      
      //printf("plus: %+016.016le, ori: %+016.016le, minus: %+016.016le, eps: %lg\n",act_plus,act_ori,act_minus,eps);
      double gr_plus=-(act_plus-act_ori)/eps;
      double gr_minus=-(act_ori-act_minus)/eps;
      su3_summ_the_prod_idouble(nu_plus,gell_mann_matr[igen],gr_plus);
      su3_summ_the_prod_idouble(nu_minus,gell_mann_matr[igen],gr_minus);
    }
  
  //take the average
  su3 nu;
  su3_summ(nu,nu_plus,nu_minus);
  su3_prodassign_double(nu,0.5);
  
  vector_reset(F);
#endif
  
  //compute the various contribution to the QCD force
  if(evolve_SU3)
    {
      //compute without TA
      vector_reset(F);
      compute_gluonic_force_lx_conf_do_not_finish(F,conf,theory_pars);
      summ_the_MFACC_momenta_QCD_force(F,conf,simul->kappa,pi,simul->naux_fields);
      summ_the_MFACC_QCD_momenta_QCD_force(F,conf,simul->kappa,100000,simul->residue,H);
      
      //finish the calculation
      gluonic_force_finish_computation(F,conf);
      
      evolve_lx_momenta_with_force(H,F,dt);
    }
  
#ifdef DEBUG
  master_printf("checking TOTAL gauge force\n");
  master_printf("an\n");
  su3_print(F[0][0]);
  master_printf("nu\n");
  su3_print(nu);
  master_printf("nu_plus\n");
  su3_print(nu_plus);
  master_printf("nu_minus\n");
  su3_print(nu_minus);
  //crash("anna");
#endif
  
  //evolve FACC momenta
  if(evolve_FACC) evolve_MFACC_momenta(pi,phi,simul->naux_fields,dt);
  
  //release the force only if it was allocated here
  if(ext_F==NULL) nissa_free(F);
}
void corr_command_t::exec() { FILE *fout=open_file(combine("%s/%s",base_out_folder,path).c_str(),"w"); for(int ipair=0;ipair<nprop_group_pair;ipair++) { master_printf("Starting contraction of group %d/%d\n",ipair,nprop_group_pair); int ntheta1=pair_list[ipair].first->theta->ntheta; double *theta1=pair_list[ipair].first->theta->theta; int ntheta2=pair_list[ipair].second->theta->ntheta; double *theta2=pair_list[ipair].second->theta->theta; int nmass1=pair_list[ipair].first->mass_res->nmass; double *mass1=pair_list[ipair].first->mass_res->mass; double *res1=pair_list[ipair].first->mass_res->residues; int nmass2=pair_list[ipair].second->mass_res->nmass; double *mass2=pair_list[ipair].second->mass_res->mass; double *res2=pair_list[ipair].second->mass_res->residues; int ntot_contr=two_pts_corr_group->ntot_contr; int ncorr=two_pts_corr_group->ncorr; //prepare the list of contractions int source_op[ntot_contr]; int sink_op[ntot_contr]; double coeff[ntot_contr]; { int icontr=0; for(int icorr=0;icorr<ncorr;icorr++) { two_pts_corr_pars_t *corr=two_pts_corr_group->corr_list[icorr]; for(int iloc_contr=0;iloc_contr<corr->ncontr;iloc_contr++) { source_op[icontr]=corr->source_op[iloc_contr]; sink_op[icontr]=corr->sink_op[iloc_contr]; coeff[icontr]=corr->coeff[iloc_contr]; icontr++; } } } //buffer where to store all the contractions complex *buf=nissa_malloc("buf",2*ntot_contr*glb_size[0],complex); for(int itheta1=0;itheta1<ntheta1;itheta1++) for(int imass1=0;imass1<nmass1;imass1++) for(int itheta2=0;itheta2<ntheta2;itheta2++) for(int imass2=0;imass2<nmass2;imass2++) { master_fprintf(fout," # group_pair=%d, m1=%lg th1=%lg res1=%lg (reverted), m2=%lg th2=%lg res2=%lg\n\n", ipair,mass1[imass1],theta1[itheta1],res1[imass1],mass2[imass2],theta2[itheta2],res2[imass2]); //contract for(int r=0;r<2;r++) { int iprop1=pair_list[ipair].first->iprop(itheta1,imass1,r); int iprop2=pair_list[ipair].second->iprop(itheta2,imass2,r); 
meson_two_points_Wilson_prop(buf+r*glb_size[0]*ntot_contr,sink_op,pair_list[ipair].first->S[iprop1],source_op,pair_list[ipair].second->S[iprop2],ntot_contr); } //add the contraction to build correlation functions int icontr=0; for(int icorr=0;icorr<ncorr;icorr++) { //reset the corr complex data[glb_size[0]]; memset(data,0,sizeof(complex)*glb_size[0]); two_pts_corr_pars_t *corr=two_pts_corr_group->corr_list[icorr]; char *corr_name=corr->name; //loop on contr for(int iloc_contr=0;iloc_contr<corr->ncontr;iloc_contr++) { for(int r=0;r<2;r++) for(int t=0;t<glb_size[0];t++) complex_summ_the_prod_double(data[t],buf[t+glb_size[0]*(icontr+r*ntot_contr)],0.5*coeff[icontr]); icontr++; } master_fprintf(fout," # %s\n",corr_name); print_contraction_to_file(fout,-1,-1,data,shift,"",1); master_fprintf(fout,"\n"); } } nissa_free(buf); } if(rank==0) fclose(fout); }
/* Prints one line with the name (from ts2n) and the value of entry `s` of
   the array `ts`; entries equal to zero are not printed. */
static void pt(double ts[], time_sink s) {
  if (ts[s] == 0.0) return;
  master_printf("    %18s: %g s\n", ts2n(s), ts[s]);
}
/* BiCGSTAB(L) algorithm for the n-by-n problem Ax = b */ ptrdiff_t bicgstabL(const int L, const size_t n, realnum *x, bicgstab_op A, void *Adata, const realnum *b, const double tol, int *iters, realnum *work, const bool quiet) { if (!work) return (2 * L + 3) * n; // required workspace prealnum *r = new prealnum[L + 1]; prealnum *u = new prealnum[L + 1]; for (int i = 0; i <= L; ++i) { r[i] = work + i * n; u[i] = work + (L + 1 + i) * n; } double bnrm = norm2(n, b); if (bnrm == 0.0) bnrm = 1.0; int iter = 0; double last_output_wall_time = wall_time(); double *gamma = new double[L + 1]; double *gamma_p = new double[L + 1]; double *gamma_pp = new double[L + 1]; double *tau = new double[L * L]; double *sigma = new double[L + 1]; int ierr = 0; // error code to return, if any const double breaktol = 1e-30; /**** FIXME: check for breakdown conditions(?) during iteration ****/ // rtilde = r[0] = b - Ax realnum *rtilde = work + (2 * L + 2) * n; A(x, r[0], Adata); for (size_t m = 0; m < n; ++m) rtilde[m] = r[0][m] = b[m] - r[0][m]; { /* Sleipjen normalizes rtilde in his code; it seems to help slightly */ double s = 1.0 / norm2(n, rtilde); for (size_t m = 0; m < n; ++m) rtilde[m] *= s; } memset(u[0], 0, sizeof(realnum) * n); // u[0] = 0 double rho = 1.0, alpha = 0, omega = 1; double resid; while ((resid = norm2(n, r[0])) > tol * bnrm) { ++iter; if (!quiet && wall_time() > last_output_wall_time + MEEP_MIN_OUTPUT_TIME) { master_printf("residual[%d] = %g\n", iter, resid / bnrm); last_output_wall_time = wall_time(); } rho = -omega * rho; for (int j = 0; j < L; ++j) { if (fabs(rho) < breaktol) { ierr = -1; goto finish; } double rho1 = dot(n, r[j], rtilde); double beta = alpha * rho1 / rho; rho = rho1; for (int i = 0; i <= j; ++i) for (size_t m = 0; m < n; ++m) u[i][m] = r[i][m] - beta * u[i][m]; A(u[j], u[j + 1], Adata); alpha = rho / dot(n, u[j + 1], rtilde); for (int i = 0; i <= j; ++i) xpay(n, r[i], -alpha, u[i + 1]); A(r[j], r[j + 1], Adata); xpay(n, x, alpha, u[0]); } for (int 
j = 1; j <= L; ++j) { for (int i = 1; i < j; ++i) { int ij = (j - 1) * L + (i - 1); tau[ij] = dot(n, r[j], r[i]) / sigma[i]; xpay(n, r[j], -tau[ij], r[i]); } sigma[j] = dot(n, r[j], r[j]); gamma_p[j] = dot(n, r[0], r[j]) / sigma[j]; } omega = gamma[L] = gamma_p[L]; for (int j = L - 1; j >= 1; --j) { gamma[j] = gamma_p[j]; for (int i = j + 1; i <= L; ++i) gamma[j] -= tau[(i - 1) * L + (j - 1)] * gamma[i]; } for (int j = 1; j < L; ++j) { gamma_pp[j] = gamma[j + 1]; for (int i = j + 1; i < L; ++i) gamma_pp[j] += tau[(i - 1) * L + (j - 1)] * gamma[i + 1]; } xpay(n, x, gamma[1], r[0]); xpay(n, r[0], -gamma_p[L], r[L]); xpay(n, u[0], -gamma[L], u[L]); for (int j = 1; j < L; ++j) { /* TODO: use blas DGEMV (for L > 2) */ xpay(n, x, gamma_pp[j], r[j]); xpay(n, r[0], -gamma_p[j], r[j]); xpay(n, u[0], -gamma[j], u[j]); } if (iter == *iters) { ierr = 1; break; } } if (!quiet) master_printf("final residual = %g\n", norm2(n, r[0]) / bnrm); finish: delete[] sigma; delete[] tau; delete[] gamma_pp; delete[] gamma_p; delete[] gamma; delete[] u; delete[] r; *iters = iter; return ierr; }
// Evolve momenta according to the rooted staggered force THREADABLE_FUNCTION_7ARG(evolve_momenta_with_quark_force, quad_su3**,H, quad_su3**,conf, std::vector<std::vector<pseudofermion_t> >*,pf, theory_pars_t*,theory_pars, hmc_evol_pars_t*,simul_pars, std::vector<rat_approx_t>*,rat_appr, double,dt) { GET_THREAD_ID(); verbosity_lv2_master_printf("Evolving momenta with quark force, dt=%lg\n",dt); //allocate forces quad_su3 *F[2]={nissa_malloc("F0",loc_volh,quad_su3),nissa_malloc("F1",loc_volh,quad_su3)}; //compute the force compute_quark_force(F,conf,pf,theory_pars,rat_appr,simul_pars->md_residue); //#define DEBUG #ifdef DEBUG int par=1,ieo=1,mu=1; double eps=1e-5; //store initial link su3 sto; su3_copy(sto,conf[par][ieo][mu]); //allocate smeared conf quad_su3 *sme_conf[2]; for(int eo=0;eo<2;eo++) sme_conf[eo]=nissa_malloc("sme_conf",loc_volh+bord_volh+edge_volh,quad_su3); //compute action before double act_ori; stout_smear(sme_conf,conf,&(theory_pars->stout_pars)); rootst_eoimpr_quark_action(&act_ori,sme_conf,theory_pars->nflavs,theory_pars->backfield,pf,simul_pars); //store derivative su3 nu_plus,nu_minus; su3_put_to_zero(nu_plus); su3_put_to_zero(nu_minus); for(int igen=0;igen<NCOL*NCOL-1;igen++) { //prepare increment and change su3 ba; su3_prod_double(ba,gell_mann_matr[igen],eps/2); su3 exp_mod; safe_hermitian_exact_i_exponentiate(exp_mod,ba); //change -, compute action unsafe_su3_dag_prod_su3(conf[par][ieo][mu],exp_mod,sto); double act_minus; stout_smear(sme_conf,conf,&(theory_pars->stout_pars)); rootst_eoimpr_quark_action(&act_minus,sme_conf,theory_pars->nflavs,theory_pars->backfield,pf,simul_pars); //change +, compute action unsafe_su3_prod_su3(conf[par][ieo][mu],exp_mod,sto); double act_plus; stout_smear(sme_conf,conf,&(theory_pars->stout_pars)); rootst_eoimpr_quark_action(&act_plus,sme_conf,theory_pars->nflavs,theory_pars->backfield,pf,simul_pars); //set back everything su3_copy(conf[par][ieo][mu],sto); //printf("plus: %+016.016le, ori: %+016.016le, minus: 
%+016.016le, eps: %lg\n",act_plus,act_ori,act_minus,eps); double gr_plus=-(act_plus-act_ori)/eps; double gr_minus=-(act_ori-act_minus)/eps; su3_summ_the_prod_idouble(nu_plus,gell_mann_matr[igen],gr_plus); su3_summ_the_prod_idouble(nu_minus,gell_mann_matr[igen],gr_minus); } //take the average su3 nu; su3_summ(nu,nu_plus,nu_minus); su3_prodassign_double(nu,0.5); master_printf("checking pure gauge force\n"); master_printf("an\n"); su3_print(F[par][ieo][mu]); master_printf("nu\n"); su3_print(nu); master_printf("nu_plus\n"); su3_print(nu_plus); master_printf("nu_minus\n"); su3_print(nu_minus); //crash("anna"); #endif //evolve for(int par=0;par<2;par++) { NISSA_PARALLEL_LOOP(ivol,0,loc_volh) for(int mu=0;mu<NDIM;mu++) for(int ic1=0;ic1<NCOL;ic1++) for(int ic2=0;ic2<NCOL;ic2++) complex_subt_the_prod_idouble(H[par][ivol][mu][ic1][ic2],F[par][ivol][mu][ic1][ic2],dt); nissa_free(F[par]); } }
//take also the TA
//
//compute the gluonic force F on conf: the unfinished force is computed and
//then completed by gluonic_force_finish_computation; the whole computation is
//timed and, at verbosity level 2, the average norm of the force is printed.
//With DEBUG defined the force on link conf[0][0] is also estimated from
//finite differences of the gluonic action and printed for comparison
THREADABLE_FUNCTION_3ARG(compute_gluonic_force_lx_conf, quad_su3*,F, quad_su3*,conf, theory_pars_t*,physics)
{
  GET_THREAD_ID();
  
  START_TIMING(gluon_force_time,ngluon_force);
  
#ifdef DEBUG
  vector_reset(F);
  double eps=1e-5;
  
  //store initial link and compute action
  su3 sto;
  su3_copy(sto,conf[0][0]);
  double act_ori;
  gluonic_action(&act_ori,conf,physics->gauge_action_name,physics->beta);
  
  //store derivative
  su3 nu_plus,nu_minus;
  su3_put_to_zero(nu_plus);
  su3_put_to_zero(nu_minus);
  
  for(int igen=0;igen<NCOL*NCOL-1;igen++)
    {
      //prepare increment and change
      su3 ba;
      su3_prod_double(ba,gell_mann_matr[igen],eps/2);
      
      su3 exp_mod;
      safe_hermitian_exact_i_exponentiate(exp_mod,ba);
      
      //change -, compute action
      unsafe_su3_dag_prod_su3(conf[0][0],exp_mod,sto);
      double act_minus;
      gluonic_action(&act_minus,conf,physics->gauge_action_name,physics->beta);
      
      //change +, compute action
      unsafe_su3_prod_su3(conf[0][0],exp_mod,sto);
      double act_plus;
      gluonic_action(&act_plus,conf,physics->gauge_action_name,physics->beta);
      
      //set back everything
      su3_copy(conf[0][0],sto);
      
      //printf("plus: %+016.016le, ori: %+016.016le, minus: %+016.016le, eps: %lg\n",act_plus,act_ori,act_minus,eps);
      double gr_plus=-(act_plus-act_ori)/eps;
      double gr_minus=-(act_ori-act_minus)/eps;
      su3_summ_the_prod_idouble(nu_plus,gell_mann_matr[igen],gr_plus);
      su3_summ_the_prod_idouble(nu_minus,gell_mann_matr[igen],gr_minus);
    }
  
  //take the average
  su3 nu;
  su3_summ(nu,nu_plus,nu_minus);
  su3_prodassign_double(nu,0.5);
  
  vector_reset(F);
#endif
  
  compute_gluonic_force_lx_conf_do_not_finish(F,conf,physics);
  
  //finish the calculation
  gluonic_force_finish_computation(F,conf);
  
#ifdef DEBUG
  master_printf("checking pure gauge force\n");
  master_printf("an\n");
  su3_print(F[0][0]);
  master_printf("nu\n");
  su3_print(nu);
  master_printf("nu_plus\n");
  su3_print(nu_plus);
  master_printf("nu_minus\n");
  su3_print(nu_minus);
  //crash("anna");
#endif
  
  //print the intensity of the force
  if(VERBOSITY_LV2)
    {
      double norm=0;
      norm+=double_vector_glb_norm2(F,loc_vol);
      
      double average_norm=sqrt(norm/glb_vol);
      master_printf("  Gluonic force average norm: %lg\n",average_norm);
    }
  
  STOP_TIMING(gluon_force_time);
}
//Smear U in place with ape_spatial_smear_conf, using alpha and niter taken
//from ape_smear_pars, then print the plaquette of the smeared configuration
void gauge_conf_t::ape_smear(ape_smear_pars_t &ape_smear_pars)
{
  //source and destination coincide: U is overwritten
  ape_spatial_smear_conf(U,U,ape_smear_pars.alpha,ape_smear_pars.niter);
  
  master_printf("smerded plaq: %.18g\n",global_plaquette_lx_conf(U));
}
/* Prints a "Field time usage" report: one pt() call for every time_sink
   value from 0 up to and including Other, applied to times_spent. */
void fields::print_times() {
  master_printf("\nField time usage:\n");
  int sink = 0;
  while (sink <= Other) {
    pt(times_spent, (time_sink) sink);
    ++sink;
  }
  master_printf("\n");
}
void fields::step() { // however many times the fields have been synched, we want to restore now int save_synchronized_magnetic_fields = synchronized_magnetic_fields; if (synchronized_magnetic_fields) { synchronized_magnetic_fields = 1; // reset synchronization count restore_magnetic_fields(); } am_now_working_on(Stepping); if (!t) { last_step_output_wall_time = wall_time(); last_step_output_t = t; } if (!quiet && wall_time() > last_step_output_wall_time + MIN_OUTPUT_TIME) { master_printf("on time step %d (time=%g), %g s/step\n", t, time(), (wall_time() - last_step_output_wall_time) / (t - last_step_output_t)); if (save_synchronized_magnetic_fields) master_printf(" (doing expensive timestepping of synched fields)\n"); last_step_output_wall_time = wall_time(); last_step_output_t = t; } phase_material(); // update cached conductivity-inverse array, if needed for (int i=0;i<num_chunks;i++) chunks[i]->s->update_condinv(); calc_sources(time()); // for B sources step_db(B_stuff); step_source(B_stuff); step_boundaries(B_stuff); calc_sources(time() + 0.5*dt); // for integrated H sources update_eh(H_stuff); step_boundaries(WH_stuff); update_pols(H_stuff); step_boundaries(PH_stuff); step_boundaries(H_stuff); if (fluxes) fluxes->update_half(); calc_sources(time() + 0.5*dt); // for D sources step_db(D_stuff); step_source(D_stuff); step_boundaries(D_stuff); calc_sources(time() + dt); // for integrated E sources update_eh(E_stuff); step_boundaries(WE_stuff); update_pols(E_stuff); step_boundaries(PE_stuff); step_boundaries(E_stuff); if (fluxes) fluxes->update(); t += 1; update_dfts(); finished_working(); // re-synch magnetic fields if they were previously synchronized if (save_synchronized_magnetic_fields) { synchronize_magnetic_fields(); synchronized_magnetic_fields = save_synchronized_magnetic_fields; } }