int main(int argc, char *argv[]) { FILE *parameterfile = NULL; int c, j, i, ix = 0, isample = 0, op_id = 0; char * filename = NULL; char datafilename[50]; char parameterfilename[50]; char conf_filename[50]; char * input_filename = NULL; double plaquette_energy; struct stout_parameters params_smear; spinor **s, *s_; #ifdef _KOJAK_INST #pragma pomp inst init #pragma pomp inst begin(main) #endif #if (defined SSE || defined SSE2 || SSE3) signal(SIGILL, &catch_ill_inst); #endif DUM_DERI = 8; DUM_MATRIX = DUM_DERI + 5; #if ((defined BGL && defined XLC) || defined _USE_TSPLITPAR) NO_OF_SPINORFIELDS = DUM_MATRIX + 3; #else NO_OF_SPINORFIELDS = DUM_MATRIX + 3; #endif verbose = 0; g_use_clover_flag = 0; #ifdef MPI # ifdef OMP int mpi_thread_provided; MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_thread_provided); # else MPI_Init(&argc, &argv); # endif MPI_Comm_rank(MPI_COMM_WORLD, &g_proc_id); #else g_proc_id = 0; #endif while ((c = getopt(argc, argv, "h?vVf:o:")) != -1) { switch (c) { case 'f': input_filename = calloc(200, sizeof(char)); strcpy(input_filename, optarg); break; case 'o': filename = calloc(200, sizeof(char)); strcpy(filename, optarg); break; case 'v': verbose = 1; break; case 'V': fprintf(stdout,"%s %s\n",PACKAGE_STRING,git_hash); exit(0); break; case 'h': case '?': default: usage(); break; } } if (input_filename == NULL) { input_filename = "invert.input"; } if (filename == NULL) { filename = "output"; } /* Read the input file */ if( (j = read_input(input_filename)) != 0) { fprintf(stderr, "Could not find input file: %s\nAborting...\n", input_filename); exit(-1); } #ifdef OMP if(omp_num_threads > 0) { omp_set_num_threads(omp_num_threads); } else { if( g_proc_id == 0 ) printf("# No value provided for OmpNumThreads, running in single-threaded mode!\n"); omp_num_threads = 1; omp_set_num_threads(omp_num_threads); } init_omp_accumulators(omp_num_threads); #endif /* this DBW2 stuff is not needed for the inversion ! */ if (g_dflgcr_flag == 1) { even_odd_flag = 0; } g_rgi_C1 = 0; if (Nsave == 0) { Nsave = 1; } if (g_running_phmc) { NO_OF_SPINORFIELDS = DUM_MATRIX + 8; } tmlqcd_mpi_init(argc, argv); g_dbw2rand = 0; /* starts the single and double precision random number */ /* generator */ start_ranlux(rlxd_level, random_seed); /* we need to make sure that we don't have even_odd_flag = 1 */ /* if any of the operators doesn't use it */ /* in this way even/odd can still be used by other operators */ for(j = 0; j < no_operators; j++) if(!operator_list[j].even_odd_flag) even_odd_flag = 0; #ifndef MPI g_dbw2rand = 0; #endif #ifdef _GAUGE_COPY j = init_gauge_field(VOLUMEPLUSRAND, 1); #else j = init_gauge_field(VOLUMEPLUSRAND, 0); #endif if (j != 0) { fprintf(stderr, "Not enough memory for gauge_fields! Aborting...\n"); exit(-1); } j = init_geometry_indices(VOLUMEPLUSRAND); if (j != 0) { fprintf(stderr, "Not enough memory for geometry indices! Aborting...\n"); exit(-1); } if (no_monomials > 0) { if (even_odd_flag) { j = init_monomials(VOLUMEPLUSRAND / 2, even_odd_flag); } else { j = init_monomials(VOLUMEPLUSRAND, even_odd_flag); } if (j != 0) { fprintf(stderr, "Not enough memory for monomial pseudo fermion fields! Aborting...\n"); exit(-1); } } if (even_odd_flag) { j = init_spinor_field(VOLUMEPLUSRAND / 2, NO_OF_SPINORFIELDS); } else { j = init_spinor_field(VOLUMEPLUSRAND, NO_OF_SPINORFIELDS); } if (j != 0) { fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n"); exit(-1); } if (g_running_phmc) { j = init_chi_spinor_field(VOLUMEPLUSRAND / 2, 20); if (j != 0) { fprintf(stderr, "Not enough memory for PHMC Chi fields! Aborting...\n"); exit(-1); } } g_mu = g_mu1; if (g_cart_id == 0) { /*construct the filenames for the observables and the parameters*/ strcpy(datafilename, filename); strcat(datafilename, ".data"); strcpy(parameterfilename, filename); strcat(parameterfilename, ".para"); parameterfile = fopen(parameterfilename, "w"); write_first_messages(parameterfile, 1); fclose(parameterfile); } /* define the geometry */ geometry(); /* define the boundary conditions for the fermion fields */ boundary(g_kappa); phmc_invmaxev = 1.; init_operators(); /* this could be maybe moved to init_operators */ #ifdef _USE_HALFSPINOR j = init_dirac_halfspinor(); if (j != 0) { fprintf(stderr, "Not enough memory for halffield! Aborting...\n"); exit(-1); } if (g_sloppy_precision_flag == 1) { j = init_dirac_halfspinor32(); if (j != 0) { fprintf(stderr, "Not enough memory for 32-bit halffield! Aborting...\n"); exit(-1); } } # if (defined _PERSISTENT) if (even_odd_flag) init_xchange_halffield(); # endif #endif for (j = 0; j < Nmeas; j++) { sprintf(conf_filename, "%s.%.4d", gauge_input_filename, nstore); if (g_cart_id == 0) { printf("#\n# Trying to read gauge field from file %s in %s precision.\n", conf_filename, (gauge_precision_read_flag == 32 ? "single" : "double")); fflush(stdout); } if( (i = read_gauge_field(conf_filename)) !=0) { fprintf(stderr, "Error %d while reading gauge field from %s\n Aborting...\n", i, conf_filename); exit(-2); } if (g_cart_id == 0) { printf("# Finished reading gauge field.\n"); fflush(stdout); } #ifdef MPI xchange_gauge(g_gauge_field); #endif /*compute the energy of the gauge field*/ plaquette_energy = measure_gauge_action( (const su3**) g_gauge_field); if (g_cart_id == 0) { printf("# The computed plaquette value is %e.\n", plaquette_energy / (6.*VOLUME*g_nproc)); fflush(stdout); } if (use_stout_flag == 1){ params_smear.rho = stout_rho; params_smear.iterations = stout_no_iter; /* if (stout_smear((su3_tuple*)(g_gauge_field[0]), ¶ms_smear, (su3_tuple*)(g_gauge_field[0])) != 0) */ /* exit(1) ; */ g_update_gauge_copy = 1; g_update_gauge_energy = 1; g_update_rectangle_energy = 1; plaquette_energy = measure_gauge_action( (const su3**) g_gauge_field); if (g_cart_id == 0) { printf("# The plaquette value after stouting is %e\n", plaquette_energy / (6.*VOLUME*g_nproc)); fflush(stdout); } } if (reweighting_flag == 1) { reweighting_factor(reweighting_samples, nstore); } /* Compute minimal eigenvalues, if wanted */ if (compute_evs != 0) { eigenvalues(&no_eigenvalues, 5000, eigenvalue_precision, 0, compute_evs, nstore, even_odd_flag); } if (phmc_compute_evs != 0) { #ifdef MPI MPI_Finalize(); #endif return(0); } /* Compute the mode number or topological susceptibility using spectral projectors, if wanted*/ if(compute_modenumber != 0 || compute_topsus !=0){ s_ = calloc(no_sources_z2*VOLUMEPLUSRAND+1, sizeof(spinor)); s = calloc(no_sources_z2, sizeof(spinor*)); if(s_ == NULL) { printf("Not enough memory in %s: %d",__FILE__,__LINE__); exit(42); } if(s == NULL) { printf("Not enough memory in %s: %d",__FILE__,__LINE__); exit(42); } for(i = 0; i < no_sources_z2; i++) { #if (defined SSE3 || defined SSE2 || defined SSE) s[i] = (spinor*)(((unsigned long int)(s_)+ALIGN_BASE)&~ALIGN_BASE)+i*VOLUMEPLUSRAND; #else s[i] = s_+i*VOLUMEPLUSRAND; #endif z2_random_spinor_field(s[i], VOLUME); /* what is this here needed for?? */ /* spinor *aux_,*aux; */ /* #if ( defined SSE || defined SSE2 || defined SSE3 ) */ /* aux_=calloc(VOLUMEPLUSRAND+1, sizeof(spinor)); */ /* aux = (spinor *)(((unsigned long int)(aux_)+ALIGN_BASE)&~ALIGN_BASE); */ /* #else */ /* aux_=calloc(VOLUMEPLUSRAND, sizeof(spinor)); */ /* aux = aux_; */ /* #endif */ if(g_proc_id == 0) { printf("source %d \n", i); } if(compute_modenumber != 0){ mode_number(s[i], mstarsq); } if(compute_topsus !=0) { top_sus(s[i], mstarsq); } } free(s); free(s_); } /* move to operators as well */ if (g_dflgcr_flag == 1) { /* set up deflation blocks */ init_blocks(nblocks_t, nblocks_x, nblocks_y, nblocks_z); /* the can stay here for now, but later we probably need */ /* something like init_dfl_solver called somewhere else */ /* create set of approximate lowest eigenvectors ("global deflation subspace") */ /* g_mu = 0.; */ /* boundary(0.125); */ generate_dfl_subspace(g_N_s, VOLUME); /* boundary(g_kappa); */ /* g_mu = g_mu1; */ /* Compute little Dirac operators */ /* alt_block_compute_little_D(); */ if (g_debug_level > 0) { check_projectors(); check_local_D(); } if (g_debug_level > 1) { check_little_D_inversion(); } } if(SourceInfo.type == 1) { index_start = 0; index_end = 1; } g_precWS=NULL; if(use_preconditioning == 1){ /* todo load fftw wisdom */ #if (defined HAVE_FFTW ) && !( defined MPI) loadFFTWWisdom(g_spinor_field[0],g_spinor_field[1],T,LX); #else use_preconditioning=0; #endif } if (g_cart_id == 0) { fprintf(stdout, "#\n"); /*Indicate starting of the operator part*/ } for(op_id = 0; op_id < no_operators; op_id++) { boundary(operator_list[op_id].kappa); g_kappa = operator_list[op_id].kappa; g_mu = 0.; if(use_preconditioning==1 && PRECWSOPERATORSELECT[operator_list[op_id].solver]!=PRECWS_NO ){ printf("# Using preconditioning with treelevel preconditioning operator: %s \n", precWSOpToString(PRECWSOPERATORSELECT[operator_list[op_id].solver])); /* initial preconditioning workspace */ operator_list[op_id].precWS=(spinorPrecWS*)malloc(sizeof(spinorPrecWS)); spinorPrecWS_Init(operator_list[op_id].precWS, operator_list[op_id].kappa, operator_list[op_id].mu/2./operator_list[op_id].kappa, -(0.5/operator_list[op_id].kappa-4.), PRECWSOPERATORSELECT[operator_list[op_id].solver]); g_precWS = operator_list[op_id].precWS; if(PRECWSOPERATORSELECT[operator_list[op_id].solver] == PRECWS_D_DAGGER_D) { fitPrecParams(op_id); } } for(isample = 0; isample < no_samples; isample++) { for (ix = index_start; ix < index_end; ix++) { if (g_cart_id == 0) { fprintf(stdout, "#\n"); /*Indicate starting of new index*/ } /* we use g_spinor_field[0-7] for sources and props for the moment */ /* 0-3 in case of 1 flavour */ /* 0-7 in case of 2 flavours */ prepare_source(nstore, isample, ix, op_id, read_source_flag, source_location); operator_list[op_id].inverter(op_id, index_start); } } if(use_preconditioning==1 && operator_list[op_id].precWS!=NULL ){ /* free preconditioning workspace */ spinorPrecWS_Free(operator_list[op_id].precWS); free(operator_list[op_id].precWS); } if(operator_list[op_id].type == OVERLAP){ free_Dov_WS(); } } nstore += Nsave; } #ifdef MPI MPI_Finalize(); #endif #ifdef OMP free_omp_accumulators(); #endif free_blocks(); free_dfl_subspace(); free_gauge_field(); free_geometry_indices(); free_spinor_field(); free_moment_field(); free_chi_spinor_field(); return(0); #ifdef _KOJAK_INST #pragma pomp inst end(main) #endif }
int main(int argc,char *argv[]) { int j,j_max,k,k_max = 2; paramsXlfInfo *xlfInfo; int ix, n, *nn,*mm,i; double delta, deltamax; spinor rsp; int status = 0; #ifdef MPI DUM_DERI = 6; DUM_SOLVER = DUM_DERI+2; DUM_MATRIX = DUM_SOLVER+6; NO_OF_SPINORFIELDS = DUM_MATRIX+2; MPI_Init(&argc, &argv); #endif g_rgi_C1 = 1.; /* Read the input file */ read_input("hopping_test.input"); tmlqcd_mpi_init(argc, argv); if(g_proc_id==0) { #ifdef SSE printf("# The code was compiled with SSE instructions\n"); #endif #ifdef SSE2 printf("# The code was compiled with SSE2 instructions\n"); #endif #ifdef SSE3 printf("# The code was compiled with SSE3 instructions\n"); #endif #ifdef P4 printf("# The code was compiled for Pentium4\n"); #endif #ifdef OPTERON printf("# The code was compiled for AMD Opteron\n"); #endif #ifdef _GAUGE_COPY printf("# The code was compiled with -D_GAUGE_COPY\n"); #endif #ifdef BGL printf("# The code was compiled for Blue Gene/L\n"); #endif #ifdef BGP printf("# The code was compiled for Blue Gene/P\n"); #endif #ifdef _USE_HALFSPINOR printf("# The code was compiled with -D_USE_HALFSPINOR\n"); #endif #ifdef _USE_SHMEM printf("# the code was compiled with -D_USE_SHMEM\n"); # ifdef _PERSISTENT printf("# the code was compiled for persistent MPI calls (halfspinor only)\n"); # endif #endif #ifdef _INDEX_INDEP_GEOM printf("# the code was compiled with index independent geometry\n"); #endif #ifdef MPI # ifdef _NON_BLOCKING printf("# the code was compiled for non-blocking MPI calls (spinor and gauge)\n"); # endif # ifdef _USE_TSPLITPAR printf("# the code was compiled with tsplit parallelization\n"); # endif #endif printf("\n"); fflush(stdout); } #ifdef _GAUGE_COPY init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1); #else init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0); #endif init_geometry_indices(VOLUMEPLUSRAND + g_dbw2rand); if(even_odd_flag) { j = init_spinor_field(VOLUMEPLUSRAND/2, 2*k_max+1); } else { j = init_spinor_field(VOLUMEPLUSRAND, 2*k_max); } if ( j!= 0) { fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n"); exit(0); } j = init_moment_field(VOLUME, VOLUMEPLUSRAND); if ( j!= 0) { fprintf(stderr, "Not enough memory for moment fields! Aborting...\n"); exit(0); } if(g_proc_id == 0) { fprintf(stdout,"The number of processes is %d \n",g_nproc); printf("# The lattice size is %d x %d x %d x %d\n", (int)(T*g_nproc_t), (int)(LX*g_nproc_x), (int)(LY*g_nproc_y), (int)(g_nproc_z*LZ)); printf("# The local lattice size is %d x %d x %d x %d\n", (int)(T), (int)(LX), (int)(LY),(int) LZ); if(even_odd_flag) { printf("# testinging the even/odd preconditioned Dirac operator\n"); } else { printf("# testinging the standard Dirac operator\n"); } fflush(stdout); } /* define the geometry */ geometry(); /* define the boundary conditions for the fermion fields */ boundary(g_kappa); #ifdef _USE_HALFSPINOR j = init_dirac_halfspinor(); if ( j!= 0) { fprintf(stderr, "Not enough memory for halfspinor fields! Aborting...\n"); exit(0); } if(g_sloppy_precision_flag == 1) { g_sloppy_precision = 1; j = init_dirac_halfspinor32(); if ( j!= 0) { fprintf(stderr, "Not enough memory for 32-Bit halfspinor fields! Aborting...\n"); exit(0); } } # if (defined _PERSISTENT) init_xchange_halffield(); # endif #endif status = check_geometry(); if (status != 0) { fprintf(stderr, "Checking of geometry failed. Unable to proceed.\nAborting....\n"); exit(1); } #if (defined MPI && !(defined _USE_SHMEM)) check_xchange(); #endif start_ranlux(1, 123456); xlfInfo = construct_paramsXlfInfo(0.5, 0); random_gauge_field(reproduce_randomnumber_flag); if ( startoption == 2 ) { /* restart */ write_gauge_field(gauge_input_filename,gauge_precision_write_flag,xlfInfo); } else if ( startoption == 0 ) { /* cold */ unit_g_gauge_field(); } else if (startoption == 3 ) { /* continue */ read_gauge_field(gauge_input_filename); } else if ( startoption == 1 ) { /* hot */ } #ifdef MPI /*For parallelization: exchange the gaugefield */ xchange_gauge(); #endif #ifdef _GAUGE_COPY update_backward_gauge(); #endif if(even_odd_flag) { /*initialize the pseudo-fermion fields*/ j_max=1; for (k = 0; k < k_max; k++) { random_spinor_field(g_spinor_field[k], VOLUME/2, 0); } if (read_source_flag == 2) { /* save */ /* even first, odd second */ write_spinorfield_cm_single(g_spinor_field[0],g_spinor_field[1],SourceInfo.basename); } else if (read_source_flag == 1) { /* yes */ /* even first, odd second */ read_spinorfield_cm_single(g_spinor_field[0],g_spinor_field[1],SourceInfo.basename,-1,0); # if (!defined MPI) if (write_cp_flag == 1) { strcat(SourceInfo.basename,".2"); read_spinorfield_cm_single(g_spinor_field[2],g_spinor_field[3],SourceInfo.basename,-1,0); nn=(int*)calloc(VOLUME,sizeof(int)); if((void*)nn == NULL) return(100); mm=(int*)calloc(VOLUME,sizeof(int)); if((void*)mm == NULL) return(100); n=0; deltamax=0.0; for(ix=0;ix<VOLUME/2;ix++){ (rsp.s0).c0.re = (g_spinor_field[2][ix].s0).c0.re - (g_spinor_field[0][ix].s0).c0.re; (rsp.s0).c0.im = (g_spinor_field[2][ix].s0).c0.im - (g_spinor_field[0][ix].s0).c0.im; (rsp.s0).c1.re = (g_spinor_field[2][ix].s0).c1.re - (g_spinor_field[0][ix].s0).c1.re; (rsp.s0).c1.im = (g_spinor_field[2][ix].s0).c1.im - (g_spinor_field[0][ix].s0).c1.im; (rsp.s0).c2.re = (g_spinor_field[2][ix].s0).c2.re - (g_spinor_field[0][ix].s0).c2.re; (rsp.s0).c2.im = (g_spinor_field[2][ix].s0).c2.im - (g_spinor_field[0][ix].s0).c2.im; (rsp.s1).c0.re = (g_spinor_field[2][ix].s1).c0.re - (g_spinor_field[0][ix].s1).c0.re; (rsp.s1).c0.im = (g_spinor_field[2][ix].s1).c0.im - (g_spinor_field[0][ix].s1).c0.im; (rsp.s1).c1.re = (g_spinor_field[2][ix].s1).c1.re - (g_spinor_field[0][ix].s1).c1.re; (rsp.s1).c1.im = (g_spinor_field[2][ix].s1).c1.im - (g_spinor_field[0][ix].s1).c1.im; (rsp.s1).c2.re = (g_spinor_field[2][ix].s1).c2.re - (g_spinor_field[0][ix].s1).c2.re; (rsp.s1).c2.im = (g_spinor_field[2][ix].s1).c2.im - (g_spinor_field[0][ix].s1).c2.im; (rsp.s2).c0.re = (g_spinor_field[2][ix].s2).c0.re - (g_spinor_field[0][ix].s2).c0.re; (rsp.s2).c0.im = (g_spinor_field[2][ix].s2).c0.im - (g_spinor_field[0][ix].s2).c0.im; (rsp.s2).c1.re = (g_spinor_field[2][ix].s2).c1.re - (g_spinor_field[0][ix].s2).c1.re; (rsp.s2).c1.im = (g_spinor_field[2][ix].s2).c1.im - (g_spinor_field[0][ix].s2).c1.im; (rsp.s2).c2.re = (g_spinor_field[2][ix].s2).c2.re - (g_spinor_field[0][ix].s2).c2.re; (rsp.s2).c2.im = (g_spinor_field[2][ix].s2).c2.im - (g_spinor_field[0][ix].s2).c2.im; (rsp.s3).c0.re = (g_spinor_field[2][ix].s3).c0.re - (g_spinor_field[0][ix].s3).c0.re; (rsp.s3).c0.im = (g_spinor_field[2][ix].s3).c0.im - (g_spinor_field[0][ix].s3).c0.im; (rsp.s3).c1.re = (g_spinor_field[2][ix].s3).c1.re - (g_spinor_field[0][ix].s3).c1.re; (rsp.s3).c1.im = (g_spinor_field[2][ix].s3).c1.im - (g_spinor_field[0][ix].s3).c1.im; (rsp.s3).c2.re = (g_spinor_field[2][ix].s3).c2.re - (g_spinor_field[0][ix].s3).c2.re; (rsp.s3).c2.im = (g_spinor_field[2][ix].s3).c2.im - (g_spinor_field[0][ix].s3).c2.im; _spinor_norm_sq(delta,rsp); if (delta > 1.0e-12) { nn[n] = g_eo2lexic[ix]; mm[n]=ix; n++; } if(delta>deltamax) deltamax=delta; } if (n>0){ printf("mismatch in even spincolorfield in %d points:\n",n); for(i=0; i< MIN(n,1000); i++){ printf("%d,(%d,%d,%d,%d):%f vs. %f\n",nn[i],g_coord[nn[i]][0],g_coord[nn[i]][1],g_coord[nn[i]][2],g_coord[nn[i]][3],(g_spinor_field[2][mm[i]].s0).c0.re, (g_spinor_field[0][mm[i]].s0).c0.re);fflush(stdout); } } n = 0; for(ix=0;ix<VOLUME/2;ix++){ (rsp.s0).c0.re = (g_spinor_field[3][ix].s0).c0.re - (g_spinor_field[1][ix].s0).c0.re; (rsp.s0).c0.im = (g_spinor_field[3][ix].s0).c0.im - (g_spinor_field[1][ix].s0).c0.im; (rsp.s0).c1.re = (g_spinor_field[3][ix].s0).c1.re - (g_spinor_field[1][ix].s0).c1.re; (rsp.s0).c1.im = (g_spinor_field[3][ix].s0).c1.im - (g_spinor_field[1][ix].s0).c1.im; (rsp.s0).c2.re = (g_spinor_field[3][ix].s0).c2.re - (g_spinor_field[1][ix].s0).c2.re; (rsp.s0).c2.im = (g_spinor_field[3][ix].s0).c2.im - (g_spinor_field[1][ix].s0).c2.im; (rsp.s1).c0.re = (g_spinor_field[3][ix].s1).c0.re - (g_spinor_field[1][ix].s1).c0.re; (rsp.s1).c0.im = (g_spinor_field[3][ix].s1).c0.im - (g_spinor_field[1][ix].s1).c0.im; (rsp.s1).c1.re = (g_spinor_field[3][ix].s1).c1.re - (g_spinor_field[1][ix].s1).c1.re; (rsp.s1).c1.im = (g_spinor_field[3][ix].s1).c1.im - (g_spinor_field[1][ix].s1).c1.im; (rsp.s1).c2.re = (g_spinor_field[3][ix].s1).c2.re - (g_spinor_field[1][ix].s1).c2.re; (rsp.s1).c2.im = (g_spinor_field[3][ix].s1).c2.im - (g_spinor_field[1][ix].s1).c2.im; (rsp.s2).c0.re = (g_spinor_field[3][ix].s2).c0.re - (g_spinor_field[1][ix].s2).c0.re; (rsp.s2).c0.im = (g_spinor_field[3][ix].s2).c0.im - (g_spinor_field[1][ix].s2).c0.im; (rsp.s2).c1.re = (g_spinor_field[3][ix].s2).c1.re - (g_spinor_field[1][ix].s2).c1.re; (rsp.s2).c1.im = (g_spinor_field[3][ix].s2).c1.im - (g_spinor_field[1][ix].s2).c1.im; (rsp.s2).c2.re = (g_spinor_field[3][ix].s2).c2.re - (g_spinor_field[1][ix].s2).c2.re; (rsp.s2).c2.im = (g_spinor_field[3][ix].s2).c2.im - (g_spinor_field[1][ix].s2).c2.im; (rsp.s3).c0.re = (g_spinor_field[3][ix].s3).c0.re - (g_spinor_field[1][ix].s3).c0.re; (rsp.s3).c0.im = (g_spinor_field[3][ix].s3).c0.im - (g_spinor_field[1][ix].s3).c0.im; (rsp.s3).c1.re = (g_spinor_field[3][ix].s3).c1.re - (g_spinor_field[1][ix].s3).c1.re; (rsp.s3).c1.im = (g_spinor_field[3][ix].s3).c1.im - (g_spinor_field[1][ix].s3).c1.im; (rsp.s3).c2.re = (g_spinor_field[3][ix].s3).c2.re - (g_spinor_field[1][ix].s3).c2.re; (rsp.s3).c2.im = (g_spinor_field[3][ix].s3).c2.im - (g_spinor_field[1][ix].s3).c2.im; _spinor_norm_sq(delta,rsp); if (delta > 1.0e-12) { nn[n]=g_eo2lexic[ix+(VOLUME+RAND)/2]; mm[n]=ix; n++; } if(delta>deltamax) deltamax=delta; } if (n>0){ printf("mismatch in odd spincolorfield in %d points:\n",n); for(i=0; i< MIN(n,1000); i++){ printf("%d,(%d,%d,%d,%d):%f vs. %f\n",nn[i],g_coord[nn[i]][0],g_coord[nn[i]][1],g_coord[nn[i]][2],g_coord[nn[i]][3],(g_spinor_field[3][mm[i]].s0).c0.re, (g_spinor_field[1][mm[i]].s0).c0.re);fflush(stdout); } } printf("max delta=%e",deltamax);fflush(stdout); } # endif } if (read_source_flag > 0 && write_cp_flag == 0) { /* read-source yes or nobutsave; checkpoint no */ /* first spinorial arg is output, the second is input */ Hopping_Matrix(1, g_spinor_field[1], g_spinor_field[0]); /*ieo=1 M_{eo}*/ Hopping_Matrix(0, g_spinor_field[0], g_spinor_field[1]); /*ieo=0 M_{oe}*/ strcat(SourceInfo.basename,".out"); write_spinorfield_cm_single(g_spinor_field[0],g_spinor_field[1],SourceInfo.basename); printf("Check-field printed. Exiting...\n"); fflush(stdout); } #ifdef MPI MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); #endif } free_gauge_field(); free_geometry_indices(); free_spinor_field(); free_moment_field(); return(0); }
int main(int argc,char *argv[]) { #ifdef _USE_HALFSPINOR #undef _USE_HALFSPINOR printf("# WARNING: USE_HALFSPINOR will be ignored (not supported here).\n"); #endif if(even_odd_flag) { even_odd_flag=0; printf("# WARNING: even_odd_flag will be ignored (not supported here).\n"); } int j,j_max,k,k_max = 1; #ifdef HAVE_LIBLEMON paramsXlfInfo *xlfInfo; #endif int status = 0; static double t1,t2,dt,sdt,dts,qdt,sqdt; double antioptaway=0.0; #ifdef MPI static double dt2; DUM_DERI = 6; DUM_SOLVER = DUM_DERI+2; DUM_MATRIX = DUM_SOLVER+6; NO_OF_SPINORFIELDS = DUM_MATRIX+2; # ifdef OMP int mpi_thread_provided; MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_thread_provided); # else MPI_Init(&argc, &argv); # endif MPI_Comm_rank(MPI_COMM_WORLD, &g_proc_id); #else g_proc_id = 0; #endif g_rgi_C1 = 1.; /* Read the input file */ if((status = read_input("test_Dslash.input")) != 0) { fprintf(stderr, "Could not find input file: test_Dslash.input\nAborting...\n"); exit(-1); } #ifdef OMP init_openmp(); #endif tmlqcd_mpi_init(argc, argv); if(g_proc_id==0) { #ifdef SSE printf("# The code was compiled with SSE instructions\n"); #endif #ifdef SSE2 printf("# The code was compiled with SSE2 instructions\n"); #endif #ifdef SSE3 printf("# The code was compiled with SSE3 instructions\n"); #endif #ifdef P4 printf("# The code was compiled for Pentium4\n"); #endif #ifdef OPTERON printf("# The code was compiled for AMD Opteron\n"); #endif #ifdef _GAUGE_COPY printf("# The code was compiled with -D_GAUGE_COPY\n"); #endif #ifdef BGL printf("# The code was compiled for Blue Gene/L\n"); #endif #ifdef BGP printf("# The code was compiled for Blue Gene/P\n"); #endif #ifdef _USE_HALFSPINOR printf("# The code was compiled with -D_USE_HALFSPINOR\n"); #endif #ifdef _USE_SHMEM printf("# The code was compiled with -D_USE_SHMEM\n"); # ifdef _PERSISTENT printf("# The code was compiled for persistent MPI calls (halfspinor only)\n"); # endif #endif #ifdef MPI # ifdef _NON_BLOCKING printf("# The code was compiled for non-blocking MPI calls (spinor and gauge)\n"); # endif #endif printf("\n"); fflush(stdout); } #ifdef _GAUGE_COPY init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1); #else init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0); #endif init_geometry_indices(VOLUMEPLUSRAND + g_dbw2rand); if(even_odd_flag) { j = init_spinor_field(VOLUMEPLUSRAND/2, 2*k_max+1); } else { j = init_spinor_field(VOLUMEPLUSRAND, 2*k_max); } if ( j!= 0) { fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n"); exit(0); } j = init_moment_field(VOLUME, VOLUMEPLUSRAND + g_dbw2rand); if ( j!= 0) { fprintf(stderr, "Not enough memory for moment fields! Aborting...\n"); exit(0); } if(g_proc_id == 0) { fprintf(stdout,"# The number of processes is %d \n",g_nproc); printf("# The lattice size is %d x %d x %d x %d\n", (int)(T*g_nproc_t), (int)(LX*g_nproc_x), (int)(LY*g_nproc_y), (int)(g_nproc_z*LZ)); printf("# The local lattice size is %d x %d x %d x %d\n", (int)(T), (int)(LX), (int)(LY),(int) LZ); // if(even_odd_flag) { // printf("# benchmarking the even/odd preconditioned Dirac operator\n"); // } // else { // printf("# benchmarking the standard Dirac operator\n"); // } fflush(stdout); } /* define the geometry */ geometry(); /* define the boundary conditions for the fermion fields */ boundary(g_kappa); #ifdef _USE_HALFSPINOR j = init_dirac_halfspinor(); if ( j!= 0) { fprintf(stderr, "Not enough memory for halfspinor fields! Aborting...\n"); exit(0); } if(g_sloppy_precision_flag == 1) { g_sloppy_precision = 1; j = init_dirac_halfspinor32(); if ( j!= 0) { fprintf(stderr, "Not enough memory for 32-Bit halfspinor fields! Aborting...\n"); exit(0); } } # if (defined _PERSISTENT) init_xchange_halffield(); # endif #endif status = check_geometry(); if (status != 0) { fprintf(stderr, "Checking of geometry failed. Unable to proceed.\nAborting....\n"); exit(1); } #if (defined MPI && !(defined _USE_SHMEM)) check_xchange(); #endif start_ranlux(1, 123456); random_gauge_field(reproduce_randomnumber_flag, g_gauge_field); #ifdef MPI /*For parallelization: exchange the gaugefield */ xchange_gauge(g_gauge_field); #endif /* the non even/odd case now */ /*initialize the pseudo-fermion fields*/ j_max=1; sdt=0.; for (k=0;k<k_max;k++) { random_spinor_field_lexic(g_spinor_field[k], reproduce_randomnumber_flag, RN_GAUSS); } #ifdef MPI MPI_Barrier(MPI_COMM_WORLD); #endif t1 = gettime(); /* here the actual Dslash */ D_psi(g_spinor_field[0], g_spinor_field[1]); t2 = gettime(); dt=t2-t1; #ifdef MPI MPI_Allreduce (&dt, &sdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); #else sdt = dt; #endif if(g_proc_id==0) { printf("# Time for Dslash %e sec.\n", sdt); printf("\n"); fflush(stdout); } #ifdef HAVE_LIBLEMON if(g_proc_id==0) { printf("# Performing parallel IO test ...\n"); } xlfInfo = construct_paramsXlfInfo(0.5, 0); write_gauge_field( "conf.test", 64, xlfInfo); free(xlfInfo); if(g_proc_id==0) { printf("# done ...\n"); } #endif #ifdef OMP free_omp_accumulators(); #endif free_gauge_field(); free_geometry_indices(); free_spinor_field(); free_moment_field(); #ifdef MPI MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); #endif return(0); }
int main(int argc,char *argv[]) { double plaquette_energy; paramsXlfInfo *xlfInfo; #ifdef MPI MPI_Init(&argc, &argv); #endif g_rgi_C1 = 1.; /* Read the input file */ read_input("benchmark.input"); tmlqcd_mpi_init(argc, argv); #ifdef _GAUGE_COPY init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1); #else init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0); #endif init_geometry_indices(VOLUMEPLUSRAND + g_dbw2rand); if(g_proc_id == 0) { fprintf(stdout,"The number of processes is %d \n",g_nproc); printf("# The lattice size is %d x %d x %d x %d\n", (int)(T*g_nproc_t), (int)(LX*g_nproc_x), (int)(LY*g_nproc_y), (int)(g_nproc_z*LZ)); printf("# The local lattice size is %d x %d x %d x %d\n", (int)(T), (int)(LX), (int)(LY),(int) LZ); printf("# Testing IO routines for gauge-fields\n"); fflush(stdout); } /* define the geometry */ geometry(); /* define the boundary conditions for the fermion fields */ boundary(g_kappa); /* generate a random gauge field */ start_ranlux(1, 123456); random_gauge_field(reproduce_randomnumber_flag, g_gauge_field); #ifdef MPI /*For parallelization: exchange the gaugefield */ xchange_gauge(g_gauge_field); #endif plaquette_energy = measure_gauge_action(g_gauge_field) / (6.*VOLUME*g_nproc); if(g_proc_id == 0) { printf("# the first plaquette value is %e\n", plaquette_energy); printf("# writing with lime first to conf.lime\n"); } /* write with lime first */ xlfInfo = construct_paramsXlfInfo(plaquette_energy, 0); write_lime_gauge_field( "conf.lime", 64, xlfInfo); #ifdef HAVE_LIBLEMON if(g_proc_id == 0) { printf("Now we do write with lemon to conf.lemon...\n"); } write_lemon_gauge_field_parallel( "conf.lemon", 64, xlfInfo); if(g_proc_id == 0) { printf("# now we read with lemon from conf.lime\n"); } read_lemon_gauge_field_parallel("conf.lime", NULL, NULL, NULL); plaquette_energy = measure_gauge_action(g_gauge_field) / (6.*VOLUME*g_nproc); if(g_proc_id == 0) { printf("# the plaquette value after lemon read of conf.lime is %e\n", plaquette_energy); } if(g_proc_id == 0) { printf("# now we read with lemon from conf.lemon\n"); } read_lemon_gauge_field_parallel("conf.lemon", NULL, NULL, NULL); plaquette_energy = measure_gauge_action(g_gauge_field) / (6.*VOLUME*g_nproc); if(g_proc_id == 0) { printf("# the plaquette value after lemon read of conf.lemon is %e\n", plaquette_energy); } if(g_proc_id == 0) { printf("# now we read with lime from conf.lemon\n"); } read_lime_gauge_field("conf.lemon"); plaquette_energy = measure_gauge_action(g_gauge_field) / (6.*VOLUME*g_nproc); if(g_proc_id == 0) { printf("# the plaquette value after lime read of conf.lemon is %e\n", plaquette_energy); } free(xlfInfo); if(g_proc_id==0) { printf("done ...\n"); } #endif if(g_proc_id == 0) { printf("# now we read with lime from conf.lime\n"); } read_lime_gauge_field("conf.lime", NULL, NULL, NULL); plaquette_energy = measure_gauge_action(g_gauge_field) / (6.*VOLUME*g_nproc); if(g_proc_id == 0) { printf("# the plaquette value after lime read of conf.lime is %e\n", plaquette_energy); } #ifdef MPI MPI_Finalize(); #endif free_gauge_field(); free_geometry_indices(); return(0); }
int main(int argc,char *argv[]) { FILE *parameterfile=NULL, *countfile=NULL; char *filename = NULL; char datafilename[50]; char parameterfilename[50]; char gauge_filename[50]; char nstore_filename[50]; char tmp_filename[50]; char *input_filename = NULL; int status = 0, accept = 0; int j,ix,mu, trajectory_counter=1; struct timeval t1; /* Energy corresponding to the Gauge part */ double plaquette_energy = 0., rectangle_energy = 0.; /* Acceptance rate */ int Rate=0; /* Do we want to perform reversibility checks */ /* See also return_check_flag in read_input.h */ int return_check = 0; /* For getopt */ int c; paramsXlfInfo *xlfInfo; /* For online measurements */ measurement * meas; int imeas; #ifdef _KOJAK_INST #pragma pomp inst init #pragma pomp inst begin(main) #endif #if (defined SSE || defined SSE2 || SSE3) signal(SIGILL,&catch_ill_inst); #endif strcpy(gauge_filename,"conf.save"); strcpy(nstore_filename,".nstore_counter"); strcpy(tmp_filename, ".conf.tmp"); verbose = 1; g_use_clover_flag = 0; #ifdef MPI # ifdef OMP int mpi_thread_provided; MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_thread_provided); # else MPI_Init(&argc, &argv); # endif MPI_Comm_rank(MPI_COMM_WORLD, &g_proc_id); #else g_proc_id = 0; #endif while ((c = getopt(argc, argv, "h?vVf:o:")) != -1) { switch (c) { case 'f': input_filename = calloc(200, sizeof(char)); strcpy(input_filename,optarg); break; case 'o': filename = calloc(200, sizeof(char)); strcpy(filename,optarg); break; case 'v': verbose = 1; break; case 'V': fprintf(stdout,"%s %s\n",PACKAGE_STRING,git_hash); exit(0); break; case 'h': case '?': default: usage(); break; } } if(input_filename == NULL){ input_filename = "hmc.input"; } if(filename == NULL){ filename = "output"; } /* Read the input file */ if( (status = read_input(input_filename)) != 0) { fprintf(stderr, "Could not find input file: %s\nAborting...\n", input_filename); exit(-1); } /* set number of omp threads to be used */ #ifdef OMP if(omp_num_threads > 0) { omp_set_num_threads(omp_num_threads); } else { if( g_proc_id == 0 ) printf("# No value provided for OmpNumThreads, running in single-threaded mode!\n"); omp_num_threads = 1; omp_set_num_threads(omp_num_threads); } init_omp_accumulators(omp_num_threads); #endif DUM_DERI = 4; DUM_SOLVER = DUM_DERI+1; DUM_MATRIX = DUM_SOLVER+6; if(g_running_phmc) { NO_OF_SPINORFIELDS = DUM_MATRIX+8; } else { NO_OF_SPINORFIELDS = DUM_MATRIX+6; } DUM_BI_DERI = 6; DUM_BI_SOLVER = DUM_BI_DERI+7; DUM_BI_MATRIX = DUM_BI_SOLVER+6; NO_OF_BISPINORFIELDS = DUM_BI_MATRIX+6; tmlqcd_mpi_init(argc, argv); if(nstore == -1) { countfile = fopen(nstore_filename, "r"); if(countfile != NULL) { j = fscanf(countfile, "%d %d %s\n", &nstore, &trajectory_counter, gauge_input_filename); if(j < 1) nstore = 0; if(j < 2) trajectory_counter = 0; fclose(countfile); } else { nstore = 0; trajectory_counter = 0; } } #ifndef MPI g_dbw2rand = 0; #endif g_mu = g_mu1; #ifdef _GAUGE_COPY status = init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1); #else status = init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0); #endif if (status != 0) { fprintf(stderr, "Not enough memory for gauge_fields! Aborting...\n"); exit(0); } j = init_geometry_indices(VOLUMEPLUSRAND + g_dbw2rand); if (j != 0) { fprintf(stderr, "Not enough memory for geometry_indices! Aborting...\n"); exit(0); } if(even_odd_flag) { j = init_spinor_field(VOLUMEPLUSRAND/2, NO_OF_SPINORFIELDS); } else { j = init_spinor_field(VOLUMEPLUSRAND, NO_OF_SPINORFIELDS); } if (j != 0) { fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n"); exit(0); } if(even_odd_flag) { j = init_csg_field(VOLUMEPLUSRAND/2); } else { j = init_csg_field(VOLUMEPLUSRAND); } if (j != 0) { fprintf(stderr, "Not enough memory for csg fields! Aborting...\n"); exit(0); } j = init_moment_field(VOLUME, VOLUMEPLUSRAND + g_dbw2rand); if (j != 0) { fprintf(stderr, "Not enough memory for moment fields! Aborting...\n"); exit(0); } if(g_running_phmc) { j = init_bispinor_field(VOLUME/2, NO_OF_BISPINORFIELDS); if (j!= 0) { fprintf(stderr, "Not enough memory for bi-spinor fields! Aborting...\n"); exit(0); } } /* list and initialize measurements*/ if(g_proc_id == 0) { printf("\n"); for(j = 0; j < no_measurements; j++) { printf("# measurement id %d, type = %d: Frequency %d\n", j, measurement_list[j].type, measurement_list[j].freq); } } init_measurements(); /*construct the filenames for the observables and the parameters*/ strcpy(datafilename,filename); strcat(datafilename,".data"); strcpy(parameterfilename,filename); strcat(parameterfilename,".para"); if(g_proc_id == 0){ parameterfile = fopen(parameterfilename, "a"); write_first_messages(parameterfile, "hmc", git_hash); } /* define the geometry */ geometry(); /* define the boundary conditions for the fermion fields */ boundary(g_kappa); status = check_geometry(); if (status != 0) { fprintf(stderr, "Checking of geometry failed. Unable to proceed.\nAborting....\n"); exit(1); } #ifdef _USE_HALFSPINOR j = init_dirac_halfspinor(); if (j!= 0) { fprintf(stderr, "Not enough memory for halffield! Aborting...\n"); exit(-1); } if(g_sloppy_precision_flag == 1) { init_dirac_halfspinor32(); } # if (defined _PERSISTENT) init_xchange_halffield(); # endif #endif /* Initialise random number generator */ start_ranlux(rlxd_level, random_seed^nstore ); /* Set up the gauge field */ /* continue and restart */ if(startoption==3 || startoption == 2) { if(g_proc_id == 0) { printf("# Trying to read gauge field from file %s in %s precision.\n", gauge_input_filename, (gauge_precision_read_flag == 32 ? "single" : "double")); fflush(stdout); } if( (status = read_gauge_field(gauge_input_filename)) != 0) { fprintf(stderr, "Error %d while reading gauge field from %s\nAborting...\n", status, gauge_input_filename); exit(-2); } if (g_proc_id == 0){ printf("# Finished reading gauge field.\n"); fflush(stdout); } } else if (startoption == 1) { /* hot */ random_gauge_field(reproduce_randomnumber_flag, g_gauge_field); } else if(startoption == 0) { /* cold */ unit_g_gauge_field(); } /*For parallelization: exchange the gaugefield */ #ifdef MPI xchange_gauge(g_gauge_field); #endif if(even_odd_flag) { j = init_monomials(VOLUMEPLUSRAND/2, even_odd_flag); } else { j = init_monomials(VOLUMEPLUSRAND, even_odd_flag); } if (j != 0) { fprintf(stderr, "Not enough memory for monomial pseudo fermion fields! Aborting...\n"); exit(0); } init_integrator(); if(g_proc_id == 0) { for(j = 0; j < no_monomials; j++) { printf("# monomial id %d type = %d timescale %d\n", j, monomial_list[j].type, monomial_list[j].timescale); } } plaquette_energy = measure_gauge_action( (const su3**) g_gauge_field); if(g_rgi_C1 > 0. || g_rgi_C1 < 0.) { rectangle_energy = measure_rectangles( (const su3**) g_gauge_field); if(g_proc_id == 0){ fprintf(parameterfile,"# Computed rectangle value: %14.12f.\n",rectangle_energy/(12.*VOLUME*g_nproc)); } } //eneg = g_rgi_C0 * plaquette_energy + g_rgi_C1 * rectangle_energy; if(g_proc_id == 0) { fprintf(parameterfile,"# Computed plaquette value: %14.12f.\n", plaquette_energy/(6.*VOLUME*g_nproc)); printf("# Computed plaquette value: %14.12f.\n", plaquette_energy/(6.*VOLUME*g_nproc)); fclose(parameterfile); } /* set ddummy to zero */ for(ix = 0; ix < VOLUMEPLUSRAND; ix++){ for(mu=0; mu<4; mu++){ ddummy[ix][mu].d1=0.; ddummy[ix][mu].d2=0.; ddummy[ix][mu].d3=0.; ddummy[ix][mu].d4=0.; ddummy[ix][mu].d5=0.; ddummy[ix][mu].d6=0.; ddummy[ix][mu].d7=0.; ddummy[ix][mu].d8=0.; } } if(g_proc_id == 0) { gettimeofday(&t1,NULL); countfile = fopen("history_hmc_tm", "a"); fprintf(countfile, "!!! Timestamp %ld, Nsave = %d, g_mu = %e, g_mu1 = %e, g_mu_2 = %e, g_mu3 = %e, beta = %f, kappa = %f, C1 = %f, ", t1.tv_sec, Nsave, g_mu, g_mu1, g_mu2, g_mu3, g_beta, g_kappa, g_rgi_C1); for(j = 0; j < Integrator.no_timescales; j++) { fprintf(countfile, "n_int[%d] = %d ", j, Integrator.no_mnls_per_ts[j]); } fprintf(countfile, "\n"); fclose(countfile); } /* Loop for measurements */ for(j = 0; j < Nmeas; j++) { if(g_proc_id == 0) { printf("#\n# Starting trajectory no %d\n", trajectory_counter); } return_check = return_check_flag && (trajectory_counter%return_check_interval == 0); accept = update_tm(&plaquette_energy, &rectangle_energy, datafilename, return_check, Ntherm<trajectory_counter, trajectory_counter); Rate += accept; /* Save gauge configuration all Nsave times */ if((Nsave !=0) && (trajectory_counter%Nsave == 0) && (trajectory_counter!=0)) { sprintf(gauge_filename,"conf.%.4d", nstore); if(g_proc_id == 0) { countfile = fopen("history_hmc_tm", "a"); fprintf(countfile, "%.4d, measurement %d of %d, Nsave = %d, Plaquette = %e, trajectory nr = %d\n", nstore, j, Nmeas, Nsave, plaquette_energy/(6.*VOLUME*g_nproc), trajectory_counter); fclose(countfile); } nstore ++; } else { sprintf(gauge_filename,"conf.save"); } if(((Nsave !=0) && (trajectory_counter%Nsave == 0) && (trajectory_counter!=0)) || (write_cp_flag == 1) || (j >= (Nmeas - 1))) { /* If a reversibility check was performed this trajectory, and the trajectory was accepted, * then the configuration is currently stored in .conf.tmp, written out by update_tm. * In that case also a readback was performed, so no need to test .conf.tmp * In all other cases the gauge configuration still needs to be written out here. */ if (!(return_check && accept)) { xlfInfo = construct_paramsXlfInfo(plaquette_energy/(6.*VOLUME*g_nproc), trajectory_counter); if (g_proc_id == 0) { fprintf(stdout, "# Writing gauge field to %s.\n", tmp_filename); } if((status = write_gauge_field( tmp_filename, gauge_precision_write_flag, xlfInfo) != 0 )) { /* Writing the gauge field failed directly */ fprintf(stderr, "Error %d while writing gauge field to %s\nAborting...\n", status, tmp_filename); exit(-2); } if (!g_disable_IO_checks) { #ifdef HAVE_LIBLEMON /* Read gauge field back to verify the writeout */ if (g_proc_id == 0) { fprintf(stdout, "# Write completed, verifying write...\n"); } if( (status = read_gauge_field(tmp_filename)) != 0) { fprintf(stderr, "WARNING, writeout of %s returned no error, but verification discovered errors.\n", tmp_filename); fprintf(stderr, "Potential disk or MPI I/O error. Aborting...\n"); exit(-3); } if (g_proc_id == 0) { fprintf(stdout, "# Write successfully verified.\n"); } #else if (g_proc_id == 0) { fprintf(stdout, "# Write completed successfully.\n"); } #endif } free(xlfInfo); } /* Now move .conf.tmp into place */ if(g_proc_id == 0) { fprintf(stdout, "# Renaming %s to %s.\n", tmp_filename, gauge_filename); if (rename(tmp_filename, gauge_filename) != 0) { /* Errno can be inspected here for more descriptive error reporting */ fprintf(stderr, "Error while trying to rename temporary file %s to %s. Unable to proceed.\n", tmp_filename, gauge_filename); exit(-2); } countfile = fopen(nstore_filename, "w"); fprintf(countfile, "%d %d %s\n", nstore, trajectory_counter+1, gauge_filename); fclose(countfile); } } /* online measurements */ for(imeas = 0; imeas < no_measurements; imeas++){ meas = &measurement_list[imeas]; if(trajectory_counter%meas->freq == 0){ if (g_proc_id == 0) { fprintf(stdout, "#\n# Beginning online measurement.\n"); } meas->measurefunc(trajectory_counter, imeas, even_odd_flag); } } if(g_proc_id == 0) { verbose = 1; } ix = reread_input("hmc.reread"); if(g_proc_id == 0) { verbose = 0; } #ifdef MPI MPI_Barrier(MPI_COMM_WORLD); #endif if(ix == 0 && g_proc_id == 0) { countfile = fopen("history_hmc_tm", "a"); fprintf(countfile, "# Changed input parameters according to hmc.reread: measurement %d of %d\n", j, Nmeas); fclose(countfile); printf("# Changed input parameters according to hmc.reread (see stdout): measurement %d of %d\n", j, Nmeas); remove("hmc.reread"); } trajectory_counter++; } /* end of loop over trajectories */ if(g_proc_id == 0 && Nmeas != 0) { printf("# Acceptance rate was %3.2f percent, %d out of %d trajectories accepted.\n", 100.*(double)Rate/(double)Nmeas, Rate, Nmeas); fflush(stdout); parameterfile = fopen(parameterfilename, "a"); fprintf(parameterfile, "# Acceptance rate was %3.2f percent, %d out of %d trajectories accepted.\n", 100.*(double)Rate/(double)Nmeas, Rate, Nmeas); fclose(parameterfile); } #ifdef MPI MPI_Finalize(); #endif #ifdef OMP free_omp_accumulators(); #endif free_gauge_tmp(); free_gauge_field(); free_geometry_indices(); free_spinor_field(); free_moment_field(); free_monomials(); if(g_running_phmc) { free_bispinor_field(); free_chi_spinor_field(); } return(0); #ifdef _KOJAK_INST #pragma pomp inst end(main) #endif }
int main(int argc,char *argv[]) { FILE *parameterfile = NULL; char datafilename[206]; char parameterfilename[206]; char conf_filename[50]; char scalar_filename[50]; char * input_filename = NULL; char * filename = NULL; double plaquette_energy; #ifdef _USE_HALFSPINOR #undef _USE_HALFSPINOR printf("# WARNING: USE_HALFSPINOR will be ignored (not supported here).\n"); #endif if(even_odd_flag) { even_odd_flag=0; printf("# WARNING: even_odd_flag will be ignored (not supported here).\n"); } int j,j_max,k,k_max = 2; _Complex double * drvsc; #ifdef HAVE_LIBLEMON paramsXlfInfo *xlfInfo; #endif int status = 0; static double t1,t2,dt,sdt,dts,qdt,sqdt; double antioptaway=0.0; #ifdef MPI static double dt2; DUM_DERI = 6; DUM_SOLVER = DUM_DERI+2; DUM_MATRIX = DUM_SOLVER+6; NO_OF_SPINORFIELDS = DUM_MATRIX+2; #ifdef OMP int mpi_thread_provided; MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_thread_provided); #else MPI_Init(&argc, &argv); #endif MPI_Comm_rank(MPI_COMM_WORLD, &g_proc_id); #else g_proc_id = 0; #endif g_rgi_C1 = 1.; process_args(argc,argv,&input_filename,&filename); set_default_filenames(&input_filename, &filename); /* Read the input file */ if( (j = read_input(input_filename)) != 0) { fprintf(stderr, "Could not find input file: %s\nAborting...\n", input_filename); exit(-1); } if(g_proc_id==0) { printf("parameter rho_BSM set to %f\n", rho_BSM); printf("parameter eta_BSM set to %f\n", eta_BSM); printf("parameter m0_BSM set to %f\n", m0_BSM); } #ifdef OMP init_openmp(); #endif tmlqcd_mpi_init(argc, argv); if(g_proc_id==0) { #ifdef SSE printf("# The code was compiled with SSE instructions\n"); #endif #ifdef SSE2 printf("# The code was compiled with SSE2 instructions\n"); #endif #ifdef SSE3 printf("# The code was compiled with SSE3 instructions\n"); #endif #ifdef P4 printf("# The code was compiled for Pentium4\n"); #endif #ifdef OPTERON printf("# The code was compiled for AMD Opteron\n"); #endif #ifdef _GAUGE_COPY printf("# The code was compiled with -D_GAUGE_COPY\n"); #endif #ifdef BGL printf("# The code was compiled for Blue Gene/L\n"); #endif #ifdef BGP printf("# The code was compiled for Blue Gene/P\n"); #endif #ifdef _USE_HALFSPINOR printf("# The code was compiled with -D_USE_HALFSPINOR\n"); #endif #ifdef _USE_SHMEM printf("# The code was compiled with -D_USE_SHMEM\n"); #ifdef _PERSISTENT printf("# The code was compiled for persistent MPI calls (halfspinor only)\n"); #endif #endif #ifdef MPI #ifdef _NON_BLOCKING printf("# The code was compiled for non-blocking MPI calls (spinor and gauge)\n"); #endif #endif printf("\n"); fflush(stdout); } #ifdef _GAUGE_COPY init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1); #else init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0); #endif init_geometry_indices(VOLUMEPLUSRAND + g_dbw2rand); j = init_bispinor_field(VOLUMEPLUSRAND, 12); if ( j!= 0) { fprintf(stderr, "Not enough memory for bispinor fields! Aborting...\n"); exit(0); } j = init_spinor_field(VOLUMEPLUSRAND, 12); if ( j!= 0) { fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n"); exit(0); } int numbScalarFields = 4; j = init_scalar_field(VOLUMEPLUSRAND, numbScalarFields); if ( j!= 0) { fprintf(stderr, "Not enough memory for scalar fields! Aborting...\n"); exit(0); } drvsc = malloc(18*VOLUMEPLUSRAND*sizeof(_Complex double)); if(g_proc_id == 0) { fprintf(stdout,"# The number of processes is %d \n",g_nproc); printf("# The lattice size is %d x %d x %d x %d\n", (int)(T*g_nproc_t), (int)(LX*g_nproc_x), (int)(LY*g_nproc_y), (int)(g_nproc_z*LZ)); printf("# The local lattice size is %d x %d x %d x %d\n", (int)(T), (int)(LX), (int)(LY),(int) LZ); fflush(stdout); } /* define the geometry */ geometry(); j = init_bsm_2hop_lookup(VOLUME); if ( j!= 0) { // this should not be reached since the init function calls fatal_error anyway fprintf(stderr, "Not enough memory for BSM2b 2hop lookup table! Aborting...\n"); exit(0); } /* define the boundary conditions for the fermion fields */ /* for the actual inversion, this is done in invert.c as the operators are iterated through */ // // For the BSM operator we don't use kappa normalisation, // as a result, when twisted boundary conditions are applied this needs to be unity. // In addition, unlike in the Wilson case, the hopping term comes with a plus sign. // However, in boundary(), the minus sign for the Wilson case is implicitly included. // We therefore use -1.0 here. boundary(-1.0); status = check_geometry(); if (status != 0) { fprintf(stderr, "Checking of geometry failed. Unable to proceed.\nAborting....\n"); exit(1); } #if (defined MPI && !(defined _USE_SHMEM)) // fails, we're not using spinor fields // check_xchange(); #endif start_ranlux(1, 123456); // read gauge field if( strcmp(gauge_input_filename, "create_random_gaugefield") == 0 ) { random_gauge_field(reproduce_randomnumber_flag, g_gauge_field); } else { sprintf(conf_filename, "%s.%.4d", gauge_input_filename, nstore); if (g_cart_id == 0) { printf("#\n# Trying to read gauge field from file %s in %s precision.\n", conf_filename, (gauge_precision_read_flag == 32 ? "single" : "double")); fflush(stdout); } int i; if( (i = read_gauge_field(conf_filename,g_gauge_field)) !=0) { fprintf(stderr, "Error %d while reading gauge field from %s\n Aborting...\n", i, conf_filename); exit(-2); } if (g_cart_id == 0) { printf("# Finished reading gauge field.\n"); fflush(stdout); } } // read scalar field if( strcmp(scalar_input_filename, "create_random_scalarfield") == 0 ) { for( int s=0; s<numbScalarFields; s++ ) ranlxd(g_scalar_field[s], VOLUME); } else { sprintf(scalar_filename, "%s.%d", scalar_input_filename, nscalar); if (g_cart_id == 0) { printf("#\n# Trying to read scalar field from file %s in %s precision.\n", scalar_filename, (scalar_precision_read_flag == 32 ? "single" : "double")); fflush(stdout); } int i; if( (i = read_scalar_field(scalar_filename,g_scalar_field)) !=0) { fprintf(stderr, "Error %d while reading scalar field from %s\n Aborting...\n", i, scalar_filename); exit(-2); } if (g_cart_id == 0) { printf("# Finished reading scalar field.\n"); fflush(stdout); } } #ifdef MPI xchange_gauge(g_gauge_field); #endif /*compute the energy of the gauge field*/ plaquette_energy = measure_plaquette( (const su3**) g_gauge_field); if (g_cart_id == 0) { printf("# The computed plaquette value is %e.\n", plaquette_energy / (6.*VOLUME*g_nproc)); fflush(stdout); } #ifdef MPI for( int s=0; s<numbScalarFields; s++ ) generic_exchange(g_scalar_field[s], sizeof(scalar)); #endif /*initialize the bispinor fields*/ j_max=1; sdt=0.; // w random_spinor_field_lexic( (spinor*)(g_bispinor_field[4]), reproduce_randomnumber_flag, RN_GAUSS); random_spinor_field_lexic( (spinor*)(g_bispinor_field[4])+VOLUME, reproduce_randomnumber_flag, RN_GAUSS); // for the D^\dagger test: // v random_spinor_field_lexic( (spinor*)(g_bispinor_field[5]), reproduce_randomnumber_flag, RN_GAUSS); random_spinor_field_lexic( (spinor*)(g_bispinor_field[5])+VOLUME, reproduce_randomnumber_flag, RN_GAUSS); #if defined MPI generic_exchange(g_bispinor_field[4], sizeof(bispinor)); #endif // print L2-norm of source: double squarenorm = square_norm((spinor*)g_bispinor_field[4], 2*VOLUME, 1); if(g_proc_id==0) { printf("\n# square norm of the source: ||w||^2 = %e\n\n", squarenorm); fflush(stdout); } double t_MG, t_BK; /* inversion needs to be done first because it uses loads of the g_bispinor_fields internally */ #if TEST_INVERSION if(g_proc_id==1) printf("Testing inversion\n"); // Bartek's operator t1 = gettime(); cg_her_bi(g_bispinor_field[9], g_bispinor_field[4], 25000, 1.0e-14, 0, VOLUME, &Q2_psi_BSM2b); t_BK = gettime() - t1; // Marco's operator t1 = gettime(); cg_her_bi(g_bispinor_field[8], g_bispinor_field[4], 25000, 1.0e-14, 0, VOLUME, &Q2_psi_BSM2m); t_MG = gettime() - t1; if(g_proc_id==0) printf("Operator inversion time: t_MG = %f sec \t t_BK = %f sec\n\n", t_MG, t_BK); #endif /* now apply the operators to the same bispinor field and do various comparisons */ // Marco's operator #ifdef MPI MPI_Barrier(MPI_COMM_WORLD); #endif t_MG = 0.0; t1 = gettime(); D_psi_BSM2m(g_bispinor_field[0], g_bispinor_field[4]); t1 = gettime() - t1; #ifdef MPI MPI_Allreduce (&t1, &t_MG, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); #else t_MG = t1; #endif // Bartek's operator #ifdef MPI MPI_Barrier(MPI_COMM_WORLD); #endif t_BK = 0.0; t1 = gettime(); D_psi_BSM2b(g_bispinor_field[1], g_bispinor_field[4]); t1 = gettime() - t1; #ifdef MPI MPI_Allreduce (&t1, &t_BK, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); #else t_BK = t1; #endif if(g_proc_id==0) printf("Operator application time: t_MG = %f sec \t t_BK = %f sec\n\n", t_MG, t_BK); squarenorm = square_norm((spinor*)g_bispinor_field[0], 2*VOLUME, 1); if(g_proc_id==0) { printf("# || D_MG w ||^2 = %.16e\n", squarenorm); fflush(stdout); } squarenorm = square_norm((spinor*)g_bispinor_field[1], 2*VOLUME, 1); if(g_proc_id==0) { printf("# || D_BK w ||^2 = %.16e\n\n\n", squarenorm); fflush(stdout); } diff( (spinor*)g_bispinor_field[3], (spinor*)g_bispinor_field[0], (spinor*)g_bispinor_field[1], 2*VOLUME); printf("element-wise difference between (D_BK w) and (D_MG w)\n"); printf("( D_MG w - M_BK w )->sp_up.s0.c0= %.16e + I*(%.16e)\n\n", creal(g_bispinor_field[3][0].sp_up.s0.c0), cimag(g_bispinor_field[3][0].sp_up.s0.c0) ); double diffnorm = square_norm( (spinor*) g_bispinor_field[3], 2*VOLUME, 1 ); if(g_proc_id==0){ printf("Square norm of the difference\n"); printf("|| D_MG w - D_BK w ||^2 = %.16e \n\n\n", diffnorm); } // < D w, v > printf("Check consistency of D and D^dagger\n"); _Complex double prod1_MG = scalar_prod( (spinor*)g_bispinor_field[0], (spinor*)g_bispinor_field[5], 2*VOLUME, 1 ); if(g_proc_id==0) printf("< D_MG w, v > = %.16e + I*(%.16e)\n", creal(prod1_MG), cimag(prod1_MG)); _Complex double prod1_BK = scalar_prod( (spinor*)g_bispinor_field[1], (spinor*)g_bispinor_field[5], 2*VOLUME, 1 ); if(g_proc_id==0) printf("< D_BK w, v > = %.16e + I*(%.16e)\n\n", creal(prod1_BK), cimag(prod1_BK)); // < w, D^\dagger v > t_MG = gettime(); D_psi_dagger_BSM2m(g_bispinor_field[6], g_bispinor_field[5]); t_MG = gettime()-t_MG; t_BK = gettime(); D_psi_dagger_BSM2b(g_bispinor_field[7], g_bispinor_field[5]); t_BK = gettime() - t_BK; if(g_proc_id==0) printf("Operator dagger application time: t_MG = %f sec \t t_BK = %f sec\n\n", t_MG, t_BK); _Complex double prod2_MG = scalar_prod((spinor*)g_bispinor_field[4], (spinor*)g_bispinor_field[6], 2*VOLUME, 1); _Complex double prod2_BK = scalar_prod((spinor*)g_bispinor_field[4], (spinor*)g_bispinor_field[7], 2*VOLUME, 1); if( g_proc_id == 0 ){ printf("< w, D_MG^dagger v > = %.16e + I*(%.16e)\n", creal(prod2_MG), cimag(prod2_MG)); printf("< w, D_BK^dagger v > = %.16e + I*(%.16e)\n", creal(prod2_BK), cimag(prod2_BK)); printf("\n| < D_MG w, v > - < w, D_MG^dagger v > | = %.16e\n",cabs(prod2_MG-prod1_MG)); printf("| < D_BK w, v > - < w, D_BK^dagger v > | = %.16e\n\n",cabs(prod2_BK-prod1_BK)); } #if TEST_INVERSION // check result of inversion Q2_psi_BSM2m(g_bispinor_field[10], g_bispinor_field[8]); Q2_psi_BSM2b(g_bispinor_field[11], g_bispinor_field[8]); assign_diff_mul((spinor*)g_bispinor_field[10], (spinor*)g_bispinor_field[4], 1.0, 2*VOLUME); assign_diff_mul((spinor*)g_bispinor_field[11], (spinor*)g_bispinor_field[4], 1.0, 2*VOLUME); double squarenorm_MGMG = square_norm((spinor*)g_bispinor_field[10], 2*VOLUME, 1); double squarenorm_BKMG = square_norm((spinor*)g_bispinor_field[11], 2*VOLUME, 1); if(g_proc_id==0) { printf("# ||Q2_MG*(Q2_MG)^-1*(b)-b||^2 = %.16e\n\n", squarenorm_MGMG); printf("# ||Q2_BK*(Q2_MG)^-1*(b)-b||^2 = %.16e\n\n", squarenorm_BKMG); fflush(stdout); } Q2_psi_BSM2b(g_bispinor_field[10], g_bispinor_field[9]); Q2_psi_BSM2m(g_bispinor_field[11], g_bispinor_field[9]); assign_diff_mul((spinor*)g_bispinor_field[10], (spinor*)g_bispinor_field[4], 1.0, 2*VOLUME); assign_diff_mul((spinor*)g_bispinor_field[11], (spinor*)g_bispinor_field[4], 1.0, 2*VOLUME); double squarenorm_BKBK = square_norm((spinor*)g_bispinor_field[10], 2*VOLUME, 1); double squarenorm_MGBK = square_norm((spinor*)g_bispinor_field[11], 2*VOLUME, 1); if(g_proc_id==0) { printf("# ||Q2_BK*(Q2_BK)^-1*(b)-b||^2 = %.16e\n\n", squarenorm_BKBK); printf("# ||Q2_MG*(Q2_BK)^-1*(b)-b||^2 = %.16e\n\n", squarenorm_MGBK); fflush(stdout); } #endif #ifdef OMP free_omp_accumulators(); #endif free_gauge_field(); free_geometry_indices(); free_bispinor_field(); free_scalar_field(); #ifdef MPI MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); #endif return(0); }
int main(int argc, char **argv) { //initialize plqcd int init_status; if(argc < 3) { fprintf(stderr,"Error. Must pass the name of the input file and the number of multiplications to be performed \n"); fprintf(stderr,"Usage: %s input_file_name Nmul\n",argv[0]); exit(1); } init_status = init_plqcd(argc,argv); if(init_status != 0) printf("Error initializing plqcd\n"); int proc_id; int i,j,k,Nmul; proc_id = ipr(plqcd_g.cpr); Nmul=atoi(argv[2]); #if 0 //Intialize the ranlux random number generator start_ranlux(0,1); #endif int NPROCS=plqcd_g.nprocs[0]*plqcd_g.nprocs[1]*plqcd_g.nprocs[2]*plqcd_g.nprocs[3]; char ofname[128]; char buff[128]; strcpy(ofname,"test_hopping_output.procgrid."); sprintf(buff,"%d-%d-%d-%d.nthreads.%d.proc.%d",plqcd_g.nprocs[0],plqcd_g.nprocs[1],plqcd_g.nprocs[2],plqcd_g.nprocs[3],plqcd_g.nthread,proc_id); strcat(ofname,buff); FILE *ofp; //FILE *ofp_source; //if(proc_id==0) //{ // ofp_source = fopen("test_rand_vals.out","w"); //} if(proc_id==0) { ofp=fopen(ofname,"w"); fprintf(ofp,"INPUT GLOBALS:\n"); fprintf(ofp,"----------------\n"); fprintf(ofp,"NPROC0 %d, NPROC1 %d, NPROC2 %d, NPROC3 %d, NTHREAD %d\n",plqcd_g.nprocs[0],plqcd_g.nprocs[1],plqcd_g.nprocs[2],plqcd_g.nprocs[3], plqcd_g.nthread); fprintf(ofp,"L0 %d, L1 %d, L2 %d, L3 %d\n\n",plqcd_g.latdims[0],plqcd_g.latdims[1],plqcd_g.latdims[2],plqcd_g.latdims[3]); //printf("sizeof(spinor) %ld, sizeof(halfspinor) %ld, sizeof(su3) %ld \n",sizeof(spinor),sizeof(halfspinor),sizeof(su3)); } int nthr; #ifdef _OPENMP #pragma omp parallel { nthr=omp_get_num_threads(); if(omp_get_thread_num() == 0) if(proc_id==0) fprintf(ofp,"Number of threads as returned by openmp %d\n",nthr); } #endif /***************************************************** *Testing the Dirac operator interface ****************************************************/ spinor *pin= (spinor *) amalloc(plqcd_g.VOLUME*sizeof(spinor), plqcd_g.ALIGN); if(pin==NULL) { fprintf(stderr,"ERROR: insufficient memory for spinor pin.\n"); exit(2); } spinor *pout= (spinor *) amalloc(plqcd_g.VOLUME*sizeof(spinor), plqcd_g.ALIGN); if(pout==NULL) { fprintf(stderr,"ERROR: insufficient memory for spinor pout.\n"); exit(2); } su3 *ufield= (su3 *) amalloc(4*plqcd_g.VOLUME*sizeof(su3), plqcd_g.ALIGN); if(ufield==NULL) { fprintf(stderr,"ERROR: insufficient memory for gauge field ufield.\n"); exit(2); } //256 arrays #ifdef AVX spinor_256 *pin_256= (spinor_256 *) amalloc(plqcd_g.VOLUME/2*sizeof(spinor_256), plqcd_g.ALIGN); if(pin_256==NULL) { fprintf(stderr,"ERROR: insufficient memory for spinor pin_256.\n"); exit(2); } spinor_256 *pout_256= (spinor_256 *) amalloc(plqcd_g.VOLUME/2*sizeof(spinor_256), plqcd_g.ALIGN); if(pout_256==NULL) { fprintf(stderr,"ERROR: insufficient memory for spinor pout_256.\n"); exit(2); } su3_256 *ufield_256= (su3_256 *) amalloc(4*plqcd_g.VOLUME/2*sizeof(su3_256), plqcd_g.ALIGN); if(ufield_256==NULL) { fprintf(stderr,"ERROR: insufficient memory for gauge field ufield_256.\n"); exit(2); } #endif //512 arrays #ifdef MIC spinor_512 *pin_512= (spinor_512 *) amalloc(plqcd_g.VOLUME/4*sizeof(spinor_512), plqcd_g.ALIGN); if(pin_512==NULL) { fprintf(stderr,"ERROR: insufficient memory for spinor pin_512.\n"); exit(2); } spinor_512 *pout_512= (spinor_512 *) amalloc(plqcd_g.VOLUME/4*sizeof(spinor_512), plqcd_g.ALIGN); if(pout_512==NULL) { fprintf(stderr,"ERROR: insufficient memory for spinor pout_512.\n"); exit(2); } su3_512 *ufield_512= (su3_512 *) amalloc(4*plqcd_g.VOLUME/4*sizeof(su3_512), plqcd_g.ALIGN); if(ufield_512==NULL) { fprintf(stderr,"ERROR: insufficient memory for gauge field ufield_512.\n"); exit(2); } #endif //intialize the random number generator by a seed equals to the process rank srand((unsigned int) proc_id); //Initialize the input spinor and gauge links to random numbers //intialize the random number generator by a seed equals to the process rank srand((unsigned int) proc_id); //Initialize the input spinor and gauge links to random numbers double ru[18]; double rs[24]; for(i=0; i<plqcd_g.VOLUME; i++) { //ranlxd(rs,24); for(j=0; j<24; j++) { rs[j]= rand() / (double)RAND_MAX; //fprintf(stderr,"rs[%d]=%lf\n",j,rs[j]); } pin[i].s0.c0=rs[0]+I*rs[1]; pin[i].s0.c1=rs[2]+I*rs[3]; pin[i].s0.c2=rs[4]+I*rs[5]; pin[i].s1.c0=rs[6]+I*rs[7]; pin[i].s1.c1=rs[8]+I*rs[9]; pin[i].s1.c2=rs[10]+I*rs[11]; pin[i].s2.c0=rs[12]+I*rs[13]; pin[i].s2.c1=rs[14]+I*rs[15]; pin[i].s2.c2=rs[16]+I*rs[17]; pin[i].s3.c0=rs[18]+I*rs[19]; pin[i].s3.c1=rs[20]+I*rs[21]; pin[i].s3.c2=rs[22]+I*rs[23]; //ranlxd(rs,24); for(j=0; j<24; j++) rs[j]= rand() / (double)RAND_MAX; pout[i].s0.c0=rs[0]+I*rs[1]; pout[i].s0.c1=rs[2]+I*rs[3]; pout[i].s0.c2=rs[4]+I*rs[5]; pout[i].s1.c0=rs[6]+I*rs[7]; pout[i].s1.c1=rs[8]+I*rs[9]; pout[i].s1.c2=rs[10]+I*rs[11]; pout[i].s2.c0=rs[12]+I*rs[13]; pout[i].s2.c1=rs[14]+I*rs[15]; pout[i].s2.c2=rs[16]+I*rs[17]; pout[i].s3.c0=rs[18]+I*rs[19]; pout[i].s3.c1=rs[20]+I*rs[21]; pout[i].s3.c2=rs[22]+I*rs[23]; for(j=0; j<4; j++) { //ranlxd(ru,18); for(k=0; k<18; k++) { ru[k]= rand() / (double)RAND_MAX; //fprintf(stderr,"ru[%d]=%lf\n",k,ru[k]); } ufield[4*i+j].c00=ru[0]+I*ru[1]; ufield[4*i+j].c01=ru[2]+I*ru[3]; ufield[4*i+j].c02=ru[4]+I*ru[5]; ufield[4*i+j].c10=ru[6]+I*ru[7]; ufield[4*i+j].c11=ru[8]+I*ru[9]; ufield[4*i+j].c12=ru[10]+I*ru[11]; ufield[4*i+j].c20=ru[12]+I*ru[13]; ufield[4*i+j].c21=ru[14]+I*ru[15]; ufield[4*i+j].c22=ru[16]+I*ru[17]; } } #ifdef AVX for(i=0; i<plqcd_g.VOLUME; i +=2) { for(j=0; j<4; j++) copy_su3_to_su3_256(ufield_256+4*i/2+j, ufield+4*i+j, ufield+4*(i+1)+j); copy_spinor_to_spinor_256(pin_256+i/2, pin+i, pin+i+1); copy_spinor_to_spinor_256(pout_256+i/2, pout+i, pout+i+1); } #endif #ifdef MIC for(i=0; i<plqcd_g.VOLUME; i +=4) { for(j=0; j<4; j++) copy_su3_to_su3_512(ufield_512+4*i/4+j, ufield+4*i+j, ufield+4*(i+1)+j, ufield+4*(i+2)+j, ufield+4*(i+3)+j); copy_spinor_to_spinor_512(pin_512+i/4, pin+i, pin+i+1, pin+i+2, pin+i+3); copy_spinor_to_spinor_512(pout_512+i/4, pout+i, pout+i+1, pout+i+2, pout+i+3); } #endif double total,t1=0.0,t2=0.0,mytotal; int matvecs; #ifdef ASSYMBLY //--------------------------------------------- //1: non-blocking assymbly/c version //--------------------------------------------- matvecs=0; total=0.0; mytotal =0.0; while(mytotal < 30) { MPI_Barrier(MPI_COMM_WORLD); for(i=0; i<Nmul; i++) { t1=plqcd_hopping_matrix_eo_sse3_assymbly(pin,pout,ufield); t2=plqcd_hopping_matrix_oe_sse3_assymbly(pin,pout,ufield); mytotal += t1+t2; } matvecs += Nmul; } MPI_Reduce(&mytotal,&total,1,MPI_DOUBLE,MPI_SUM,0, MPI_COMM_WORLD); MPI_Bcast(&total,1,MPI_DOUBLE,0, MPI_COMM_WORLD); if (proc_id==0) { total /= (double)(NPROCS); } if(proc_id==0) { fprintf(ofp,"non-blocking assymbly/c version:\n"); fprintf(ofp,"------------------------------------------\n"); fprintf(ofp,"test_hopping\tmult\t%d\ttotal(sec)\t%lf\tMFlops/process\t%lf\n", matvecs,total,matvecs*plqcd_g.VOLUME/2.0*1200/total/1e+6); } #endif #ifdef SSE3_INTRIN //--------------------------------------------- //1: non-blocking sse3 with intrinsics version //--------------------------------------------- matvecs=0; total=0.0; mytotal =0.0; while(mytotal < 30) { MPI_Barrier(MPI_COMM_WORLD); for(i=0; i<Nmul; i++) { t1=plqcd_hopping_matrix_eo_sse3_intrin(pin,pout,ufield); t2=plqcd_hopping_matrix_oe_sse3_intrin(pin,pout,ufield); mytotal += t1+t2; } matvecs += Nmul; } MPI_Reduce(&mytotal,&total,1,MPI_DOUBLE,MPI_SUM,0, MPI_COMM_WORLD); MPI_Bcast(&total,1,MPI_DOUBLE,0, MPI_COMM_WORLD); if (proc_id==0) { total /= (double)(NPROCS); } if(proc_id==0) { fprintf(ofp,"non-blocking sse3 with intrinsics version:\n"); fprintf(ofp,"------------------------------------------\n"); fprintf(ofp,"test_hopping\tmult\t%d\ttotal(sec)\t%lf\tMFlops/process\t%lf\n", matvecs,total,matvecs*plqcd_g.VOLUME/2.0*1200/total/1e+6); } //--------------------------------------------- //2: blocking sse3 with intrinsics version //--------------------------------------------- matvecs=0; total=0.0; mytotal =0.0; while(mytotal < 30) { MPI_Barrier(MPI_COMM_WORLD); for(i=0; i<Nmul; i++) { t1=plqcd_hopping_matrix_eo_sse3_intrin_blocking(pin,pout,ufield); t2=plqcd_hopping_matrix_oe_sse3_intrin_blocking(pin,pout,ufield); mytotal += t1+t2; } matvecs += Nmul; } MPI_Reduce(&mytotal,&total,1,MPI_DOUBLE,MPI_SUM,0, MPI_COMM_WORLD); MPI_Bcast(&total,1,MPI_DOUBLE,0, MPI_COMM_WORLD); if (proc_id==0) { total /= (double)(NPROCS); } if(proc_id==0) { fprintf(ofp,"blocking sse3 with intrinsics version:\n"); fprintf(ofp,"------------------------------------------\n"); fprintf(ofp,"test_hopping\tmult\t%d\ttotal(sec)\t%lf\tMFlops/process\t%lf\n", matvecs,total,matvecs*plqcd_g.VOLUME/2.0*1200/total/1e+6); } #endif #ifdef AVX //--------------------------------------------- //2: avx version //--------------------------------------------- matvecs=0; total=0.0; mytotal =0.0; t1=plqcd_hopping_matrix_eo_intrin_256(pin_256,pout_256,ufield_256); while(mytotal < 30) { MPI_Barrier(MPI_COMM_WORLD); for(i=0; i<Nmul; i++) { t1=plqcd_hopping_matrix_eo_intrin_256(pin_256,pout_256,ufield_256); t2=plqcd_hopping_matrix_oe_intrin_256(pin_256,pout_256,ufield_256); mytotal += t1+t2; } matvecs += Nmul; } MPI_Reduce(&mytotal,&total,1,MPI_DOUBLE,MPI_SUM,0, MPI_COMM_WORLD); MPI_Bcast(&total,1,MPI_DOUBLE,0, MPI_COMM_WORLD); if (proc_id==0) { total /= (double)(NPROCS); } if(proc_id==0) { fprintf(ofp,"avxversion:\n"); fprintf(ofp,"------------------------------------------\n"); fprintf(ofp,"test_hopping\tmult\t%d\ttotal(sec)\t%lf\tMFlops/process\t%lf\n", matvecs,total,matvecs*plqcd_g.VOLUME/2.0*1200/total/1e+6); } #endif #ifdef MIC #ifdef TEST_HOPPING_MIC //--------------------------------------------- //3: MIC version full su3 matrix //--------------------------------------------- matvecs=0; total=0.0; mytotal =0.0; t1=plqcd_hopping_matrix_eo_single_mic(pin_512,pout_512,ufield_512); while(mytotal < 30) { MPI_Barrier(MPI_COMM_WORLD); for(i=0; i<Nmul; i++) { //t1=plqcd_hopping_matrix_eo_intrin_512(pin_512,pout_512,ufield_512); //t2=plqcd_hopping_matrix_oe_intrin_512(pin_512,pout_512,ufield_512); t1=plqcd_hopping_matrix_eo_single_mic(pin_512,pout_512,ufield_512); t2=plqcd_hopping_matrix_eo_single_mic(pin_512,pout_512,ufield_512); mytotal += t1+t2; } matvecs += 2*Nmul; } MPI_Reduce(&mytotal,&total,1,MPI_DOUBLE,MPI_SUM,0, MPI_COMM_WORLD); MPI_Bcast(&total,1,MPI_DOUBLE,0, MPI_COMM_WORLD); if (proc_id==0) { total /= (double)(NPROCS); } if(proc_id==0) { fprintf(ofp,"mic version, 3x3 links:\n"); fprintf(ofp,"------------------------------------------\n"); fprintf(ofp,"test_hopping\tmult\t%d\ttotal(sec)\t%lf\tMFlops/process\t%lf\n", matvecs,total,(double )matvecs*plqcd_g.VOLUME/2.0*1200/total/1e+6); } //--------------------------------------------- //3: MIC version full reduced su3 storage //--------------------------------------------- matvecs=0; total=0.0; mytotal =0.0; t1=plqcd_hopping_matrix_eo_single_mic_short(pin_512,pout_512,ufield_512); while(mytotal < 30) { MPI_Barrier(MPI_COMM_WORLD); for(i=0; i<Nmul; i++) { //t1=plqcd_hopping_matrix_eo_intrin_512(pin_512,pout_512,ufield_512); //t2=plqcd_hopping_matrix_oe_intrin_512(pin_512,pout_512,ufield_512); t1=plqcd_hopping_matrix_eo_single_mic_short(pin_512,pout_512,ufield_512); t2=plqcd_hopping_matrix_eo_single_mic_short(pin_512,pout_512,ufield_512); mytotal += t1+t2; } matvecs += 2*Nmul; } MPI_Reduce(&mytotal,&total,1,MPI_DOUBLE,MPI_SUM,0, MPI_COMM_WORLD); MPI_Bcast(&total,1,MPI_DOUBLE,0, MPI_COMM_WORLD); if (proc_id==0) { total /= (double)(NPROCS); } if(proc_id==0) { fprintf(ofp,"mic version, 2x3 links:\n"); fprintf(ofp,"------------------------------------------\n"); fprintf(ofp,"test_hopping\tmult\t%d\ttotal(sec)\t%lf\tMFlops/process\t%lf\n", matvecs,total,(double )matvecs*plqcd_g.VOLUME/2.0*1200/total/1e+6); } #endif #ifdef TEST_SU3MUL_MIC matvecs=0; total=0.0; mytotal =0.0; //while(mytotal < 10) //{ MPI_Barrier(MPI_COMM_WORLD); for(i=0; i<Nmul; i++) { t1=stop_watch(0.0); #ifdef _OPENMP #pragma omp parallel { #endif __m512d U[3][3], gin[3],gout[3]; su3_512 *u0; su3_vector_512 *hin,*hout; #ifdef _OPENMP #pragma omp for #endif for(j=0; j< plqcd_g.VOLUME/4; j++) { u0 = &ufield_512[4*j]; hin = &pin_512[j].s0; hout= &pout_512[j].s0; intrin_su3_load_512(U,u0); intrin_vector_load_512(gin,hin); intrin_su3_multiply_512(gout,U,gin); intrin_vector_store_512(hout,gout); u0++; hin++; hout++; intrin_su3_load_512(U,u0); intrin_vector_load_512(gin,hin); intrin_su3_multiply_512(gout,U,gin); intrin_vector_store_512(hout,gout); u0++; hin++; hout++; intrin_su3_load_512(U,u0); intrin_vector_load_512(gin,hin); intrin_su3_multiply_512(gout,U,gin); intrin_vector_store_512(hout,gout); u0++; hin++; hout++; intrin_su3_load_512(U,u0); intrin_vector_load_512(gin,hin); intrin_su3_multiply_512(gout,U,gin); intrin_vector_store_512(hout,gout); } #ifdef _OPENMP } #endif t2 = stop_watch(t1); mytotal += t2; } matvecs += 4*Nmul*plqcd_g.VOLUME; //} MPI_Reduce(&mytotal,&total,1,MPI_DOUBLE,MPI_SUM,0, MPI_COMM_WORLD); MPI_Bcast(&total,1,MPI_DOUBLE,0, MPI_COMM_WORLD); if (proc_id==0) { total /= (double)(NPROCS); } if(proc_id==0) { fprintf(ofp,"su3mul mic version:\n"); fprintf(ofp,"------------------------------------------\n"); fprintf(ofp,"test_hopping\tmult\t%d\ttotal(sec)\t%lf\tMFlops/process\t%lf\n", matvecs,total,matvecs*66.0/total/1e+6); } #endif #endif //MIC finalize_plqcd(); return 0; }
int main(int argc,char *argv[]) { int irw,isp,ispp[2],status[6],mnkv; int bs[4],Ns,nmx,nkv,nmr,ncy,ninv; double kappa,m0,dm,mu0,mu,res,mres; double sqne,sqnp[2]; complex_dble lnw1[2],lnr,dr,drmx; solver_parms_t sp; mrw_masses_t ms; MPI_Init(&argc,&argv); MPI_Comm_rank(MPI_COMM_WORLD,&my_rank); if (my_rank==0) { flog=freopen("check2.log","w",stdout); fin=freopen("check2.in","r",stdin); printf("\n"); printf("Direct check of mrw2\n"); printf("----------------------\n\n"); printf("%dx%dx%dx%d lattice, ",NPROC0*L0,NPROC1*L1,NPROC2*L2,NPROC3*L3); printf("%dx%dx%dx%d process grid, ",NPROC0,NPROC1,NPROC2,NPROC3); printf("%dx%dx%dx%d local lattice\n\n",L0,L1,L2,L3); } mnkv=0; mres=0.0; for (isp=0;isp<3;isp++) { read_solver_parms(isp); sp=solver_parms(isp); if (sp.res>mres) mres=sp.res; if (sp.nkv>mnkv) mnkv=sp.nkv; } read_bc_parms(); if (my_rank==0) { find_section("SAP"); read_line("bs","%d %d %d %d",bs,bs+1,bs+2,bs+3); } MPI_Bcast(bs,4,MPI_INT,0,MPI_COMM_WORLD); set_sap_parms(bs,0,1,1); if (my_rank==0) { find_section("Deflation subspace"); read_line("bs","%d %d %d %d",bs,bs+1,bs+2,bs+3); read_line("Ns","%d",&Ns); } MPI_Bcast(bs,4,MPI_INT,0,MPI_COMM_WORLD); MPI_Bcast(&Ns,1,MPI_INT,0,MPI_COMM_WORLD); set_dfl_parms(bs,Ns); if (my_rank==0) { find_section("Deflation subspace generation"); read_line("kappa","%lf",&kappa); read_line("mu","%lf",&mu); read_line("ninv","%d",&ninv); read_line("nmr","%d",&nmr); read_line("ncy","%d",&ncy); } MPI_Bcast(&kappa,1,MPI_DOUBLE,0,MPI_COMM_WORLD); MPI_Bcast(&mu,1,MPI_DOUBLE,0,MPI_COMM_WORLD); MPI_Bcast(&ninv,1,MPI_INT,0,MPI_COMM_WORLD); MPI_Bcast(&nmr,1,MPI_INT,0,MPI_COMM_WORLD); MPI_Bcast(&ncy,1,MPI_INT,0,MPI_COMM_WORLD); set_dfl_gen_parms(kappa,mu,ninv,nmr,ncy); if (my_rank==0) { find_section("Deflation projection"); read_line("nkv","%d",&nkv); read_line("nmx","%d",&nmx); read_line("res","%lf",&res); fclose(fin); } MPI_Bcast(&nkv,1,MPI_INT,0,MPI_COMM_WORLD); MPI_Bcast(&nmx,1,MPI_INT,0,MPI_COMM_WORLD); MPI_Bcast(&res,1,MPI_DOUBLE,0,MPI_COMM_WORLD); set_dfl_pro_parms(nkv,nmx,res); set_lat_parms(6.0,1.0,0,NULL,1.234); print_solver_parms(status,status+1); print_sap_parms(0); print_dfl_parms(0); start_ranlux(0,1245); geometry(); mnkv=2*mnkv+2; if (mnkv<(Ns+2)) mnkv=Ns+2; if (mnkv<5) mnkv=5; alloc_ws(mnkv); alloc_wsd(7); alloc_wv(2*nkv+2); alloc_wvd(4); drmx.re=0.0; drmx.im=0.0; for (irw=0;irw<3;irw++) { dm=1.0e-2; for (isp=0;isp<3;isp++) { ispp[0]=isp; ispp[1]=isp; if (isp==0) { m0=1.0877; mu0=0.1; } else if (isp==1) { m0=0.0877; mu0=0.01; } else { m0=-0.0123; mu0=0.001; } random_ud(); if (isp==2) { dfl_modes(status); error_root(status[0]<0,1,"main [check2.c]", "dfl_modes failed"); } if (irw==0) { ms.m1=m0; ms.d1=dm; ms.mu1=mu0; ms.m2=m0; ms.d2=dm; ms.mu2=mu0; lnr=mrw2(ms,0,ispp,lnw1,sqnp,&sqne,status); dr.re=fabs(lnw1[0].re-lnw1[1].re); dr.im=fabs(lnw1[0].im-lnw1[1].im); lnr=mrw2(ms,1,ispp,lnw1,sqnp,&sqne,status); dr.re+=fabs(lnr.re-(2.0*mu0*dm+dm*dm)*sqnp[0]); dr.re+=fabs(lnw1[0].re-lnw1[1].re); dr.re+=fabs(sqnp[0]-sqnp[1]); dr.im+=fabs(lnr.im); dr.im+=fabs(lnw1[0].im-lnw1[1].im); } else if (irw==1) { ms.m1=m0; ms.d1=dm; ms.mu1=mu0; ms.m2=m0; ms.d2=-dm; ms.mu2=mu0; lnr=mrw2(ms,0,ispp,lnw1,sqnp,&sqne,status); dr.re=fabs(lnw1[0].re+lnw1[1].re); dr.im=fabs(lnw1[0].im+lnw1[1].im); lnr=mrw2(ms,1,ispp,lnw1,sqnp,&sqne,status); dr.re+=fabs(lnr.re+dm*dm*sqnp[0]); dr.re+=fabs(lnw1[0].re+lnw1[1].re); dr.re+=fabs(sqnp[0]-sqnp[1]); dr.im+=fabs(lnr.im-2.0*lnw1[0].im); dr.im+=fabs(lnw1[0].im+lnw1[1].im); } else { ms.m1=m0; ms.d1=dm; ms.mu1=mu0; ms.m2=m0+dm; ms.d2=-dm; ms.mu2=mu0; lnr=mrw2(ms,0,ispp,lnw1,sqnp,&sqne,status); dr.re=fabs(lnr.re); dr.im=fabs(lnr.im); } if (dr.re>drmx.re) drmx.re=dr.re; if (dr.im>drmx.im) drmx.im=dr.im; if (my_rank==0) { if (irw==0) printf("mrw2(d2=d1): "); else if (irw==1) printf("mrw2(d2=-d1): "); else printf("mrw2(m2=m1+d1,d2=-d1): "); if ((isp==0)||(isp==1)) printf("status = %d\n",status[0]); else if (isp==2) printf("status = (%d,%d,%d)\n", status[0],status[1],status[2]); printf("diff = %.1e + i%.1e\n\n",dr.re,dr.im); } error_chk(); } } if (my_rank==0) { printf("\n"); printf("max diff = %.1e + i%.1e\n",drmx.re,drmx.im); printf("(should be smaller than %.1e)\n\n",mres*sqrt((double)(VOLUME*NPROC*24))); fclose(flog); } MPI_Finalize(); exit(0); }
int main(int argc,char *argv[]) { int j,j_max,k,k_max = 1; #ifdef HAVE_LIBLEMON paramsXlfInfo *xlfInfo; #endif int status = 0; static double t1,t2,dt,sdt,dts,qdt,sqdt; double antioptaway=0.0; #ifdef MPI static double dt2; DUM_DERI = 6; DUM_SOLVER = DUM_DERI+2; DUM_MATRIX = DUM_SOLVER+6; NO_OF_SPINORFIELDS = DUM_MATRIX+2; # ifdef OMP int mpi_thread_provided; MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_thread_provided); # else MPI_Init(&argc, &argv); # endif MPI_Comm_rank(MPI_COMM_WORLD, &g_proc_id); #else g_proc_id = 0; #endif g_rgi_C1 = 1.; /* Read the input file */ if((status = read_input("benchmark.input")) != 0) { fprintf(stderr, "Could not find input file: benchmark.input\nAborting...\n"); exit(-1); } #ifdef OMP if(omp_num_threads > 0) { omp_set_num_threads(omp_num_threads); } else { if( g_proc_id == 0 ) printf("# No value provided for OmpNumThreads, running in single-threaded mode!\n"); omp_num_threads = 1; omp_set_num_threads(omp_num_threads); } init_omp_accumulators(omp_num_threads); #endif tmlqcd_mpi_init(argc, argv); if(g_proc_id==0) { #ifdef SSE printf("# The code was compiled with SSE instructions\n"); #endif #ifdef SSE2 printf("# The code was compiled with SSE2 instructions\n"); #endif #ifdef SSE3 printf("# The code was compiled with SSE3 instructions\n"); #endif #ifdef P4 printf("# The code was compiled for Pentium4\n"); #endif #ifdef OPTERON printf("# The code was compiled for AMD Opteron\n"); #endif #ifdef _GAUGE_COPY printf("# The code was compiled with -D_GAUGE_COPY\n"); #endif #ifdef BGL printf("# The code was compiled for Blue Gene/L\n"); #endif #ifdef BGP printf("# The code was compiled for Blue Gene/P\n"); #endif #ifdef _USE_HALFSPINOR printf("# The code was compiled with -D_USE_HALFSPINOR\n"); #endif #ifdef _USE_SHMEM printf("# The code was compiled with -D_USE_SHMEM\n"); # ifdef _PERSISTENT printf("# The code was compiled for persistent MPI calls (halfspinor only)\n"); # endif #endif #ifdef MPI # ifdef _NON_BLOCKING printf("# The code was compiled for non-blocking MPI calls (spinor and gauge)\n"); # endif #endif printf("\n"); fflush(stdout); } #ifdef _GAUGE_COPY init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1); #else init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0); #endif init_geometry_indices(VOLUMEPLUSRAND + g_dbw2rand); if(even_odd_flag) { j = init_spinor_field(VOLUMEPLUSRAND/2, 2*k_max+1); } else { j = init_spinor_field(VOLUMEPLUSRAND, 2*k_max); } if ( j!= 0) { fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n"); exit(0); } j = init_moment_field(VOLUME, VOLUMEPLUSRAND + g_dbw2rand); if ( j!= 0) { fprintf(stderr, "Not enough memory for moment fields! Aborting...\n"); exit(0); } if(g_proc_id == 0) { fprintf(stdout,"# The number of processes is %d \n",g_nproc); printf("# The lattice size is %d x %d x %d x %d\n", (int)(T*g_nproc_t), (int)(LX*g_nproc_x), (int)(LY*g_nproc_y), (int)(g_nproc_z*LZ)); printf("# The local lattice size is %d x %d x %d x %d\n", (int)(T), (int)(LX), (int)(LY),(int) LZ); if(even_odd_flag) { printf("# benchmarking the even/odd preconditioned Dirac operator\n"); } else { printf("# benchmarking the standard Dirac operator\n"); } fflush(stdout); } /* define the geometry */ geometry(); /* define the boundary conditions for the fermion fields */ boundary(g_kappa); #ifdef _USE_HALFSPINOR j = init_dirac_halfspinor(); if ( j!= 0) { fprintf(stderr, "Not enough memory for halfspinor fields! Aborting...\n"); exit(0); } if(g_sloppy_precision_flag == 1) { g_sloppy_precision = 1; j = init_dirac_halfspinor32(); if ( j!= 0) { fprintf(stderr, "Not enough memory for 32-Bit halfspinor fields! Aborting...\n"); exit(0); } } # if (defined _PERSISTENT) init_xchange_halffield(); # endif #endif status = check_geometry(); if (status != 0) { fprintf(stderr, "Checking of geometry failed. Unable to proceed.\nAborting....\n"); exit(1); } #if (defined MPI && !(defined _USE_SHMEM)) check_xchange(); #endif start_ranlux(1, 123456); random_gauge_field(reproduce_randomnumber_flag); #ifdef MPI /*For parallelization: exchange the gaugefield */ xchange_gauge(g_gauge_field); #endif if(even_odd_flag) { /*initialize the pseudo-fermion fields*/ j_max=2048; sdt=0.; for (k = 0; k < k_max; k++) { random_spinor_field(g_spinor_field[k], VOLUME/2, 0); } while(sdt < 30.) { #ifdef MPI MPI_Barrier(MPI_COMM_WORLD); #endif t1 = gettime(); antioptaway=0.0; for (j=0;j<j_max;j++) { for (k=0;k<k_max;k++) { Hopping_Matrix(0, g_spinor_field[k+k_max], g_spinor_field[k]); Hopping_Matrix(1, g_spinor_field[2*k_max], g_spinor_field[k+k_max]); antioptaway+=creal(g_spinor_field[2*k_max][0].s0.c0); } } t2 = gettime(); dt = t2-t1; #ifdef MPI MPI_Allreduce (&dt, &sdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); #else sdt = dt; #endif qdt=dt*dt; #ifdef MPI MPI_Allreduce (&qdt, &sqdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); #else sqdt = qdt; #endif sdt=sdt/((double)g_nproc); sqdt=sqrt(sqdt/g_nproc-sdt*sdt); j_max*=2; } j_max=j_max/2; dts=dt; sdt=1.0e6f*sdt/((double)(k_max*j_max*(VOLUME))); sqdt=1.0e6f*sqdt/((double)(k_max*j_max*(VOLUME))); if(g_proc_id==0) { printf("# The following result is just to make sure that the calculation is not optimized away: %e\n", antioptaway); printf("# Total compute time %e sec, variance of the time %e sec. (%d iterations).\n", sdt, sqdt, j_max); printf("# Communication switched on:\n# (%d Mflops [%d bit arithmetic])\n", (int)(1608.0f/sdt),(int)sizeof(spinor)/3); #ifdef OMP printf("# Mflops per OpenMP thread ~ %d\n",(int)(1608.0f/(omp_num_threads*sdt))); #endif printf("\n"); fflush(stdout); } #ifdef MPI /* isolated computation */ t1 = gettime(); antioptaway=0.0; for (j=0;j<j_max;j++) { for (k=0;k<k_max;k++) { Hopping_Matrix_nocom(0, g_spinor_field[k+k_max], g_spinor_field[k]); Hopping_Matrix_nocom(1, g_spinor_field[2*k_max], g_spinor_field[k+k_max]); antioptaway += creal(g_spinor_field[2*k_max][0].s0.c0); } } t2 = gettime(); dt2 = t2-t1; /* compute the bandwidth */ dt=dts-dt2; MPI_Allreduce (&dt, &sdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); sdt=sdt/((double)g_nproc); MPI_Allreduce (&dt2, &dt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); dt=dt/((double)g_nproc); dt=1.0e6f*dt/((double)(k_max*j_max*(VOLUME))); if(g_proc_id==0) { printf("# The following result is printed just to make sure that the calculation is not optimized away: %e\n",antioptaway); printf("# Communication switched off: \n# (%d Mflops [%d bit arithmetic])\n", (int)(1608.0f/dt),(int)sizeof(spinor)/3); #ifdef OMP printf("# Mflops per OpenMP thread ~ %d\n",(int)(1608.0f/(omp_num_threads*dt))); #endif printf("\n"); fflush(stdout); } sdt=sdt/((double)k_max); sdt=sdt/((double)j_max); sdt=sdt/((double)(2*SLICE)); if(g_proc_id==0) { printf("# The size of the package is %d bytes.\n",(SLICE)*192); #ifdef _USE_HALFSPINOR printf("# The bandwidth is %5.2f + %5.2f MB/sec\n", 192./sdt/1024/1024, 192./sdt/1024./1024); #else printf("# The bandwidth is %5.2f + %5.2f MB/sec\n", 2.*192./sdt/1024/1024, 2.*192./sdt/1024./1024); #endif } #endif fflush(stdout); } else { /* the non even/odd case now */ /*initialize the pseudo-fermion fields*/ j_max=1; sdt=0.; for (k=0;k<k_max;k++) { random_spinor_field(g_spinor_field[k], VOLUME, 0); } while(sdt < 3.) { #ifdef MPI MPI_Barrier(MPI_COMM_WORLD); #endif t1 = gettime(); for (j=0;j<j_max;j++) { for (k=0;k<k_max;k++) { D_psi(g_spinor_field[k+k_max], g_spinor_field[k]); antioptaway+=creal(g_spinor_field[k+k_max][0].s0.c0); } } t2 = gettime(); dt=t2-t1; #ifdef MPI MPI_Allreduce (&dt, &sdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); #else sdt = dt; #endif qdt=dt*dt; #ifdef MPI MPI_Allreduce (&qdt, &sqdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); #else sqdt = qdt; #endif sdt=sdt/((double)g_nproc); sqdt=sqrt(sqdt/g_nproc-sdt*sdt); j_max*=2; } j_max=j_max/2; dts=dt; sdt=1.0e6f*sdt/((double)(k_max*j_max*(VOLUME))); sqdt=1.0e6f*sqdt/((double)(k_max*j_max*(VOLUME))); if(g_proc_id==0) { printf("# The following result is just to make sure that the calculation is not optimized away: %e\n", antioptaway); printf("# Total compute time %e sec, variance of the time %e sec. (%d iterations).\n", sdt, sqdt, j_max); printf("\n# (%d Mflops [%d bit arithmetic])\n", (int)(1680.0f/sdt),(int)sizeof(spinor)/3); #ifdef OMP printf("# Mflops per OpenMP thread ~ %d\n",(int)(1680.0f/(omp_num_threads*sdt))); #endif printf("\n"); fflush(stdout); } } #ifdef HAVE_LIBLEMON if(g_proc_id==0) { printf("# Performing parallel IO test ...\n"); } xlfInfo = construct_paramsXlfInfo(0.5, 0); write_gauge_field( "conf.test", 64, xlfInfo); free(xlfInfo); if(g_proc_id==0) { printf("# done ...\n"); } #endif #ifdef MPI MPI_Finalize(); #endif #ifdef OMP free_omp_accumulators(); #endif free_gauge_field(); free_geometry_indices(); free_spinor_field(); free_moment_field(); return(0); }
int main(int argc, char *argv[]) { FILE *parameterfile = NULL; int c, j; char * filename = NULL; char datafilename[50]; char parameterfilename[50]; char conf_filename[50]; char * input_filename = NULL; char * xlfmessage = NULL; char * gaugelfn = NULL; char * gaugecksum = NULL; double plaquette_energy; #ifdef _KOJAK_INST #pragma pomp inst init #pragma pomp inst begin(main) #endif #ifdef HAVE_LIBLEMON MPI_File fh; LemonWriter *lemonWriter; paramsXlfInfo *xlfInfo; paramsPropagatorFormat *propagatorFormat; #endif #if (defined SSE || defined SSE2 || SSE3) signal(SIGILL, &catch_ill_inst); #endif DUM_DERI = 6; /* DUM_DERI + 2 is enough (not 7) */ DUM_SOLVER = DUM_DERI + 3; DUM_MATRIX = DUM_SOLVER + 8; /* DUM_MATRIX + 2 is enough (not 6) */ NO_OF_SPINORFIELDS = DUM_MATRIX + 2; verbose = 0; g_use_clover_flag = 0; #ifdef MPI MPI_Init(&argc, &argv); #endif while ((c = getopt(argc, argv, "h?f:o:")) != -1) { switch (c) { case 'f': input_filename = calloc(200, sizeof(char)); strcpy(input_filename, optarg); break; case 'o': filename = calloc(200, sizeof(char)); strcpy(filename, optarg); break; case 'h': case '?': default: usage(); break; } } if (input_filename == NULL) { input_filename = "hmc.input"; } if (filename == NULL) { filename = "output"; } /* Read the input file */ read_input(input_filename); if (solver_flag == 12 && even_odd_flag == 1) { even_odd_flag = 0; if (g_proc_id == 0) { fprintf(stderr, "CGMMS works only without even/odd! Forcing!\n"); } } /* this DBW2 stuff is not needed for the inversion ! */ if (g_dflgcr_flag == 1) { even_odd_flag = 0; } g_rgi_C1 = 0; if (Nsave == 0) { Nsave = 1; } if(g_running_phmc) { NO_OF_SPINORFIELDS = DUM_MATRIX + 8; } mpi_init(argc, argv); g_dbw2rand = 0; /* starts the single and double precision random number */ /* generator */ start_ranlux(rlxd_level, random_seed); #ifndef MPI g_dbw2rand = 0; #endif #ifdef _GAUGE_COPY j = init_gauge_field(VOLUMEPLUSRAND, 1); #else j = init_gauge_field(VOLUMEPLUSRAND, 0); #endif if(j != 0) { fprintf(stderr, "Not enough memory for gauge_fields! Aborting...\n"); exit(-1); } j = init_geometry_indices(VOLUMEPLUSRAND); if(j != 0) { fprintf(stderr, "Not enough memory for geometry indices! Aborting...\n"); exit(-1); } if(no_monomials > 0) { if(even_odd_flag) { j = init_monomials(VOLUMEPLUSRAND / 2, even_odd_flag); } else { j = init_monomials(VOLUMEPLUSRAND, even_odd_flag); } if(j != 0) { fprintf(stderr, "Not enough memory for monomial pseudo fermion fields! Aborting...\n"); exit(0); } } if(even_odd_flag) { j = init_spinor_field(VOLUMEPLUSRAND / 2, NO_OF_SPINORFIELDS); } else { j = init_spinor_field(VOLUMEPLUSRAND, NO_OF_SPINORFIELDS); } if(j != 0) { fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n"); exit(-1); } if(g_running_phmc) { j = init_chi_up_spinor_field(VOLUMEPLUSRAND / 2, 20); if(j != 0) { fprintf(stderr, "Not enough memory for PHMC Chi_up fields! Aborting...\n"); exit(0); } j = init_chi_dn_spinor_field(VOLUMEPLUSRAND / 2, 20); if(j != 0) { fprintf(stderr, "Not enough memory for PHMC Chi_dn fields! Aborting...\n"); exit(0); } } g_mu = g_mu1; if(g_proc_id == 0) { /*construct the filenames for the observables and the parameters*/ strcpy(datafilename, filename); strcat(datafilename, ".data"); strcpy(parameterfilename, filename); strcat(parameterfilename, ".para"); parameterfile = fopen(parameterfilename, "w"); write_first_messages(parameterfile, 1); fclose(parameterfile); } /* this is for the extra masses of the CGMMS */ if (solver_flag == 12 && g_no_extra_masses > 0) { if ((parameterfile = fopen("extra_masses.input", "r")) != NULL) { for (j = 0; j < g_no_extra_masses; j++) { fscanf(parameterfile, "%lf", &g_extra_masses[j]); if (g_proc_id == 0 && g_debug_level > 0) { printf("# g_extra_masses[%d] = %lf\n", j, g_extra_masses[j]); } } fclose(parameterfile); } else { fprintf(stderr, "Could not open file extra_masses.input!\n"); g_no_extra_masses = 0; } } /* define the geometry */ geometry(); /* define the boundary conditions for the fermion fields */ boundary(g_kappa); phmc_invmaxev = 1.; #ifdef _USE_HALFSPINOR j = init_dirac_halfspinor(); if (j != 0) { fprintf(stderr, "Not enough memory for halffield! Aborting...\n"); exit(-1); } if (g_sloppy_precision_flag == 1) { j = init_dirac_halfspinor32(); if (j != 0) { fprintf(stderr, "Not enough memory for 32-Bit halffield! Aborting...\n"); exit(-1); } } # if (defined _PERSISTENT) if (even_odd_flag) { init_xchange_halffield(); } # endif #endif for (j = 0; j < Nmeas; j++) { sprintf(conf_filename, "%s.%.4d", gauge_input_filename, nstore); if (g_proc_id == 0) { printf("Reading Gauge field from file %s\n", conf_filename); fflush(stdout); } #ifdef HAVE_LIBLEMON read_lemon_gauge_field_parallel(conf_filename, &gaugecksum, &xlfmessage, &gaugelfn); #else /* HAVE_LIBLEMON */ if (xlfmessage != (char*)NULL) free(xlfmessage); if (gaugelfn != (char*)NULL) free(gaugelfn); if (gaugecksum != (char*)NULL) free(gaugecksum); read_lime_gauge_field(conf_filename); xlfmessage = read_message(conf_filename, "xlf-info"); gaugelfn = read_message(conf_filename, "ildg-data-lfn"); gaugecksum = read_message(conf_filename, "scidac-checksum"); printf("%s \n", gaugecksum); #endif /* HAVE_LIBLEMON */ if (g_proc_id == 0) { printf("done!\n"); fflush(stdout); } /* unit_g_gauge_field(); */ #ifdef MPI xchange_gauge(g_gauge_field); #endif /*compute the energy of the gauge field*/ plaquette_energy = measure_gauge_action(); if (g_proc_id == 0) { printf("The plaquette value is %e\n", plaquette_energy / (6.*VOLUME*g_nproc)); fflush(stdout); } if (use_stout_flag == 1) { if (stout_smear_gauge_field(stout_rho , stout_no_iter) != 0) { exit(1) ; } plaquette_energy = measure_gauge_action(); if (g_proc_id == 0) { printf("The plaquette value after stouting is %e\n", plaquette_energy / (6.*VOLUME*g_nproc)); fflush(stdout); } } /* Compute minimal eigenvalues, necessary for overlap! */ if (compute_evs != 0) eigenvalues(&no_eigenvalues, max_solver_iterations, eigenvalue_precision, 0, compute_evs, nstore, even_odd_flag); else { compute_evs = 1; no_eigenvalues = 1; eigenvalues(&no_eigenvalues, max_solver_iterations, eigenvalue_precision, 0, compute_evs, nstore, even_odd_flag); no_eigenvalues = 0; compute_evs = 0; } if (phmc_compute_evs != 0) { #ifdef MPI MPI_Finalize(); #endif return (0); } /* here we can do something */ ov_n_cheby = (-log(delta))/(2*sqrt(ev_minev)); printf("// Degree of cheby polynomial: %d\n", ov_n_cheby); // g_mu = 0.; ov_check_locality(); // ov_check_ginsparg_wilson_relation_strong(); // ov_compare_4x4("overlap.mat"); // ov_compare_12x12("overlap.mat"); // ov_save_12x12("overlap.mat"); // ov_check_operator(1,0,0,0); nstore += Nsave; } #ifdef MPI MPI_Finalize(); #endif free_blocks(); free_dfl_subspace(); free_gauge_field(); free_geometry_indices(); free_spinor_field(); free_moment_field(); if (g_running_phmc) { free_chi_up_spinor_field(); free_chi_dn_spinor_field(); } return(0); #ifdef _KOJAK_INST #pragma pomp inst end(main) #endif }
int main(int argc, char *argv[]) { FILE *parameterfile = NULL; int j, i, ix = 0, isample = 0, op_id = 0; char datafilename[206]; char parameterfilename[206]; char conf_filename[50]; char * input_filename = NULL; char * filename = NULL; double plaquette_energy; struct stout_parameters params_smear; #ifdef _KOJAK_INST #pragma pomp inst init #pragma pomp inst begin(main) #endif #if (defined SSE || defined SSE2 || SSE3) signal(SIGILL, &catch_ill_inst); #endif DUM_DERI = 8; DUM_MATRIX = DUM_DERI + 5; NO_OF_SPINORFIELDS = DUM_MATRIX + 4; //4 extra fields (corresponding to DUM_MATRIX+0..5) for deg. and ND matrix mult. NO_OF_SPINORFIELDS_32 = 6; verbose = 0; g_use_clover_flag = 0; process_args(argc,argv,&input_filename,&filename); set_default_filenames(&input_filename, &filename); init_parallel_and_read_input(argc, argv, input_filename); /* this DBW2 stuff is not needed for the inversion ! */ if (g_dflgcr_flag == 1) { even_odd_flag = 0; } g_rgi_C1 = 0; if (Nsave == 0) { Nsave = 1; } if (g_running_phmc) { NO_OF_SPINORFIELDS = DUM_MATRIX + 8; } tmlqcd_mpi_init(argc, argv); g_dbw2rand = 0; /* starts the single and double precision random number */ /* generator */ start_ranlux(rlxd_level, random_seed^nstore); /* we need to make sure that we don't have even_odd_flag = 1 */ /* if any of the operators doesn't use it */ /* in this way even/odd can still be used by other operators */ for(j = 0; j < no_operators; j++) if(!operator_list[j].even_odd_flag) even_odd_flag = 0; #ifndef TM_USE_MPI g_dbw2rand = 0; #endif #ifdef _GAUGE_COPY j = init_gauge_field(VOLUMEPLUSRAND, 1); j += init_gauge_field_32(VOLUMEPLUSRAND, 1); #else j = init_gauge_field(VOLUMEPLUSRAND, 0); j += init_gauge_field_32(VOLUMEPLUSRAND, 0); #endif if (j != 0) { fprintf(stderr, "Not enough memory for gauge_fields! Aborting...\n"); exit(-1); } j = init_geometry_indices(VOLUMEPLUSRAND); if (j != 0) { fprintf(stderr, "Not enough memory for geometry indices! Aborting...\n"); exit(-1); } if (no_monomials > 0) { if (even_odd_flag) { j = init_monomials(VOLUMEPLUSRAND / 2, even_odd_flag); } else { j = init_monomials(VOLUMEPLUSRAND, even_odd_flag); } if (j != 0) { fprintf(stderr, "Not enough memory for monomial pseudo fermion fields! Aborting...\n"); exit(-1); } } if (even_odd_flag) { j = init_spinor_field(VOLUMEPLUSRAND / 2, NO_OF_SPINORFIELDS); j += init_spinor_field_32(VOLUMEPLUSRAND / 2, NO_OF_SPINORFIELDS_32); } else { j = init_spinor_field(VOLUMEPLUSRAND, NO_OF_SPINORFIELDS); j += init_spinor_field_32(VOLUMEPLUSRAND, NO_OF_SPINORFIELDS_32); } if (j != 0) { fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n"); exit(-1); } if (g_running_phmc) { j = init_chi_spinor_field(VOLUMEPLUSRAND / 2, 20); if (j != 0) { fprintf(stderr, "Not enough memory for PHMC Chi fields! Aborting...\n"); exit(-1); } } g_mu = g_mu1; if (g_cart_id == 0) { /*construct the filenames for the observables and the parameters*/ strncpy(datafilename, filename, 200); strcat(datafilename, ".data"); strncpy(parameterfilename, filename, 200); strcat(parameterfilename, ".para"); parameterfile = fopen(parameterfilename, "w"); write_first_messages(parameterfile, "invert", git_hash); fclose(parameterfile); } /* define the geometry */ geometry(); /* define the boundary conditions for the fermion fields */ boundary(g_kappa); phmc_invmaxev = 1.; init_operators(); /* list and initialize measurements*/ if(g_proc_id == 0) { printf("\n"); for(int j = 0; j < no_measurements; j++) { printf("# measurement id %d, type = %d\n", j, measurement_list[j].type); } } init_measurements(); /* this could be maybe moved to init_operators */ #ifdef _USE_HALFSPINOR j = init_dirac_halfspinor(); if (j != 0) { fprintf(stderr, "Not enough memory for halffield! Aborting...\n"); exit(-1); } /* for mixed precision solvers, the 32 bit halfspinor field must always be there */ j = init_dirac_halfspinor32(); if (j != 0) { fprintf(stderr, "Not enough memory for 32-bit halffield! Aborting...\n"); exit(-1); } # if (defined _PERSISTENT) if (even_odd_flag) init_xchange_halffield(); # endif #endif for (j = 0; j < Nmeas; j++) { sprintf(conf_filename, "%s.%.4d", gauge_input_filename, nstore); if (g_cart_id == 0) { printf("#\n# Trying to read gauge field from file %s in %s precision.\n", conf_filename, (gauge_precision_read_flag == 32 ? "single" : "double")); fflush(stdout); } if( (i = read_gauge_field(conf_filename,g_gauge_field)) !=0) { fprintf(stderr, "Error %d while reading gauge field from %s\n Aborting...\n", i, conf_filename); exit(-2); } if (g_cart_id == 0) { printf("# Finished reading gauge field.\n"); fflush(stdout); } #ifdef TM_USE_MPI xchange_gauge(g_gauge_field); #endif /*Convert to a 32 bit gauge field, after xchange*/ convert_32_gauge_field(g_gauge_field_32, g_gauge_field, VOLUMEPLUSRAND); /*compute the energy of the gauge field*/ plaquette_energy = measure_plaquette( (const su3**) g_gauge_field); if (g_cart_id == 0) { printf("# The computed plaquette value is %e.\n", plaquette_energy / (6.*VOLUME*g_nproc)); fflush(stdout); } if (use_stout_flag == 1){ params_smear.rho = stout_rho; params_smear.iterations = stout_no_iter; /* if (stout_smear((su3_tuple*)(g_gauge_field[0]), ¶ms_smear, (su3_tuple*)(g_gauge_field[0])) != 0) */ /* exit(1) ; */ g_update_gauge_copy = 1; plaquette_energy = measure_plaquette( (const su3**) g_gauge_field); if (g_cart_id == 0) { printf("# The plaquette value after stouting is %e\n", plaquette_energy / (6.*VOLUME*g_nproc)); fflush(stdout); } } /* if any measurements are defined in the input file, do them here */ measurement * meas; for(int imeas = 0; imeas < no_measurements; imeas++){ meas = &measurement_list[imeas]; if (g_proc_id == 0) { fprintf(stdout, "#\n# Beginning online measurement.\n"); } meas->measurefunc(nstore, imeas, even_odd_flag); } if (reweighting_flag == 1) { reweighting_factor(reweighting_samples, nstore); } /* Compute minimal eigenvalues, if wanted */ if (compute_evs != 0) { eigenvalues(&no_eigenvalues, 5000, eigenvalue_precision, 0, compute_evs, nstore, even_odd_flag); } if (phmc_compute_evs != 0) { #ifdef TM_USE_MPI MPI_Finalize(); #endif return(0); } /* Compute the mode number or topological susceptibility using spectral projectors, if wanted*/ if(compute_modenumber != 0 || compute_topsus !=0){ invert_compute_modenumber(); } // set up blocks if Deflation is used if (g_dflgcr_flag) init_blocks(nblocks_t, nblocks_x, nblocks_y, nblocks_z); if(SourceInfo.type == SRC_TYPE_VOL || SourceInfo.type == SRC_TYPE_PION_TS || SourceInfo.type == SRC_TYPE_GEN_PION_TS) { index_start = 0; index_end = 1; } g_precWS=NULL; if(use_preconditioning == 1){ /* todo load fftw wisdom */ #if (defined HAVE_FFTW ) && !( defined TM_USE_MPI) loadFFTWWisdom(g_spinor_field[0],g_spinor_field[1],T,LX); #else use_preconditioning=0; #endif } if (g_cart_id == 0) { fprintf(stdout, "#\n"); /*Indicate starting of the operator part*/ } for(op_id = 0; op_id < no_operators; op_id++) { boundary(operator_list[op_id].kappa); g_kappa = operator_list[op_id].kappa; g_mu = operator_list[op_id].mu; g_c_sw = operator_list[op_id].c_sw; // DFLGCR and DFLFGMRES if(operator_list[op_id].solver == DFLGCR || operator_list[op_id].solver == DFLFGMRES) { generate_dfl_subspace(g_N_s, VOLUME, reproduce_randomnumber_flag); } if(use_preconditioning==1 && PRECWSOPERATORSELECT[operator_list[op_id].solver]!=PRECWS_NO ){ printf("# Using preconditioning with treelevel preconditioning operator: %s \n", precWSOpToString(PRECWSOPERATORSELECT[operator_list[op_id].solver])); /* initial preconditioning workspace */ operator_list[op_id].precWS=(spinorPrecWS*)malloc(sizeof(spinorPrecWS)); spinorPrecWS_Init(operator_list[op_id].precWS, operator_list[op_id].kappa, operator_list[op_id].mu/2./operator_list[op_id].kappa, -(0.5/operator_list[op_id].kappa-4.), PRECWSOPERATORSELECT[operator_list[op_id].solver]); g_precWS = operator_list[op_id].precWS; if(PRECWSOPERATORSELECT[operator_list[op_id].solver] == PRECWS_D_DAGGER_D) { fitPrecParams(op_id); } } for(isample = 0; isample < no_samples; isample++) { for (ix = index_start; ix < index_end; ix++) { if (g_cart_id == 0) { fprintf(stdout, "#\n"); /*Indicate starting of new index*/ } /* we use g_spinor_field[0-7] for sources and props for the moment */ /* 0-3 in case of 1 flavour */ /* 0-7 in case of 2 flavours */ prepare_source(nstore, isample, ix, op_id, read_source_flag, source_location, random_seed); //randmize initial guess for eigcg if needed-----experimental if( (operator_list[op_id].solver == INCREIGCG) && (operator_list[op_id].solver_params.eigcg_rand_guess_opt) ){ //randomize the initial guess gaussian_volume_source( operator_list[op_id].prop0, operator_list[op_id].prop1,isample,ix,0); //need to check this } operator_list[op_id].inverter(op_id, index_start, 1); } } if(use_preconditioning==1 && operator_list[op_id].precWS!=NULL ){ /* free preconditioning workspace */ spinorPrecWS_Free(operator_list[op_id].precWS); free(operator_list[op_id].precWS); } if(operator_list[op_id].type == OVERLAP){ free_Dov_WS(); } } nstore += Nsave; } #ifdef TM_USE_OMP free_omp_accumulators(); #endif free_blocks(); free_dfl_subspace(); free_gauge_field(); free_gauge_field_32(); free_geometry_indices(); free_spinor_field(); free_spinor_field_32(); free_moment_field(); free_chi_spinor_field(); free(filename); free(input_filename); free(SourceInfo.basename); free(PropInfo.basename); #ifdef TM_USE_QUDA _endQuda(); #endif #ifdef TM_USE_MPI MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); #endif return(0); #ifdef _KOJAK_INST #pragma pomp inst end(main) #endif }