int main(int argc,char *argv[]) { #ifdef _USE_HALFSPINOR #undef _USE_HALFSPINOR printf("# WARNING: USE_HALFSPINOR will be ignored (not supported here).\n"); #endif if(even_odd_flag) { even_odd_flag=0; printf("# WARNING: even_odd_flag will be ignored (not supported here).\n"); } int j,j_max,k,k_max = 1; #ifdef HAVE_LIBLEMON paramsXlfInfo *xlfInfo; #endif int status = 0; static double t1,t2,dt,sdt,dts,qdt,sqdt; double antioptaway=0.0; #ifdef MPI static double dt2; DUM_DERI = 6; DUM_SOLVER = DUM_DERI+2; DUM_MATRIX = DUM_SOLVER+6; NO_OF_SPINORFIELDS = DUM_MATRIX+2; # ifdef OMP int mpi_thread_provided; MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_thread_provided); # else MPI_Init(&argc, &argv); # endif MPI_Comm_rank(MPI_COMM_WORLD, &g_proc_id); #else g_proc_id = 0; #endif g_rgi_C1 = 1.; /* Read the input file */ if((status = read_input("test_Dslash.input")) != 0) { fprintf(stderr, "Could not find input file: test_Dslash.input\nAborting...\n"); exit(-1); } #ifdef OMP init_openmp(); #endif tmlqcd_mpi_init(argc, argv); if(g_proc_id==0) { #ifdef SSE printf("# The code was compiled with SSE instructions\n"); #endif #ifdef SSE2 printf("# The code was compiled with SSE2 instructions\n"); #endif #ifdef SSE3 printf("# The code was compiled with SSE3 instructions\n"); #endif #ifdef P4 printf("# The code was compiled for Pentium4\n"); #endif #ifdef OPTERON printf("# The code was compiled for AMD Opteron\n"); #endif #ifdef _GAUGE_COPY printf("# The code was compiled with -D_GAUGE_COPY\n"); #endif #ifdef BGL printf("# The code was compiled for Blue Gene/L\n"); #endif #ifdef BGP printf("# The code was compiled for Blue Gene/P\n"); #endif #ifdef _USE_HALFSPINOR printf("# The code was compiled with -D_USE_HALFSPINOR\n"); #endif #ifdef _USE_SHMEM printf("# The code was compiled with -D_USE_SHMEM\n"); # ifdef _PERSISTENT printf("# The code was compiled for persistent MPI calls (halfspinor only)\n"); # endif #endif #ifdef MPI # ifdef _NON_BLOCKING printf("# The code was compiled for non-blocking MPI calls (spinor and gauge)\n"); # endif #endif printf("\n"); fflush(stdout); } #ifdef _GAUGE_COPY init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1); #else init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0); #endif init_geometry_indices(VOLUMEPLUSRAND + g_dbw2rand); if(even_odd_flag) { j = init_spinor_field(VOLUMEPLUSRAND/2, 2*k_max+1); } else { j = init_spinor_field(VOLUMEPLUSRAND, 2*k_max); } if ( j!= 0) { fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n"); exit(0); } j = init_moment_field(VOLUME, VOLUMEPLUSRAND + g_dbw2rand); if ( j!= 0) { fprintf(stderr, "Not enough memory for moment fields! Aborting...\n"); exit(0); } if(g_proc_id == 0) { fprintf(stdout,"# The number of processes is %d \n",g_nproc); printf("# The lattice size is %d x %d x %d x %d\n", (int)(T*g_nproc_t), (int)(LX*g_nproc_x), (int)(LY*g_nproc_y), (int)(g_nproc_z*LZ)); printf("# The local lattice size is %d x %d x %d x %d\n", (int)(T), (int)(LX), (int)(LY),(int) LZ); // if(even_odd_flag) { // printf("# benchmarking the even/odd preconditioned Dirac operator\n"); // } // else { // printf("# benchmarking the standard Dirac operator\n"); // } fflush(stdout); } /* define the geometry */ geometry(); /* define the boundary conditions for the fermion fields */ boundary(g_kappa); #ifdef _USE_HALFSPINOR j = init_dirac_halfspinor(); if ( j!= 0) { fprintf(stderr, "Not enough memory for halfspinor fields! Aborting...\n"); exit(0); } if(g_sloppy_precision_flag == 1) { g_sloppy_precision = 1; j = init_dirac_halfspinor32(); if ( j!= 0) { fprintf(stderr, "Not enough memory for 32-Bit halfspinor fields! Aborting...\n"); exit(0); } } # if (defined _PERSISTENT) init_xchange_halffield(); # endif #endif status = check_geometry(); if (status != 0) { fprintf(stderr, "Checking of geometry failed. Unable to proceed.\nAborting....\n"); exit(1); } #if (defined MPI && !(defined _USE_SHMEM)) check_xchange(); #endif start_ranlux(1, 123456); random_gauge_field(reproduce_randomnumber_flag, g_gauge_field); #ifdef MPI /*For parallelization: exchange the gaugefield */ xchange_gauge(g_gauge_field); #endif /* the non even/odd case now */ /*initialize the pseudo-fermion fields*/ j_max=1; sdt=0.; for (k=0;k<k_max;k++) { random_spinor_field_lexic(g_spinor_field[k], reproduce_randomnumber_flag, RN_GAUSS); } #ifdef MPI MPI_Barrier(MPI_COMM_WORLD); #endif t1 = gettime(); /* here the actual Dslash */ D_psi(g_spinor_field[0], g_spinor_field[1]); t2 = gettime(); dt=t2-t1; #ifdef MPI MPI_Allreduce (&dt, &sdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); #else sdt = dt; #endif if(g_proc_id==0) { printf("# Time for Dslash %e sec.\n", sdt); printf("\n"); fflush(stdout); } #ifdef HAVE_LIBLEMON if(g_proc_id==0) { printf("# Performing parallel IO test ...\n"); } xlfInfo = construct_paramsXlfInfo(0.5, 0); write_gauge_field( "conf.test", 64, xlfInfo); free(xlfInfo); if(g_proc_id==0) { printf("# done ...\n"); } #endif #ifdef OMP free_omp_accumulators(); #endif free_gauge_field(); free_geometry_indices(); free_spinor_field(); free_moment_field(); #ifdef MPI MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); #endif return(0); }
int main(int argc,char *argv[]) { int j,j_max,k,k_max = 2; paramsXlfInfo *xlfInfo; int ix, n, *nn,*mm,i; double delta, deltamax; spinor rsp; int status = 0; #ifdef MPI DUM_DERI = 6; DUM_SOLVER = DUM_DERI+2; DUM_MATRIX = DUM_SOLVER+6; NO_OF_SPINORFIELDS = DUM_MATRIX+2; MPI_Init(&argc, &argv); #endif g_rgi_C1 = 1.; /* Read the input file */ read_input("hopping_test.input"); tmlqcd_mpi_init(argc, argv); if(g_proc_id==0) { #ifdef SSE printf("# The code was compiled with SSE instructions\n"); #endif #ifdef SSE2 printf("# The code was compiled with SSE2 instructions\n"); #endif #ifdef SSE3 printf("# The code was compiled with SSE3 instructions\n"); #endif #ifdef P4 printf("# The code was compiled for Pentium4\n"); #endif #ifdef OPTERON printf("# The code was compiled for AMD Opteron\n"); #endif #ifdef _GAUGE_COPY printf("# The code was compiled with -D_GAUGE_COPY\n"); #endif #ifdef BGL printf("# The code was compiled for Blue Gene/L\n"); #endif #ifdef BGP printf("# The code was compiled for Blue Gene/P\n"); #endif #ifdef _USE_HALFSPINOR printf("# The code was compiled with -D_USE_HALFSPINOR\n"); #endif #ifdef _USE_SHMEM printf("# the code was compiled with -D_USE_SHMEM\n"); # ifdef _PERSISTENT printf("# the code was compiled for persistent MPI calls (halfspinor only)\n"); # endif #endif #ifdef _INDEX_INDEP_GEOM printf("# the code was compiled with index independent geometry\n"); #endif #ifdef MPI # ifdef _NON_BLOCKING printf("# the code was compiled for non-blocking MPI calls (spinor and gauge)\n"); # endif # ifdef _USE_TSPLITPAR printf("# the code was compiled with tsplit parallelization\n"); # endif #endif printf("\n"); fflush(stdout); } #ifdef _GAUGE_COPY init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1); #else init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0); #endif init_geometry_indices(VOLUMEPLUSRAND + g_dbw2rand); if(even_odd_flag) { j = init_spinor_field(VOLUMEPLUSRAND/2, 2*k_max+1); } else { j = init_spinor_field(VOLUMEPLUSRAND, 2*k_max); } if ( j!= 0) { fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n"); exit(0); } j = init_moment_field(VOLUME, VOLUMEPLUSRAND); if ( j!= 0) { fprintf(stderr, "Not enough memory for moment fields! Aborting...\n"); exit(0); } if(g_proc_id == 0) { fprintf(stdout,"The number of processes is %d \n",g_nproc); printf("# The lattice size is %d x %d x %d x %d\n", (int)(T*g_nproc_t), (int)(LX*g_nproc_x), (int)(LY*g_nproc_y), (int)(g_nproc_z*LZ)); printf("# The local lattice size is %d x %d x %d x %d\n", (int)(T), (int)(LX), (int)(LY),(int) LZ); if(even_odd_flag) { printf("# testinging the even/odd preconditioned Dirac operator\n"); } else { printf("# testinging the standard Dirac operator\n"); } fflush(stdout); } /* define the geometry */ geometry(); /* define the boundary conditions for the fermion fields */ boundary(g_kappa); #ifdef _USE_HALFSPINOR j = init_dirac_halfspinor(); if ( j!= 0) { fprintf(stderr, "Not enough memory for halfspinor fields! Aborting...\n"); exit(0); } if(g_sloppy_precision_flag == 1) { g_sloppy_precision = 1; j = init_dirac_halfspinor32(); if ( j!= 0) { fprintf(stderr, "Not enough memory for 32-Bit halfspinor fields! Aborting...\n"); exit(0); } } # if (defined _PERSISTENT) init_xchange_halffield(); # endif #endif status = check_geometry(); if (status != 0) { fprintf(stderr, "Checking of geometry failed. Unable to proceed.\nAborting....\n"); exit(1); } #if (defined MPI && !(defined _USE_SHMEM)) check_xchange(); #endif start_ranlux(1, 123456); xlfInfo = construct_paramsXlfInfo(0.5, 0); random_gauge_field(reproduce_randomnumber_flag); if ( startoption == 2 ) { /* restart */ write_gauge_field(gauge_input_filename,gauge_precision_write_flag,xlfInfo); } else if ( startoption == 0 ) { /* cold */ unit_g_gauge_field(); } else if (startoption == 3 ) { /* continue */ read_gauge_field(gauge_input_filename); } else if ( startoption == 1 ) { /* hot */ } #ifdef MPI /*For parallelization: exchange the gaugefield */ xchange_gauge(); #endif #ifdef _GAUGE_COPY update_backward_gauge(); #endif if(even_odd_flag) { /*initialize the pseudo-fermion fields*/ j_max=1; for (k = 0; k < k_max; k++) { random_spinor_field(g_spinor_field[k], VOLUME/2, 0); } if (read_source_flag == 2) { /* save */ /* even first, odd second */ write_spinorfield_cm_single(g_spinor_field[0],g_spinor_field[1],SourceInfo.basename); } else if (read_source_flag == 1) { /* yes */ /* even first, odd second */ read_spinorfield_cm_single(g_spinor_field[0],g_spinor_field[1],SourceInfo.basename,-1,0); # if (!defined MPI) if (write_cp_flag == 1) { strcat(SourceInfo.basename,".2"); read_spinorfield_cm_single(g_spinor_field[2],g_spinor_field[3],SourceInfo.basename,-1,0); nn=(int*)calloc(VOLUME,sizeof(int)); if((void*)nn == NULL) return(100); mm=(int*)calloc(VOLUME,sizeof(int)); if((void*)mm == NULL) return(100); n=0; deltamax=0.0; for(ix=0;ix<VOLUME/2;ix++){ (rsp.s0).c0.re = (g_spinor_field[2][ix].s0).c0.re - (g_spinor_field[0][ix].s0).c0.re; (rsp.s0).c0.im = (g_spinor_field[2][ix].s0).c0.im - (g_spinor_field[0][ix].s0).c0.im; (rsp.s0).c1.re = (g_spinor_field[2][ix].s0).c1.re - (g_spinor_field[0][ix].s0).c1.re; (rsp.s0).c1.im = (g_spinor_field[2][ix].s0).c1.im - (g_spinor_field[0][ix].s0).c1.im; (rsp.s0).c2.re = (g_spinor_field[2][ix].s0).c2.re - (g_spinor_field[0][ix].s0).c2.re; (rsp.s0).c2.im = (g_spinor_field[2][ix].s0).c2.im - (g_spinor_field[0][ix].s0).c2.im; (rsp.s1).c0.re = (g_spinor_field[2][ix].s1).c0.re - (g_spinor_field[0][ix].s1).c0.re; (rsp.s1).c0.im = (g_spinor_field[2][ix].s1).c0.im - (g_spinor_field[0][ix].s1).c0.im; (rsp.s1).c1.re = (g_spinor_field[2][ix].s1).c1.re - (g_spinor_field[0][ix].s1).c1.re; (rsp.s1).c1.im = (g_spinor_field[2][ix].s1).c1.im - (g_spinor_field[0][ix].s1).c1.im; (rsp.s1).c2.re = (g_spinor_field[2][ix].s1).c2.re - (g_spinor_field[0][ix].s1).c2.re; (rsp.s1).c2.im = (g_spinor_field[2][ix].s1).c2.im - (g_spinor_field[0][ix].s1).c2.im; (rsp.s2).c0.re = (g_spinor_field[2][ix].s2).c0.re - (g_spinor_field[0][ix].s2).c0.re; (rsp.s2).c0.im = (g_spinor_field[2][ix].s2).c0.im - (g_spinor_field[0][ix].s2).c0.im; (rsp.s2).c1.re = (g_spinor_field[2][ix].s2).c1.re - (g_spinor_field[0][ix].s2).c1.re; (rsp.s2).c1.im = (g_spinor_field[2][ix].s2).c1.im - (g_spinor_field[0][ix].s2).c1.im; (rsp.s2).c2.re = (g_spinor_field[2][ix].s2).c2.re - (g_spinor_field[0][ix].s2).c2.re; (rsp.s2).c2.im = (g_spinor_field[2][ix].s2).c2.im - (g_spinor_field[0][ix].s2).c2.im; (rsp.s3).c0.re = (g_spinor_field[2][ix].s3).c0.re - (g_spinor_field[0][ix].s3).c0.re; (rsp.s3).c0.im = (g_spinor_field[2][ix].s3).c0.im - (g_spinor_field[0][ix].s3).c0.im; (rsp.s3).c1.re = (g_spinor_field[2][ix].s3).c1.re - (g_spinor_field[0][ix].s3).c1.re; (rsp.s3).c1.im = (g_spinor_field[2][ix].s3).c1.im - (g_spinor_field[0][ix].s3).c1.im; (rsp.s3).c2.re = (g_spinor_field[2][ix].s3).c2.re - (g_spinor_field[0][ix].s3).c2.re; (rsp.s3).c2.im = (g_spinor_field[2][ix].s3).c2.im - (g_spinor_field[0][ix].s3).c2.im; _spinor_norm_sq(delta,rsp); if (delta > 1.0e-12) { nn[n] = g_eo2lexic[ix]; mm[n]=ix; n++; } if(delta>deltamax) deltamax=delta; } if (n>0){ printf("mismatch in even spincolorfield in %d points:\n",n); for(i=0; i< MIN(n,1000); i++){ printf("%d,(%d,%d,%d,%d):%f vs. %f\n",nn[i],g_coord[nn[i]][0],g_coord[nn[i]][1],g_coord[nn[i]][2],g_coord[nn[i]][3],(g_spinor_field[2][mm[i]].s0).c0.re, (g_spinor_field[0][mm[i]].s0).c0.re);fflush(stdout); } } n = 0; for(ix=0;ix<VOLUME/2;ix++){ (rsp.s0).c0.re = (g_spinor_field[3][ix].s0).c0.re - (g_spinor_field[1][ix].s0).c0.re; (rsp.s0).c0.im = (g_spinor_field[3][ix].s0).c0.im - (g_spinor_field[1][ix].s0).c0.im; (rsp.s0).c1.re = (g_spinor_field[3][ix].s0).c1.re - (g_spinor_field[1][ix].s0).c1.re; (rsp.s0).c1.im = (g_spinor_field[3][ix].s0).c1.im - (g_spinor_field[1][ix].s0).c1.im; (rsp.s0).c2.re = (g_spinor_field[3][ix].s0).c2.re - (g_spinor_field[1][ix].s0).c2.re; (rsp.s0).c2.im = (g_spinor_field[3][ix].s0).c2.im - (g_spinor_field[1][ix].s0).c2.im; (rsp.s1).c0.re = (g_spinor_field[3][ix].s1).c0.re - (g_spinor_field[1][ix].s1).c0.re; (rsp.s1).c0.im = (g_spinor_field[3][ix].s1).c0.im - (g_spinor_field[1][ix].s1).c0.im; (rsp.s1).c1.re = (g_spinor_field[3][ix].s1).c1.re - (g_spinor_field[1][ix].s1).c1.re; (rsp.s1).c1.im = (g_spinor_field[3][ix].s1).c1.im - (g_spinor_field[1][ix].s1).c1.im; (rsp.s1).c2.re = (g_spinor_field[3][ix].s1).c2.re - (g_spinor_field[1][ix].s1).c2.re; (rsp.s1).c2.im = (g_spinor_field[3][ix].s1).c2.im - (g_spinor_field[1][ix].s1).c2.im; (rsp.s2).c0.re = (g_spinor_field[3][ix].s2).c0.re - (g_spinor_field[1][ix].s2).c0.re; (rsp.s2).c0.im = (g_spinor_field[3][ix].s2).c0.im - (g_spinor_field[1][ix].s2).c0.im; (rsp.s2).c1.re = (g_spinor_field[3][ix].s2).c1.re - (g_spinor_field[1][ix].s2).c1.re; (rsp.s2).c1.im = (g_spinor_field[3][ix].s2).c1.im - (g_spinor_field[1][ix].s2).c1.im; (rsp.s2).c2.re = (g_spinor_field[3][ix].s2).c2.re - (g_spinor_field[1][ix].s2).c2.re; (rsp.s2).c2.im = (g_spinor_field[3][ix].s2).c2.im - (g_spinor_field[1][ix].s2).c2.im; (rsp.s3).c0.re = (g_spinor_field[3][ix].s3).c0.re - (g_spinor_field[1][ix].s3).c0.re; (rsp.s3).c0.im = (g_spinor_field[3][ix].s3).c0.im - (g_spinor_field[1][ix].s3).c0.im; (rsp.s3).c1.re = (g_spinor_field[3][ix].s3).c1.re - (g_spinor_field[1][ix].s3).c1.re; (rsp.s3).c1.im = (g_spinor_field[3][ix].s3).c1.im - (g_spinor_field[1][ix].s3).c1.im; (rsp.s3).c2.re = (g_spinor_field[3][ix].s3).c2.re - (g_spinor_field[1][ix].s3).c2.re; (rsp.s3).c2.im = (g_spinor_field[3][ix].s3).c2.im - (g_spinor_field[1][ix].s3).c2.im; _spinor_norm_sq(delta,rsp); if (delta > 1.0e-12) { nn[n]=g_eo2lexic[ix+(VOLUME+RAND)/2]; mm[n]=ix; n++; } if(delta>deltamax) deltamax=delta; } if (n>0){ printf("mismatch in odd spincolorfield in %d points:\n",n); for(i=0; i< MIN(n,1000); i++){ printf("%d,(%d,%d,%d,%d):%f vs. %f\n",nn[i],g_coord[nn[i]][0],g_coord[nn[i]][1],g_coord[nn[i]][2],g_coord[nn[i]][3],(g_spinor_field[3][mm[i]].s0).c0.re, (g_spinor_field[1][mm[i]].s0).c0.re);fflush(stdout); } } printf("max delta=%e",deltamax);fflush(stdout); } # endif } if (read_source_flag > 0 && write_cp_flag == 0) { /* read-source yes or nobutsave; checkpoint no */ /* first spinorial arg is output, the second is input */ Hopping_Matrix(1, g_spinor_field[1], g_spinor_field[0]); /*ieo=1 M_{eo}*/ Hopping_Matrix(0, g_spinor_field[0], g_spinor_field[1]); /*ieo=0 M_{oe}*/ strcat(SourceInfo.basename,".out"); write_spinorfield_cm_single(g_spinor_field[0],g_spinor_field[1],SourceInfo.basename); printf("Check-field printed. Exiting...\n"); fflush(stdout); } #ifdef MPI MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); #endif } free_gauge_field(); free_geometry_indices(); free_spinor_field(); free_moment_field(); return(0); }
int main(int argc,char *argv[]) { int j,j_max,k,k_max = 1; #ifdef HAVE_LIBLEMON paramsXlfInfo *xlfInfo; #endif int status = 0; static double t1,t2,dt,sdt,dts,qdt,sqdt; double antioptaway=0.0; #ifdef MPI static double dt2; DUM_DERI = 6; DUM_SOLVER = DUM_DERI+2; DUM_MATRIX = DUM_SOLVER+6; NO_OF_SPINORFIELDS = DUM_MATRIX+2; # ifdef OMP int mpi_thread_provided; MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_thread_provided); # else MPI_Init(&argc, &argv); # endif MPI_Comm_rank(MPI_COMM_WORLD, &g_proc_id); #else g_proc_id = 0; #endif g_rgi_C1 = 1.; /* Read the input file */ if((status = read_input("benchmark.input")) != 0) { fprintf(stderr, "Could not find input file: benchmark.input\nAborting...\n"); exit(-1); } #ifdef OMP if(omp_num_threads > 0) { omp_set_num_threads(omp_num_threads); } else { if( g_proc_id == 0 ) printf("# No value provided for OmpNumThreads, running in single-threaded mode!\n"); omp_num_threads = 1; omp_set_num_threads(omp_num_threads); } init_omp_accumulators(omp_num_threads); #endif tmlqcd_mpi_init(argc, argv); if(g_proc_id==0) { #ifdef SSE printf("# The code was compiled with SSE instructions\n"); #endif #ifdef SSE2 printf("# The code was compiled with SSE2 instructions\n"); #endif #ifdef SSE3 printf("# The code was compiled with SSE3 instructions\n"); #endif #ifdef P4 printf("# The code was compiled for Pentium4\n"); #endif #ifdef OPTERON printf("# The code was compiled for AMD Opteron\n"); #endif #ifdef _GAUGE_COPY printf("# The code was compiled with -D_GAUGE_COPY\n"); #endif #ifdef BGL printf("# The code was compiled for Blue Gene/L\n"); #endif #ifdef BGP printf("# The code was compiled for Blue Gene/P\n"); #endif #ifdef _USE_HALFSPINOR printf("# The code was compiled with -D_USE_HALFSPINOR\n"); #endif #ifdef _USE_SHMEM printf("# The code was compiled with -D_USE_SHMEM\n"); # ifdef _PERSISTENT printf("# The code was compiled for persistent MPI calls (halfspinor only)\n"); # endif #endif #ifdef MPI # ifdef _NON_BLOCKING printf("# The code was compiled for non-blocking MPI calls (spinor and gauge)\n"); # endif #endif printf("\n"); fflush(stdout); } #ifdef _GAUGE_COPY init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1); #else init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0); #endif init_geometry_indices(VOLUMEPLUSRAND + g_dbw2rand); if(even_odd_flag) { j = init_spinor_field(VOLUMEPLUSRAND/2, 2*k_max+1); } else { j = init_spinor_field(VOLUMEPLUSRAND, 2*k_max); } if ( j!= 0) { fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n"); exit(0); } j = init_moment_field(VOLUME, VOLUMEPLUSRAND + g_dbw2rand); if ( j!= 0) { fprintf(stderr, "Not enough memory for moment fields! Aborting...\n"); exit(0); } if(g_proc_id == 0) { fprintf(stdout,"# The number of processes is %d \n",g_nproc); printf("# The lattice size is %d x %d x %d x %d\n", (int)(T*g_nproc_t), (int)(LX*g_nproc_x), (int)(LY*g_nproc_y), (int)(g_nproc_z*LZ)); printf("# The local lattice size is %d x %d x %d x %d\n", (int)(T), (int)(LX), (int)(LY),(int) LZ); if(even_odd_flag) { printf("# benchmarking the even/odd preconditioned Dirac operator\n"); } else { printf("# benchmarking the standard Dirac operator\n"); } fflush(stdout); } /* define the geometry */ geometry(); /* define the boundary conditions for the fermion fields */ boundary(g_kappa); #ifdef _USE_HALFSPINOR j = init_dirac_halfspinor(); if ( j!= 0) { fprintf(stderr, "Not enough memory for halfspinor fields! Aborting...\n"); exit(0); } if(g_sloppy_precision_flag == 1) { g_sloppy_precision = 1; j = init_dirac_halfspinor32(); if ( j!= 0) { fprintf(stderr, "Not enough memory for 32-Bit halfspinor fields! Aborting...\n"); exit(0); } } # if (defined _PERSISTENT) init_xchange_halffield(); # endif #endif status = check_geometry(); if (status != 0) { fprintf(stderr, "Checking of geometry failed. Unable to proceed.\nAborting....\n"); exit(1); } #if (defined MPI && !(defined _USE_SHMEM)) check_xchange(); #endif start_ranlux(1, 123456); random_gauge_field(reproduce_randomnumber_flag); #ifdef MPI /*For parallelization: exchange the gaugefield */ xchange_gauge(g_gauge_field); #endif if(even_odd_flag) { /*initialize the pseudo-fermion fields*/ j_max=2048; sdt=0.; for (k = 0; k < k_max; k++) { random_spinor_field(g_spinor_field[k], VOLUME/2, 0); } while(sdt < 30.) { #ifdef MPI MPI_Barrier(MPI_COMM_WORLD); #endif t1 = gettime(); antioptaway=0.0; for (j=0;j<j_max;j++) { for (k=0;k<k_max;k++) { Hopping_Matrix(0, g_spinor_field[k+k_max], g_spinor_field[k]); Hopping_Matrix(1, g_spinor_field[2*k_max], g_spinor_field[k+k_max]); antioptaway+=creal(g_spinor_field[2*k_max][0].s0.c0); } } t2 = gettime(); dt = t2-t1; #ifdef MPI MPI_Allreduce (&dt, &sdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); #else sdt = dt; #endif qdt=dt*dt; #ifdef MPI MPI_Allreduce (&qdt, &sqdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); #else sqdt = qdt; #endif sdt=sdt/((double)g_nproc); sqdt=sqrt(sqdt/g_nproc-sdt*sdt); j_max*=2; } j_max=j_max/2; dts=dt; sdt=1.0e6f*sdt/((double)(k_max*j_max*(VOLUME))); sqdt=1.0e6f*sqdt/((double)(k_max*j_max*(VOLUME))); if(g_proc_id==0) { printf("# The following result is just to make sure that the calculation is not optimized away: %e\n", antioptaway); printf("# Total compute time %e sec, variance of the time %e sec. (%d iterations).\n", sdt, sqdt, j_max); printf("# Communication switched on:\n# (%d Mflops [%d bit arithmetic])\n", (int)(1608.0f/sdt),(int)sizeof(spinor)/3); #ifdef OMP printf("# Mflops per OpenMP thread ~ %d\n",(int)(1608.0f/(omp_num_threads*sdt))); #endif printf("\n"); fflush(stdout); } #ifdef MPI /* isolated computation */ t1 = gettime(); antioptaway=0.0; for (j=0;j<j_max;j++) { for (k=0;k<k_max;k++) { Hopping_Matrix_nocom(0, g_spinor_field[k+k_max], g_spinor_field[k]); Hopping_Matrix_nocom(1, g_spinor_field[2*k_max], g_spinor_field[k+k_max]); antioptaway += creal(g_spinor_field[2*k_max][0].s0.c0); } } t2 = gettime(); dt2 = t2-t1; /* compute the bandwidth */ dt=dts-dt2; MPI_Allreduce (&dt, &sdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); sdt=sdt/((double)g_nproc); MPI_Allreduce (&dt2, &dt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); dt=dt/((double)g_nproc); dt=1.0e6f*dt/((double)(k_max*j_max*(VOLUME))); if(g_proc_id==0) { printf("# The following result is printed just to make sure that the calculation is not optimized away: %e\n",antioptaway); printf("# Communication switched off: \n# (%d Mflops [%d bit arithmetic])\n", (int)(1608.0f/dt),(int)sizeof(spinor)/3); #ifdef OMP printf("# Mflops per OpenMP thread ~ %d\n",(int)(1608.0f/(omp_num_threads*dt))); #endif printf("\n"); fflush(stdout); } sdt=sdt/((double)k_max); sdt=sdt/((double)j_max); sdt=sdt/((double)(2*SLICE)); if(g_proc_id==0) { printf("# The size of the package is %d bytes.\n",(SLICE)*192); #ifdef _USE_HALFSPINOR printf("# The bandwidth is %5.2f + %5.2f MB/sec\n", 192./sdt/1024/1024, 192./sdt/1024./1024); #else printf("# The bandwidth is %5.2f + %5.2f MB/sec\n", 2.*192./sdt/1024/1024, 2.*192./sdt/1024./1024); #endif } #endif fflush(stdout); } else { /* the non even/odd case now */ /*initialize the pseudo-fermion fields*/ j_max=1; sdt=0.; for (k=0;k<k_max;k++) { random_spinor_field(g_spinor_field[k], VOLUME, 0); } while(sdt < 3.) { #ifdef MPI MPI_Barrier(MPI_COMM_WORLD); #endif t1 = gettime(); for (j=0;j<j_max;j++) { for (k=0;k<k_max;k++) { D_psi(g_spinor_field[k+k_max], g_spinor_field[k]); antioptaway+=creal(g_spinor_field[k+k_max][0].s0.c0); } } t2 = gettime(); dt=t2-t1; #ifdef MPI MPI_Allreduce (&dt, &sdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); #else sdt = dt; #endif qdt=dt*dt; #ifdef MPI MPI_Allreduce (&qdt, &sqdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); #else sqdt = qdt; #endif sdt=sdt/((double)g_nproc); sqdt=sqrt(sqdt/g_nproc-sdt*sdt); j_max*=2; } j_max=j_max/2; dts=dt; sdt=1.0e6f*sdt/((double)(k_max*j_max*(VOLUME))); sqdt=1.0e6f*sqdt/((double)(k_max*j_max*(VOLUME))); if(g_proc_id==0) { printf("# The following result is just to make sure that the calculation is not optimized away: %e\n", antioptaway); printf("# Total compute time %e sec, variance of the time %e sec. (%d iterations).\n", sdt, sqdt, j_max); printf("\n# (%d Mflops [%d bit arithmetic])\n", (int)(1680.0f/sdt),(int)sizeof(spinor)/3); #ifdef OMP printf("# Mflops per OpenMP thread ~ %d\n",(int)(1680.0f/(omp_num_threads*sdt))); #endif printf("\n"); fflush(stdout); } } #ifdef HAVE_LIBLEMON if(g_proc_id==0) { printf("# Performing parallel IO test ...\n"); } xlfInfo = construct_paramsXlfInfo(0.5, 0); write_gauge_field( "conf.test", 64, xlfInfo); free(xlfInfo); if(g_proc_id==0) { printf("# done ...\n"); } #endif #ifdef MPI MPI_Finalize(); #endif #ifdef OMP free_omp_accumulators(); #endif free_gauge_field(); free_geometry_indices(); free_spinor_field(); free_moment_field(); return(0); }