int main(int argc, char **argv) { int c, i, j, mu, nu; int count = 0; int filename_set = 0; int dims[4] = {0,0,0,0}; int l_LX_at, l_LXstart_at; int x0, x1, x2, x3, ix, iix, it; int sid, status, gid; double **corr=NULL, **corr2=NULL; double *tcorr=NULL, *tcorr2=NULL; double *work = (double*)NULL; double q[4], fnorm; int verbose = 0; int do_gt = 0; int nsource=0; char filename[100], contype[200]; double ratime, retime; double plaq; double spinor1[24], spinor2[24], U_[18]; double *gauge_trafo=(double*)NULL; double mom2, mom4; complex w, w1, *cp1, *cp2, *cp3; FILE *ofs; #ifdef MPI // MPI_Init(&argc, &argv); fprintf(stderr, "[jc_ud_x] Error, only non-mpi version implemented\n"); exit(1); #endif while ((c = getopt(argc, argv, "h?f:")) != -1) { switch (c) { case 'f': strcpy(filename, optarg); filename_set=1; break; case 'h': case '?': default: usage(); break; } } /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# Reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } if(g_kappa == 0.) 
{ if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n"); usage(); } fprintf(stdout, "\n**************************************************\n"); fprintf(stdout, "* jc_ud_x\n"); fprintf(stdout, "**************************************************\n\n"); /********************************* * initialize MPI parameters *********************************/ // mpi_init(argc, argv); /* initialize */ dims[0]=T_global; dims[1]=LX; dims[2]=LY; dims[3]=LZ; T = T_global; Tstart = 0; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; fprintf(stdout, "# [%2d] parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] l_LX_at = %3d\n"\ "# [%2d] l_LXstart_at = %3d\n"\ "# [%2d] FFTW_LOC_VOLUME = %3d\n", g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at, g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME); if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); exit(1); } geometry(); /************************************************* * allocate mem for gauge field and spinor fields *************************************************/ alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND); no_fields = 2; g_spinor_field = (double**)calloc(no_fields, sizeof(double*)); for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND); /**************************************** * allocate memory for the contractions ****************************************/ nsource = (g_sourceid2 - g_sourceid + 1) / g_sourceid_step; if(g_cart_id==0) fprintf(stdout, "# nsource = %d\n", nsource); corr = (double**)calloc( nsource, sizeof(double*)); corr[0] = (double*)calloc( nsource*T*8, sizeof(double)); for(i=1;i<nsource;i++) corr[i] = corr[i-1] + 8*T; corr2 = (double**)calloc( nsource, sizeof(double*)); corr2[0] = (double*)calloc( nsource*8*T, sizeof(double)); for(i=1;i<nsource;i++) corr2[i] = corr2[i-1] + 8*T; tcorr = (double*)calloc(T*8, sizeof(double)); tcorr2 = (double*)calloc(T*8, sizeof(double)); 
/*********************************************** * start loop on gauge id.s ***********************************************/ for(gid=g_gaugeid; gid<=g_gaugeid2; gid++) { sprintf(filename, "%s.%.4d", gaugefilename_prefix, gid); if(g_cart_id==0) fprintf(stdout, "# reading gauge field from file %s\n", filename); read_lime_gauge_field_doubleprec(filename); xchange_gauge(); plaquette(&plaq); if(g_cart_id==0) fprintf(stdout, "# measured plaquette value: %25.16e\n", plaq); /* reset disc to zero */ for(ix=0; ix<nsource*8*T; ix++) corr[0][ix] = 0.; for(ix=0; ix<nsource*8*T; ix++) corr2[0][ix] = 0.; count=0; /*********************************************** * start loop on source id.s ***********************************************/ for(sid=g_sourceid; sid<=g_sourceid2; sid+=g_sourceid_step) { /* read the new propagator to g_spinor_field[0] */ ratime = (double)clock() / CLOCKS_PER_SEC; if(format==0) { sprintf(filename, "%s.%.4d.%.2d.inverted", filename_prefix, gid, sid); if(read_lime_spinor(g_spinor_field[0], filename, 0) != 0) break; } else if(format==1) { sprintf(filename, "%s.%.4d.%.5d.inverted", filename_prefix, gid, sid); if(read_cmi(g_spinor_field[0], filename) != 0) { fprintf(stderr, "\nError from read_cmi\n"); break; } } xchange_field(g_spinor_field[0]); retime = (double)clock() / CLOCKS_PER_SEC; if(g_cart_id==0) fprintf(stdout, "# time to read prop.: %e seconds\n", retime-ratime); ratime = (double)clock() / CLOCKS_PER_SEC; /* apply [1] = D_tm [0] */ Q_phi_tbc(g_spinor_field[1], g_spinor_field[0]); xchange_field(g_spinor_field[1]); retime = (double)clock() / CLOCKS_PER_SEC; if(g_cart_id==0) fprintf(stdout, "# time to apply D_W: %e seconds\n", retime-ratime); ratime = (double)clock() / CLOCKS_PER_SEC; /* calculate real and imaginary part */ for(mu=0; mu<4; mu++) { for(x0=0; x0<T; x0++) { for(x1=0; x1<LX; x1++) { for(x2=0; x2<LY; x2++) { for(x3=0; x3<LZ; x3++) { ix = g_ipt[x0][x1][x2][x3]; _cm_eq_cm_ti_co(U_, g_gauge_field+_GGI(ix,mu), &(co_phase_up[mu])); 
_fv_eq_cm_ti_fv(spinor1, U_, &g_spinor_field[0][_GSI(g_iup[ix][mu])]); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_mi_eq_fv(spinor2, spinor1); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[1][_GSI(ix)], spinor2); corr[count][2*(mu*T+x0) ] -= 0.5*w.re; corr[count][2*(mu*T+x0)+1] -= 0.5*w.im; _fv_eq_cm_dag_ti_fv(spinor1, U_, &g_spinor_field[0][_GSI(ix)]); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_pl_eq_fv(spinor2, spinor1); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[1][_GSI(g_iup[ix][mu])], spinor2); corr[count][2*(mu*T+x0) ] -= 0.5*w.re; corr[count][2*(mu*T+x0)+1] -= 0.5*w.im; _fv_eq_gamma_ti_fv(spinor1, mu, &g_spinor_field[0][_GSI(ix)]); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[1][_GSI(ix)], spinor1); corr2[count][2*(mu*T+x0) ] -= w.re; corr2[count][2*(mu*T+x0)+1] -= w.im; }}} } } // of mu count++; } // of sid retime = (double)clock() / CLOCKS_PER_SEC; if(g_cart_id==0) fprintf(stdout, "# time to calculate contractions: %e seconds\n", retime-ratime); for(ix=0;ix<8*T;ix++) tcorr[ix] = 0.; for(ix=0;ix<8*T;ix++) tcorr2[ix] = 0.; for(i=0;i<nsource-1;i++) { for(j=i+1;j<nsource;j++) { for(mu=0;mu<4;mu++) { for(x0=0;x0<T;x0++) { // times at source for(x1=0;x1<T;x1++) { // times at sink it = (x1 - x0 + T) % T; // conserved current tcorr[2*(mu*T+it) ] += corr[i][2*(mu*T+x1)] * corr[j][2*(mu*T+x0) ] - corr[i][2*(mu*T+x1)+1] * corr[j][2*(mu*T+x0)+1]; tcorr[2*(mu*T+it)+1] += corr[i][2*(mu*T+x1)] * corr[j][2*(mu*T+x0)+1] + corr[i][2*(mu*T+x1)+1] * corr[j][2*(mu*T+x0) ]; tcorr[2*(mu*T+it) ] += corr[j][2*(mu*T+x1)] * corr[i][2*(mu*T+x0) ] - corr[j][2*(mu*T+x1)+1] * corr[i][2*(mu*T+x0)+1]; tcorr[2*(mu*T+it)+1] += corr[j][2*(mu*T+x1)] * corr[i][2*(mu*T+x0)+1] + corr[j][2*(mu*T+x1)+1] * corr[i][2*(mu*T+x0) ]; // local current tcorr2[2*(mu*T+it) ] += corr2[i][2*(mu*T+x1)] * corr2[j][2*(mu*T+x0) ] - corr2[i][2*(mu*T+x1)+1] * corr2[j][2*(mu*T+x0)+1]; tcorr2[2*(mu*T+it)+1] += corr2[i][2*(mu*T+x1)] * corr2[j][2*(mu*T+x0)+1] + corr2[i][2*(mu*T+x1)+1] * corr2[j][2*(mu*T+x0) ]; 
tcorr2[2*(mu*T+it) ] += corr2[j][2*(mu*T+x1)] * corr2[i][2*(mu*T+x0) ] - corr2[j][2*(mu*T+x1)+1] * corr2[i][2*(mu*T+x0)+1]; tcorr2[2*(mu*T+it)+1] += corr2[j][2*(mu*T+x1)] * corr2[i][2*(mu*T+x0)+1] + corr2[j][2*(mu*T+x1)+1] * corr2[i][2*(mu*T+x0) ]; }} } }} fnorm = 1. / ( g_prop_normsqr * g_prop_normsqr * (double)(LX*LY*LZ) * (double)(LX*LY*LZ) * nsource * (nsource-1)); if(g_cart_id==0) fprintf(stdout, "X-fnorm = %e\n", fnorm); for(ix=0;ix<8*T;ix++) tcorr[ix] *= fnorm; for(ix=0;ix<8*T;ix++) tcorr2[ix] *= fnorm; /************************************************ * save results ************************************************/ if(g_cart_id == 0) fprintf(stdout, "# save results for gauge id %d and sid %d\n", gid, sid); /* save the result in position space */ sprintf(filename, "jc_u_tp0.%.4d.%.4d", gid, sid); ofs = fopen(filename, "w"); for(x0=0;x0<T;x0++) fprintf(ofs, "%d%25.16e%25.16e%25.16e%25.16e%25.16e%25.16e%25.16e%25.16e\n", x0, tcorr[2*(0*T+x0)], tcorr[2*(0*T+x0)+1], tcorr[2*(1*T+x0)], tcorr[2*(1*T+x0)+1], tcorr[2*(2*T+x0)], tcorr[2*(2*T+x0)+1], tcorr[2*(3*T+x0)], tcorr[2*(3*T+x0)+1]); fclose(ofs); } /* of loop on gid */ /*********************************************** * free the allocated memory, finalize ***********************************************/ free(g_gauge_field); for(i=0; i<no_fields; i++) free(g_spinor_field[i]); free(g_spinor_field); free_geometry(); free(corr); free(corr2); free(tcorr); free(tcorr2); return(0); }
int main(int argc, char **argv) { int c, i, mu, nu; int count = 0; int filename_set = 0; int dims[4] = {0,0,0,0}; int l_LX_at, l_LXstart_at; int x0, x1, x2, x3, ix, iix; int sid, status; double *disc = (double*)NULL; double *data = (double*)NULL; double *bias = (double*)NULL; double *work = (double*)NULL; double q[4], fnorm; int verbose = 0; char filename[100], contype[200]; double ratime, retime; double plaq; double spinor1[24], spinor2[24], U_[18]; complex w, w1, *cp1, *cp2, *cp3, *cp4; fftw_complex *in=(fftw_complex*)NULL; #ifdef MPI fftwnd_mpi_plan plan_p, plan_m; #else fftwnd_plan plan_p, plan_m; #endif #ifdef MPI MPI_Init(&argc, &argv); #endif while ((c = getopt(argc, argv, "h?vf:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'h': case '?': default: usage(); break; } } /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# Reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } if(g_kappa <= 0.) 
{ if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.\n"); usage(); } if(hpe_order%2==0 && hpe_order>0) { if(g_proc_id==0) fprintf(stdout, "HPE order should be odd\n"); usage(); } fprintf(stdout, "\n**************************************************\n"\ "* vp_disc_hpe_stoch_subtract with HPE of order %d\n"\ "**************************************************\n\n", hpe_order); /********************************* * initialize MPI parameters *********************************/ mpi_init(argc, argv); /* initialize fftw */ dims[0]=T_global; dims[1]=LX; dims[2]=LY; dims[3]=LZ; #ifdef MPI plan_p = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_BACKWARD, FFTW_MEASURE); plan_m = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_FORWARD, FFTW_MEASURE); fftwnd_mpi_local_sizes(plan_p, &T, &Tstart, &l_LX_at, &l_LXstart_at, &FFTW_LOC_VOLUME); #else plan_p = fftwnd_create_plan(4, dims, FFTW_BACKWARD, FFTW_MEASURE | FFTW_IN_PLACE); plan_m = fftwnd_create_plan(4, dims, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE); T = T_global; Tstart = 0; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; #endif fprintf(stdout, "# [%2d] fftw parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] l_LX_at = %3d\n"\ "# [%2d] l_LXstart_at = %3d\n"\ "# [%2d] FFTW_LOC_VOLUME = %3d\n", g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at, g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME); #ifdef MPI if(T==0) { fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id); MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); exit(101); } #endif if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(102); } geometry(); /************************************************ * read the gauge field, measure the plaquette ************************************************/ alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND); sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf); if(g_cart_id==0) 
fprintf(stdout, "# reading gauge field from file %s\n", filename); read_lime_gauge_field_doubleprec(filename); xchange_gauge(); plaquette(&plaq); if(g_cart_id==0) fprintf(stdout, "# measured plaquette value: %25.16e\n", plaq); /**************************************** * allocate memory for the spinor fields ****************************************/ no_fields = 3; g_spinor_field = (double**)calloc(no_fields, sizeof(double*)); for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND); /**************************************** * allocate memory for the contractions ****************************************/ disc = (double*)calloc(16*VOLUME, sizeof(double)); if( disc == (double*)NULL ) { fprintf(stderr, "could not allocate memory for disc\n"); # ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); # endif exit(103); } data = (double*)calloc(16*VOLUME, sizeof(double)); if( data== (double*)NULL ) { fprintf(stderr, "could not allocate memory for data\n"); # ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); # endif exit(104); } for(ix=0; ix<16*VOLUME; ix++) data[ix] = 0.; work = (double*)calloc(32*VOLUME, sizeof(double)); if( work == (double*)NULL ) { fprintf(stderr, "could not allocate memory for work\n"); # ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); # endif exit(105); } bias = (double*)calloc(32*VOLUME, sizeof(double)); if( bias == (double*)NULL ) { fprintf(stderr, "could not allocate memory for bias\n"); # ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); # endif exit(106); } for(ix=0; ix<32*VOLUME; ix++) bias[ix] = 0.; /**************************************** * prepare Fourier transformation arrays ****************************************/ in = (fftw_complex*)malloc(FFTW_LOC_VOLUME*sizeof(fftw_complex)); if(in==(fftw_complex*)NULL) { #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(107); } /*********************************************** * start loop on source id.s 
***********************************************/ for(sid=g_sourceid; sid<=g_sourceid2; sid+=g_sourceid_step) { for(ix=0; ix<16*VOLUME; ix++) disc[ix] = 0.; /* read the new propagator */ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif if(format==0) { sprintf(filename, "%s.%.4d.%.2d.inverted", filename_prefix, Nconf, sid); if(read_lime_spinor(g_spinor_field[2], filename, 0) != 0) break; } else if(format==1) { sprintf(filename, "%s.%.4d.%.5d.inverted", filename_prefix, Nconf, sid); if(read_cmi(g_spinor_field[2], filename) != 0) break; } xchange_field(g_spinor_field[2]); #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "# time to read prop.: %e seconds\n", retime-ratime); count++; /************************************************ * calculate the source: apply Q_phi_tbc ************************************************/ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif Q_phi_tbc(g_spinor_field[0], g_spinor_field[2]); xchange_field(g_spinor_field[0]); #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "# time to calculate source: %e seconds\n", retime-ratime); #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif /************************************************ * HPE: apply BH to order hpe_order+2 ************************************************/ if(hpe_order>0) { BHn(g_spinor_field[1], g_spinor_field[2], hpe_order+2); } else { memcpy((void*)g_spinor_field[1], (void*)g_spinor_field[2], 24*VOLUMEPLUSRAND*sizeof(double)); } /************************************************ * add new contractions to (existing) disc ************************************************/ for(mu=0; mu<4; mu++) { iix = _GWI(mu,0,VOLUME); for(ix=0; ix<VOLUME; ix++) { _cm_eq_cm_ti_co(U_, &g_gauge_field[_GGI(ix, mu)], &co_phase_up[mu]); 
_fv_eq_cm_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(g_iup[ix][mu])]); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_mi_eq_fv(spinor2, spinor1); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(ix)], spinor2); disc[iix ] = -0.5 * w.re; disc[iix+1] = -0.5 * w.im; data[iix ] -= 0.5 * w.re; data[iix+1] -= 0.5 * w.im; _fv_eq_cm_dag_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(ix)]); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_pl_eq_fv(spinor2, spinor1); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(g_iup[ix][mu])], spinor2); disc[iix ] -= 0.5 * w.re; disc[iix+1] -= 0.5 * w.im; data[iix ] -= 0.5 * w.re; data[iix+1] -= 0.5 * w.im; iix += 2; } } #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "# time to contract cvc: %e seconds\n", retime-ratime); #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif for(mu=0; mu<4; mu++) { memcpy((void*)in, (void*)(disc+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double)); #ifdef MPI fftwnd_mpi(plan_m, 1, in, NULL, FFTW_NORMAL_ORDER); #else fftwnd_one(plan_m, in, NULL); #endif memcpy((void*)(disc+_GWI(4+mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double)); memcpy((void*)in, (void*)(disc+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double)); #ifdef MPI fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER); #else fftwnd_one(plan_p, in, NULL); #endif memcpy((void*)(disc+_GWI(mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double)); } /* of mu =0 ,..., 3*/ for(mu=0; mu<4; mu++) { for(nu=0; nu<4; nu++) { cp1 = (complex*)(disc+_GWI(mu, 0,VOLUME)); cp2 = (complex*)(disc+_GWI(4+nu, 0,VOLUME)); cp3 = (complex*)(bias+_GWI(4*mu+nu,0,VOLUME)); for(ix=0; ix<VOLUME; ix++) { _co_eq_co_ti_co(&w1, cp1, cp2); cp3->re += w1.re; cp3->im += w1.im; cp1++; cp2++; cp3++; } }} #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "# time for Fourier trafo and adding to bias: %e seconds\n", retime-ratime); } /* of 
loop on sid */ /************************************************ * save results for count == Nsave ************************************************/ if(count==Nsave) { if(g_cart_id == 0) fprintf(stdout, "# save results for count = %d\n", count); for(ix=0; ix<16*VOLUME; ix++) disc[ix] = 0.; if(hpe_order>0) { sprintf(filename, "vp_disc_hpe%.2d_loops_X.%.4d", hpe_order, Nconf); if(g_cart_id==0) fprintf(stdout, "# reading loop part from file %s\n", filename); if( (status = read_lime_contraction(disc, filename, 4, 0)) != 0 ) { #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(108); } } /* save the result in position space */ fnorm = 1. / ( (double)count * g_prop_normsqr ); if(g_cart_id==0) fprintf(stdout, "# X-fnorm = %e\n", fnorm); for(mu=0; mu<4; mu++) { for(ix=0; ix<VOLUME; ix++) { work[_GWI(mu,ix,VOLUME) ] = data[_GWI(mu,ix,VOLUME) ] * fnorm + disc[_GWI(mu,ix,VOLUME) ]; work[_GWI(mu,ix,VOLUME)+1] = data[_GWI(mu,ix,VOLUME)+1] * fnorm + disc[_GWI(mu,ix,VOLUME)+1]; } } sprintf(filename, "vp_disc_hpe%.2d_subtracted_X.%.4d.%.4d", hpe_order, Nconf, count); sprintf(contype, "cvc-disc-hpe-loops-%2d-to-%2d-stoch-subtracted-X", hpe_order, hpe_order+2); write_lime_contraction(work, filename, 64, 4, contype, Nconf, count); /* sprintf(filename, "vp_disc_hpe%.2d_subtracted_X.%.4d.%.4d.ascii", hpe_order, Nconf, count); write_contraction(work, NULL, filename, 4, 2, 0); */ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif for(mu=0; mu<4; mu++) { memcpy((void*)in, (void*)(data+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double)); #ifdef MPI fftwnd_mpi(plan_m, 1, in, NULL, FFTW_NORMAL_ORDER); #else fftwnd_one(plan_m, in, NULL); #endif memcpy((void*)(data+_GWI(4+mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double)); memcpy((void*)in, (void*)(data+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double)); #ifdef MPI fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER); #else fftwnd_one(plan_p, in, NULL); #endif memcpy((void*)(data+_GWI(mu,0,VOLUME)), 
(void*)in, 2*VOLUME*sizeof(double)); } fnorm = 1. / ( g_prop_normsqr*g_prop_normsqr * (double)count * (double)(count-1) ); if(g_cart_id==0) fprintf(stdout, "# P-fnorm for purely stochastic part = %e\n", fnorm); for(mu=0; mu<4; mu++) { for(nu=0; nu<4; nu++) { cp1 = (complex*)(data+_GWI(mu, 0,VOLUME)); cp2 = (complex*)(data+_GWI(4+nu, 0,VOLUME)); cp3 = (complex*)(work+_GWI(4*mu+nu,0,VOLUME)); cp4 = (complex*)(bias+_GWI(4*mu+nu,0,VOLUME)); for(ix=0; ix<VOLUME; ix++) { _co_eq_co_ti_co(&w1, cp1, cp2); cp3->re = ( w1.re - cp4->re ) * fnorm; cp3->im = ( w1.im - cp4->im ) * fnorm; cp1++; cp2++; cp3++; cp4++; } }} for(mu=0; mu<4; mu++) { memcpy((void*)in, (void*)(disc+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double)); #ifdef MPI fftwnd_mpi(plan_m, 1, in, NULL, FFTW_NORMAL_ORDER); #else fftwnd_one(plan_m, in, NULL); #endif memcpy((void*)(disc+_GWI(4+mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double)); memcpy((void*)in, (void*)(disc+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double)); #ifdef MPI fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER); #else fftwnd_one(plan_p, in, NULL); #endif memcpy((void*)(disc+_GWI(mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double)); } fnorm = 1. / ( g_prop_normsqr * (double)count ); if(g_cart_id==0) fprintf(stdout, "# P-fnorm for mixed stochastic-loop part = %e\n", fnorm); for(mu=0; mu<4; mu++) { for(nu=0; nu<4; nu++) { cp1 = (complex*)(data + _GWI(mu, 0,VOLUME)); cp2 = (complex*)(disc + _GWI(4+nu, 0,VOLUME)); cp3 = (complex*)(work + _GWI(4*mu+nu,0,VOLUME)); for(ix=0; ix<VOLUME; ix++) { _co_eq_co_ti_co(&w1, cp1, cp2); cp3->re += w1.re * fnorm; cp3->im += w1.im * fnorm; cp1++; cp2++; cp3++; } cp1 = (complex*)(disc + _GWI(mu, 0,VOLUME)); cp2 = (complex*)(data + _GWI(4+nu, 0,VOLUME)); cp3 = (complex*)(work + _GWI(4*mu+nu,0,VOLUME)); for(ix=0; ix<VOLUME; ix++) { _co_eq_co_ti_co(&w1, cp1, cp2); cp3->re += w1.re * fnorm; cp3->im += w1.im * fnorm; cp1++; cp2++; cp3++; } }} fnorm = 1. 
/ ( (double)T_global * (double)(LX*LY*LZ) ); if(g_cart_id==0) fprintf(stdout, "# P-fnorm for final estimator (1/T/V) = %e\n", fnorm); for(mu=0; mu<4; mu++) { for(nu=0; nu<4; nu++) { cp1 = (complex*)(disc + _GWI(mu, 0,VOLUME)); cp2 = (complex*)(disc + _GWI(4+nu, 0,VOLUME)); cp3 = (complex*)(work + _GWI(4*mu+nu,0,VOLUME)); for(x0=0; x0<T; x0++) { q[0] = (double)(x0+Tstart) / (double)T_global; for(x1=0; x1<LX; x1++) { q[1] = (double)x1 / (double)LX; for(x2=0; x2<LY; x2++) { q[2] = (double)x2 / (double)LY; for(x3=0; x3<LZ; x3++) { q[3] = (double)x3 / (double)LZ; ix = g_ipt[x0][x1][x2][x3]; w.re = cos(M_PI * ( q[mu] - q[nu] ) ); w.im = sin(M_PI * ( q[mu] - q[nu] ) ); _co_eq_co_ti_co(&w1, cp1, cp2); cp3->re += w1.re; cp3->im += w1.im; _co_eq_co_ti_co(&w1, cp3, &w); cp3->re = w1.re * fnorm; cp3->im = w1.im * fnorm; cp1++; cp2++; cp3++; }}}} }} sprintf(filename, "vp_disc_hpe%.2d_subtracted_P.%.4d.%.4d", hpe_order, Nconf, count); sprintf(contype, "cvc-disc-hpe-loops-%2d-to-%2d-stoch-subtracted-P", hpe_order, hpe_order+2); write_lime_contraction(work, filename, 64, 16, contype, Nconf, count); /* sprintf(filename, "vp_disc_hpe%.2d_subtracted_P.%.4d.%.4d.ascii", hpe_order, Nconf, count); write_contraction(work, NULL, filename, 16, 2, 0); */ #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "# time to save cvc results: %e seconds\n", retime-ratime); } /* of if count == Nsave */ /*********************************************** * free the allocated memory, finalize ***********************************************/ free(g_gauge_field); for(i=0; i<no_fields; i++) free(g_spinor_field[i]); free(g_spinor_field); free_geometry(); fftw_free(in); free(disc); free(bias); free(data); free(work); #ifdef MPI fftwnd_mpi_destroy_plan(plan_p); fftwnd_mpi_destroy_plan(plan_m); MPI_Finalize(); #else fftwnd_destroy_plan(plan_p); fftwnd_destroy_plan(plan_m); #endif return(0); }
int main(int argc, char **argv) { int c, i, mu; int count = 0; int filename_set = 0; int l_LX_at, l_LXstart_at; int x0, x1, ix, idx; int VOL3; int sid; double *disc = (double*)NULL; int verbose = 0; char filename[100]; double ratime, retime; double plaq; double spinor1[24], spinor2[24]; double _2kappamu; double *gauge_field_f=NULL, *gauge_field_timeslice=NULL; double v4norm = 0., vvnorm = 0.; complex w; FILE *ofs1, *ofs2; /* double sign_adj5[] = {-1., -1., -1., -1., +1., +1., +1., +1., +1., +1., -1., -1., -1., 1., -1., -1.}; */ double hopexp_coeff[8], addreal, addimag; int gindex[] = { 5 , 1 , 2 , 3 , 6 ,10 ,11 ,12 , 4 , 7 , 8 , 9 , 0 ,15 , 14 ,13 }; int isimag[] = { 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 , 1 , 0 , 1 , 1 , 1 }; double gsign[] = {-1., 1., 1., 1., -1., 1., 1., 1., 1., 1., 1., 1., 1., 1., -1., 1.}; #ifdef MPI MPI_Status status; #endif #ifdef MPI MPI_Init(&argc, &argv); #endif while ((c = getopt(argc, argv, "h?vgf:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'h': case '?': default: usage(); break; } } /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } if(g_kappa == 0.) 
{ if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n"); usage(); } /* initialize MPI parameters */ mpi_init(argc, argv); #ifdef MPI T = T_global / g_nproc; Tstart = g_cart_id * T; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; VOL3 = LX*LY*LZ; #else T = T_global; Tstart = 0; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; VOL3 = LX*LY*LZ; #endif fprintf(stdout, "# [%2d] parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] l_LX_at = %3d\n"\ "# [%2d] l_LXstart_at = %3d\n"\ "# [%2d] FFTW_LOC_VOLUME = %3d\n", g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at, g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME); if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(1); } geometry(); /* read the gauge field */ alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND); sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf); if(g_cart_id==0) fprintf(stdout, "reading gauge field from file %s\n", filename); read_lime_gauge_field_doubleprec(filename); xchange_gauge(); /* measure the plaquette */ plaquette(&plaq); if(g_cart_id==0) fprintf(stdout, "# measured plaquette value: %25.16e\n", plaq); if(Nlong > -1) { /* N_ape = 5; */ alpha_ape = 0.4; if(g_cart_id==0) fprintf(stdout, "# apply fuzzing of gauge field and propagators with parameters:\n"\ "# Nlong = %d\n# N_ape = %d\n# alpha_ape = %f\n", Nlong, N_ape, alpha_ape); alloc_gauge_field(&gauge_field_f, VOLUMEPLUSRAND); if( (gauge_field_timeslice = (double*)malloc(72*VOL3*sizeof(double))) == (double*)NULL ) { fprintf(stderr, "Error, could not allocate mem for gauge_field_timeslice\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(2); } for(x0=0; x0<T; x0++) { memcpy((void*)gauge_field_timeslice, (void*)(g_gauge_field+_GGI(g_ipt[x0][0][0][0],0)), 72*VOL3*sizeof(double)); for(i=0; i<N_ape; i++) { APE_Smearing_Step_Timeslice(gauge_field_timeslice, alpha_ape); 
} fuzzed_links_Timeslice(gauge_field_f, gauge_field_timeslice, Nlong, x0); } free(gauge_field_timeslice); } /* test: print the fuzzed APE smeared gauge field to stdout */ /* for(ix=0; ix<36*VOLUME; ix++) { fprintf(stdout, "%6d%25.16e%25.16e%25.16e%25.16e\n", ix, gauge_field_f[2*ix], gauge_field_f[2*ix+1], g_gauge_field[2*ix], g_gauge_field[2*ix+1]); } */ /* allocate memory for the spinor fields */ no_fields = 4; g_spinor_field = (double**)calloc(no_fields, sizeof(double*)); for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND); /* allocate memory for the contractions */ disc = (double*)calloc(4*16*T*2, sizeof(double)); if( disc==(double*)NULL ) { fprintf(stderr, "could not allocate memory for disc\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(3); } for(ix=0; ix<4*32*T; ix++) disc[ix] = 0.; if(g_cart_id==0) { sprintf(filename, "cvc_2pt_disc_vv.%.4d", Nconf); ofs1 = fopen(filename, "w"); sprintf(filename, "cvc_2pt_disc_v4.%.4d", Nconf); ofs2 = fopen(filename, "w"); if(ofs1==(FILE*)NULL || ofs2==(FILE*)NULL) { #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(5); } } /* add the HPE coefficients */ if(format==1) { addimag = 2*g_kappa*g_mu/sqrt(1 + 4*g_kappa*g_kappa*g_mu*g_mu)* LX*LY*LZ*3*4*2.*g_kappa*g_kappa*4; addreal = 1./sqrt(1 + 4*g_kappa*g_kappa*g_mu*g_mu)*LX*LY*LZ*3*4*2.*g_kappa*g_kappa*4; v4norm = 1. / ( 8. * g_kappa * g_kappa ); vvnorm = g_mu / ( 4. * g_kappa ); } else { addimag = 2*g_kappa*g_mu/sqrt(1 + 4*g_kappa*g_kappa*g_mu*g_mu)* LX*LY*LZ*3*4*2.*g_kappa*2; addreal = 1./sqrt(1 + 4*g_kappa*g_kappa*g_mu*g_mu)*LX*LY*LZ*3*4*2.*g_kappa*2; v4norm = 1. / ( 4. * g_kappa ); vvnorm = g_mu / ( 4. * g_kappa ); } /* calculate additional contributions for 1 and gamma_5 */ _2kappamu = 2.*g_kappa*g_mu; hopexp_coeff[0] = 24. * g_kappa * LX*LY*LZ / (1. + _2kappamu*_2kappamu); hopexp_coeff[1] = 0.; hopexp_coeff[2] = -768. 
* g_kappa*g_kappa*g_kappa * LX*LY*LZ * _2kappamu*_2kappamu / ( (1.+_2kappamu*_2kappamu)*(1.+_2kappamu*_2kappamu)*(1.+_2kappamu*_2kappamu) ); hopexp_coeff[3] = 0.; hopexp_coeff[4] = 0.; hopexp_coeff[5] = -24.*g_kappa * LX*LY*LZ * _2kappamu / (1. + _2kappamu*_2kappamu); hopexp_coeff[6] = 0.; hopexp_coeff[7] = -384. * g_kappa*g_kappa*g_kappa * LX*LY*LZ * (1.-_2kappamu*_2kappamu)*_2kappamu / ( (1.+_2kappamu*_2kappamu)*(1.+_2kappamu*_2kappamu)*(1.+_2kappamu*_2kappamu) ); /* start loop on source id.s */ for(sid=g_sourceid; sid<=g_sourceid2; sid+=g_sourceid_step) { for(ix=0; ix<4*32*T; ix++) disc[ix] = 0.; /* read the new propagator */ sprintf(filename, "%s.%.4d.%.5d.inverted", filename_prefix, Nconf, sid); /* sprintf(filename, "%s.%.4d.%.2d.inverted", filename_prefix, Nconf, sid); */ if(read_lime_spinor(g_spinor_field[1], filename, 0) != 0) { fprintf(stderr, "[%2d] Error, could not read from file %s\n", g_cart_id, filename); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(4); } count++; xchange_field(g_spinor_field[1]); /* calculate the source: apply Q_phi_tbc */ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif Q_phi_tbc(g_spinor_field[0], g_spinor_field[1]); xchange_field(g_spinor_field[0]); #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "# time to apply Q_tm %e seconds\n", retime-ratime); /* apply gamma5_BdagH4_gamma5 */ gamma5_BdagH4_gamma5(g_spinor_field[2], g_spinor_field[0], g_spinor_field[3]); /* attention: additional factor 2kappa because of CMI format */ /* if(format==1) { for(ix=0; ix<VOLUME; ix++) { _fv_ti_eq_re(&g_spinor_field[2][_GSI(ix)], 2.*g_kappa); } } */ if(Nlong>-1) { if(g_cart_id==0) fprintf(stdout, "# fuzzing propagator with Nlong = %d\n", Nlong); memcpy((void*)g_spinor_field[3], (void*)g_spinor_field[1], 24*VOLUMEPLUSRAND*sizeof(double)); Fuzz_prop(gauge_field_f, g_spinor_field[3], Nlong); } /* add new contractions 
to disc */ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif for(x0=0; x0<T; x0++) { /* loop on time */ for(x1=0; x1<VOL3; x1++) { /* loop on sites in timeslice */ ix = x0*VOL3 + x1; for(mu=0; mu<16; mu++) { /* loop on index of gamma matrix */ _fv_eq_gamma_ti_fv(spinor1, mu, &g_spinor_field[1][_GSI(ix)]); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[2][_GSI(ix)], spinor1); disc[2*( x0*16+mu) ] += w.re; disc[2*( x0*16+mu)+1] += w.im; _fv_eq_gamma_ti_fv(spinor1, 5, &g_spinor_field[1][_GSI(ix)]); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[1][_GSI(ix)], spinor2); disc[2*(16*T + x0*16+mu) ] += w.re; disc[2*(16*T + x0*16+mu)+1] += w.im; if(Nlong>-1) { _fv_eq_gamma_ti_fv(spinor1, mu, &g_spinor_field[3][_GSI(ix)]); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[2][_GSI(ix)], spinor1); disc[2*(32*T + x0*16+mu) ] += w.re; disc[2*(32*T + x0*16+mu)+1] += w.im; _fv_eq_gamma_ti_fv(spinor1, 5, &g_spinor_field[3][_GSI(ix)]); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[1][_GSI(ix)], spinor2); disc[2*(48*T + x0*16+mu) ] += w.re; disc[2*(48*T + x0*16+mu)+1] += w.im; } } } } if(g_cart_id==0) fprintf(stdout, "# addimag = %25.16e\n", addimag); if(g_cart_id==0) fprintf(stdout, "# addreal = %25.16e\n", addreal); for(x0=0; x0<T; x0++) { disc[2*( x0*16+4) ] += addreal; disc[2*( x0*16+5)+1] -= addimag; /* if(Nlong>-1) { disc[2*(32*T + x0*16+4) ] += addreal; disc[2*(32*T + x0*16+5)+1] -= addimag; } */ } #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "# contractions in %e seconds\n", retime-ratime); /* write current disc to file */ if(g_cart_id==0) { if(sid==g_sourceid) fprintf(ofs1, "#%6d%3d%3d%3d%3d\t%f\t%f\n", Nconf, T, LX, LY, LZ, g_kappa, g_mu); if(sid==g_sourceid) fprintf(ofs2, "#%6d%3d%3d%3d%3d\t%f\t%f\n", Nconf, T, LX, LY, LZ, g_kappa, g_mu); for(x0=0; x0<T; x0++) { for(mu=0; mu<16; mu++) { idx = gindex[mu]; 
ix = 16*x0 + idx; if(isimag[mu]==0) { fprintf(ofs2, "%6d%3d%4d%4d%25.16e%25.16e%25.16e%25.16e\n", Nconf, mu, x0, sid, gsign[mu]*disc[2* ix ]*v4norm, gsign[mu]*disc[2* ix +1]*v4norm, gsign[mu]*disc[2*(32*T+ix)]*v4norm, gsign[mu]*disc[2*(32*T+ix)+1]*v4norm); } else { fprintf(ofs2, "%6d%3d%4d%4d%25.16e%25.16e%25.16e%25.16e\n", Nconf, mu, x0, sid, gsign[mu]*disc[2*( ix)+1]*v4norm, -gsign[mu]*disc[2* ix ]*v4norm, gsign[mu]*disc[2*(32*T+ix)+1]*v4norm, -gsign[mu]*disc[2*(32*T+ix)]*v4norm); } } } for(x0=0; x0<T; x0++) { for(mu=0; mu<16; mu++) { idx = gindex[mu]; ix = 16*x0 + idx; if(isimag[mu]==0) { fprintf(ofs1, "%6d%3d%4d%4d%25.16e%25.16e%25.16e%25.16e\n", Nconf, mu, x0, sid, gsign[mu]*disc[2*(16*T+ix)+1]*vvnorm, -gsign[mu]*disc[2*(16*T+ix)]*vvnorm, gsign[mu]*disc[2*(48*T+ix)+1]*vvnorm, -gsign[mu]*disc[2*(48*T+ix)]*vvnorm); } else { fprintf(ofs1, "%6d%3d%4d%4d%25.16e%25.16e%25.16e%25.16e\n", Nconf, mu, x0, sid, -gsign[mu]*disc[2*(16*T+ix)]*vvnorm, -gsign[mu]*disc[2*(16*T+ix)+1]*vvnorm, -gsign[mu]*disc[2*(48*T+ix)]*vvnorm, -gsign[mu]*disc[2*(48*T+ix)+1]*vvnorm); } } } #ifdef MPI for(c=1; c<g_nproc; c++) { MPI_Recv(disc, 128*T, MPI_DOUBLE, c, 100+c, g_cart_grid, &status); for(x0=0; x0<T; x0++) { for(mu=0; mu<16; mu++) { idx=gindex[mu]; ix = 16*x0 + idx; if(isimag[mu]==0) { fprintf(ofs2, "%6d%3d%4d%4d%25.16e%25.16e%25.16e%25.16e\n", Nconf, mu, c*T+x0, sid, gsign[mu]*disc[2* ix ]*v4norm, gsign[mu]*disc[2* ix +1]*v4norm, gsign[mu]*disc[2*(32*T+ix)]*v4norm, gsign[mu]*disc[2*(32*T+ix)+1]*v4norm); } else { fprintf(ofs2, "%6d%3d%4d%4d%25.16e%25.16e%25.16e%25.16e\n", Nconf, mu, c*T+x0, sid, gsign[mu]*disc[2*( ix)+1]*v4norm, -gsign[mu]*disc[2* ix ]*v4norm, gsign[mu]*disc[2*(32*T+ix)+1]*v4norm, -gsign[mu]*disc[2*(32*T+ix)]*v4norm); } } } for(x0=0; x0<T; x0++) { for(mu=0; mu<16; mu++) { idx = gindex[mu]; ix = 16*x0 + idx; if(isimag[mu]==0) { fprintf(ofs1, "%6d%3d%4d%4d%25.16e%25.16e%25.16e%25.16e\n", Nconf, mu, c*T+x0, sid, gsign[mu]*disc[2*(16*T+ix)+1]*vvnorm, 
-gsign[mu]*disc[2*(16*T+ix)]*vvnorm, gsign[mu]*disc[2*(48*T+ix)+1]*vvnorm, -gsign[mu]*disc[2*(48*T+ix)]*vvnorm); } else { fprintf(ofs1, "%6d%3d%4d%4d%25.16e%25.16e%25.16e%25.16e\n", Nconf, mu, c*T+x0, sid, -gsign[mu]*disc[2*(16*T+ix)]*vvnorm, -gsign[mu]*disc[2*(16*T+ix)+1]*vvnorm, -gsign[mu]*disc[2*(48*T+ix)]*vvnorm, -gsign[mu]*disc[2*(48*T+ix)+1]*vvnorm); } } } } #endif } #ifdef MPI else { for(c=1; c<g_nproc; c++) { if(g_cart_id==c) { MPI_Send(disc, 128*T, MPI_DOUBLE, 0, 100+c, g_cart_grid); } } } #endif } /* of loop on sid */ if(g_cart_id==0) { fclose(ofs1); fclose(ofs2); } if(g_cart_id==0) { fprintf(stdout, "# contributions from HPE:\n"); fprintf(stdout, "(1) X = id\t%25.16e%25.16e\n"\ " \t%25.16e%25.16e\n"\ "(2) X = 5\t%25.16e%25.16e\n"\ " \t%25.16e%25.16e\n", hopexp_coeff[0], hopexp_coeff[1], hopexp_coeff[2], hopexp_coeff[3], hopexp_coeff[4], hopexp_coeff[5], hopexp_coeff[6], hopexp_coeff[7]); } /* free the allocated memory, finalize */ free(g_gauge_field); g_gauge_field=(double*)NULL; for(i=0; i<no_fields; i++) free(g_spinor_field[i]); free(g_spinor_field); g_spinor_field=(double**)NULL; free_geometry(); free(disc); if(Nlong>-1) free(gauge_field_f); #ifdef MPI MPI_Finalize(); #endif return(0); }
int main(int argc, char **argv) { int c, i, mu, nu; int count = 0; int filename_set = 0; int dims[4] = {0,0,0,0}; int l_LX_at, l_LXstart_at; int x0, x1, x2, x3, ix, iix; int sid; double *disc = (double*)NULL; double *work = (double*)NULL; double q[4], fnorm; int verbose = 0; int do_gt = 0; char filename[100], contype[200]; double ratime, retime; double plaq; double spinor1[24], spinor2[24], U_[18]; complex w, w1, *cp1, *cp2, *cp3; FILE *ofs; fftw_complex *in=(fftw_complex*)NULL; #ifdef MPI fftwnd_mpi_plan plan_p, plan_m; int *status; #else fftwnd_plan plan_p, plan_m; #endif #ifdef MPI MPI_Init(&argc, &argv); #endif while ((c = getopt(argc, argv, "h?vgf:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'g': do_gt = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'h': case '?': default: usage(); break; } } /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# Reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } if(g_kappa == 0.) 
{ if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n"); usage(); } /* initialize MPI parameters */ mpi_init(argc, argv); #ifdef MPI if((status = (int*)calloc(g_nproc, sizeof(int))) == (int*)NULL) { MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); exit(7); } #endif /* initialize fftw */ dims[0]=T_global; dims[1]=LX; dims[2]=LY; dims[3]=LZ; #ifdef MPI plan_p = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_BACKWARD, FFTW_MEASURE); plan_m = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_FORWARD, FFTW_MEASURE); fftwnd_mpi_local_sizes(plan_p, &T, &Tstart, &l_LX_at, &l_LXstart_at, &FFTW_LOC_VOLUME); #else plan_p = fftwnd_create_plan(4, dims, FFTW_BACKWARD, FFTW_MEASURE | FFTW_IN_PLACE); plan_m = fftwnd_create_plan(4, dims, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE); T = T_global; Tstart = 0; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; #endif fprintf(stdout, "# [%2d] fftw parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] l_LX_at = %3d\n"\ "# [%2d] l_LXstart_at = %3d\n"\ "# [%2d] FFTW_LOC_VOLUME = %3d\n", g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at, g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME); #ifdef MPI if(T==0) { fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id); MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); exit(2); } #endif if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(1); } geometry(); /* read the gauge field */ alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND); sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf); if(g_cart_id==0) fprintf(stdout, "reading gauge field from file %s\n", filename); read_lime_gauge_field_doubleprec(filename); #ifdef MPI xchange_gauge(); #endif /* measure the plaquette */ plaquette(&plaq); if(g_cart_id==0) fprintf(stdout, "measured plaquette value: %25.16e\n", plaq); /* allocate memory for the spinor fields */ no_fields = 3; g_spinor_field = 
(double**)calloc(no_fields, sizeof(double*)); for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND); /* allocate memory for the contractions */ disc = (double*)calloc( 8*VOLUME, sizeof(double)); if( disc == (double*)NULL ) { fprintf(stderr, "could not allocate memory for disc\n"); # ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); # endif exit(3); } for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.; work = (double*)calloc(48*VOLUME, sizeof(double)); if( work == (double*)NULL ) { fprintf(stderr, "could not allocate memory for work\n"); # ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); # endif exit(3); } /* prepare Fourier transformation arrays */ in = (fftw_complex*)malloc(FFTW_LOC_VOLUME*sizeof(fftw_complex)); if(in==(fftw_complex*)NULL) { #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(4); } /*********************************************** * start loop on source id.s ***********************************************/ for(sid=g_sourceid; sid<=g_sourceid2; sid++) { /* read the new propagator */ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif if(format==0) { sprintf(filename, "%s.%.4d.%.2d.inverted", filename_prefix, Nconf, sid); if(read_lime_spinor(g_spinor_field[2], filename, 0) != 0) break; } else if(format==1) { sprintf(filename, "%s.%.4d.%.5d.inverted", filename_prefix, Nconf, sid); if(read_cmi(g_spinor_field[2], filename) != 0) break; } xchange_field(g_spinor_field[2]); #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif fprintf(stdout, "time to read prop.: %e seconds\n", retime-ratime); count++; /************************************************ * calculate the source: apply Q_phi_tbc ************************************************/ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif Q_phi_tbc(g_spinor_field[0], g_spinor_field[2]); xchange_field(g_spinor_field[0]); #ifdef MPI retime = 
MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "time to calculate source: %e seconds\n", retime-ratime); /************************************************ * HPE: apply BH3 ************************************************/ BH3(g_spinor_field[1], g_spinor_field[2]); /* add new contractions to (existing) disc */ # ifdef MPI ratime = MPI_Wtime(); # else ratime = (double)clock() / CLOCKS_PER_SEC; # endif for(mu=0; mu<4; mu++) { /* loop on Lorentz index of the current */ iix = _GWI(mu,0,VOLUME); for(ix=0; ix<VOLUME; ix++) { /* loop on lattice sites */ _cm_eq_cm_ti_co(U_, &g_gauge_field[_GGI(ix, mu)], &co_phase_up[mu]); /* first contribution */ _fv_eq_cm_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(g_iup[ix][mu])]); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_mi_eq_fv(spinor2, spinor1); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(ix)], spinor2); disc[iix ] -= 0.5 * w.re; disc[iix+1] -= 0.5 * w.im; /* second contribution */ _fv_eq_cm_dag_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(ix)]); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_pl_eq_fv(spinor2, spinor1); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(g_iup[ix][mu])], spinor2); disc[iix ] -= 0.5 * w.re; disc[iix+1] -= 0.5 * w.im; iix += 2; } /* of ix */ } /* of mu */ #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "[%2d] time to contract cvc: %e seconds\n", g_cart_id, retime-ratime); /************************************************ * save results for count = multiple of Nsave ************************************************/ if(count%Nsave == 0) { if(g_cart_id == 0) fprintf(stdout, "save results for count = %d\n", count); /* save the result in position space */ fnorm = 1. 
/ ( (double)count * g_prop_normsqr ); if(g_cart_id==0) fprintf(stdout, "# X-fnorm = %e\n", fnorm); for(mu=0; mu<4; mu++) { for(ix=0; ix<VOLUME; ix++) { work[_GWI(mu,ix,VOLUME) ] = disc[_GWI(mu,ix,VOLUME) ] * fnorm; work[_GWI(mu,ix,VOLUME)+1] = disc[_GWI(mu,ix,VOLUME)+1] * fnorm; } } sprintf(filename, "cvc_hpe_X.%.4d.%.4d", Nconf, count); sprintf(contype, "cvc_disc_hpe_loops_3_to_3_stoch_X"); write_lime_contraction(work, filename, 64, 4, contype, Nconf, count); #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif /* Fourier transform data, copy to work */ for(mu=0; mu<4; mu++) { memcpy((void*)in, (void*)(work+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double)); #ifdef MPI fftwnd_mpi(plan_m, 1, in, NULL, FFTW_NORMAL_ORDER); #else fftwnd_one(plan_m, in, NULL); #endif memcpy((void*)(work+_GWI(4+mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double)); memcpy((void*)in, (void*)(work+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double)); #ifdef MPI fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER); #else fftwnd_one(plan_p, in, NULL); #endif memcpy((void*)(work+_GWI(mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double)); } /* of mu =0 ,..., 3*/ fnorm = 1. 
/ (double)(T_global*LX*LY*LZ); if(g_cart_id==0) fprintf(stdout, "# P-fnorm = %e\n", fnorm); for(mu=0; mu<4; mu++) { for(nu=0; nu<4; nu++) { cp1 = (complex*)(work+_GWI(mu,0,VOLUME)); cp2 = (complex*)(work+_GWI(4+nu,0,VOLUME)); cp3 = (complex*)(work+_GWI(8+4*mu+nu,0,VOLUME)); for(x0=0; x0<T; x0++) { q[0] = (double)(x0+Tstart) / (double)T_global; for(x1=0; x1<LX; x1++) { q[1] = (double)(x1) / (double)LX; for(x2=0; x2<LY; x2++) { q[2] = (double)(x2) / (double)LY; for(x3=0; x3<LZ; x3++) { q[3] = (double)(x3) / (double)LZ; ix = g_ipt[x0][x1][x2][x3]; w.re = cos( M_PI * (q[mu]-q[nu]) ); w.im = sin( M_PI * (q[mu]-q[nu]) ); _co_eq_co_ti_co(&w1, cp1, cp2); _co_eq_co_ti_co(cp3, &w1, &w); _co_ti_eq_re(cp3, fnorm); cp1++; cp2++; cp3++; }}}} }} /* save the result in momentum space */ sprintf(filename, "cvc_hpe_P.%.4d.%.4d", Nconf, count); sprintf(contype, "cvc_disc_hpe_loops_3_to_3_stoch_P"); write_lime_contraction(work+_GWI(8,0,VOLUME), filename, 64, 16, contype, Nconf, count); #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "time to save cvc results: %e seconds\n", retime-ratime); } /* of count % Nsave == 0 */ } /* of loop on sid */ /*********************************************** * free the allocated memory, finalize ***********************************************/ free(g_gauge_field); for(i=0; i<no_fields; i++) free(g_spinor_field[i]); free(g_spinor_field); free_geometry(); fftw_free(in); free(disc); free(work); #ifdef MPI fftwnd_mpi_destroy_plan(plan_p); fftwnd_mpi_destroy_plan(plan_m); free(status); MPI_Finalize(); #else fftwnd_destroy_plan(plan_p); fftwnd_destroy_plan(plan_m); #endif return(0); }
int main(int argc, char *argv[]) { FILE *parameterfile = NULL; int c, j, i, ix = 0, isample = 0, op_id = 0; char * filename = NULL; char datafilename[50]; char parameterfilename[50]; char conf_filename[50]; char * input_filename = NULL; double plaquette_energy; struct stout_parameters params_smear; spinor **s, *s_; #ifdef _KOJAK_INST #pragma pomp inst init #pragma pomp inst begin(main) #endif #if (defined SSE || defined SSE2 || SSE3) signal(SIGILL, &catch_ill_inst); #endif DUM_DERI = 8; DUM_MATRIX = DUM_DERI + 5; NO_OF_SPINORFIELDS = DUM_MATRIX + 2; verbose = 0; g_use_clover_flag = 0; #ifdef MPI MPI_Init(&argc, &argv); #endif while ((c = getopt(argc, argv, "h?vVf:o:")) != -1) { switch (c) { case 'f': input_filename = calloc(200, sizeof(char)); strcpy(input_filename, optarg); break; case 'o': filename = calloc(200, sizeof(char)); strcpy(filename, optarg); break; case 'v': verbose = 1; break; case 'V': fprintf(stdout,"%s %s\n",PACKAGE_STRING,git_hash); exit(0); break; case 'h': case '?': default: usage(); break; } } if (input_filename == NULL) { input_filename = "invert.input"; } if (filename == NULL) { filename = "output"; } /* Read the input file */ if( (j = read_input(input_filename)) != 0) { fprintf(stderr, "Could not find input file: %s\nAborting...\n", input_filename); exit(-1); } /* this DBW2 stuff is not needed for the inversion ! 
*/ if (g_dflgcr_flag == 1) { even_odd_flag = 0; } g_rgi_C1 = 0; if (Nsave == 0) { Nsave = 1; } if (g_running_phmc) { NO_OF_SPINORFIELDS = DUM_MATRIX + 8; } tmlqcd_mpi_init(argc, argv); g_dbw2rand = 0; /* starts the single and double precision random number */ /* generator */ start_ranlux(rlxd_level, random_seed); /* we need to make sure that we don't have even_odd_flag = 1 */ /* if any of the operators doesn't use it */ /* in this way even/odd can still be used by other operators */ for(j = 0; j < no_operators; j++) if(!operator_list[j].even_odd_flag) even_odd_flag = 0; #ifndef MPI g_dbw2rand = 0; #endif #ifdef _GAUGE_COPY j = init_gauge_field(VOLUMEPLUSRAND, 1); #else j = init_gauge_field(VOLUMEPLUSRAND, 0); #endif if (j != 0) { fprintf(stderr, "Not enough memory for gauge_fields! Aborting...\n"); exit(-1); } j = init_geometry_indices(VOLUMEPLUSRAND); if (j != 0) { fprintf(stderr, "Not enough memory for geometry indices! Aborting...\n"); exit(-1); } if (no_monomials > 0) { if (even_odd_flag) { j = init_monomials(VOLUMEPLUSRAND / 2, even_odd_flag); } else { j = init_monomials(VOLUMEPLUSRAND, even_odd_flag); } if (j != 0) { fprintf(stderr, "Not enough memory for monomial pseudo fermion fields! Aborting...\n"); exit(-1); } } if (even_odd_flag) { j = init_spinor_field(VOLUMEPLUSRAND / 2, NO_OF_SPINORFIELDS); } else { j = init_spinor_field(VOLUMEPLUSRAND, NO_OF_SPINORFIELDS); } if (j != 0) { fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n"); exit(-1); } if (g_running_phmc) { j = init_chi_spinor_field(VOLUMEPLUSRAND / 2, 20); if (j != 0) { fprintf(stderr, "Not enough memory for PHMC Chi fields! 
Aborting...\n"); exit(-1); } } g_mu = g_mu1; if (g_cart_id == 0) { /*construct the filenames for the observables and the parameters*/ strcpy(datafilename, filename); strcat(datafilename, ".data"); strcpy(parameterfilename, filename); strcat(parameterfilename, ".para"); parameterfile = fopen(parameterfilename, "w"); write_first_messages(parameterfile, 1); fclose(parameterfile); } /* define the geometry */ geometry(); /* define the boundary conditions for the fermion fields */ boundary(g_kappa); phmc_invmaxev = 1.; init_operators(); /* this could be maybe moved to init_operators */ #ifdef _USE_HALFSPINOR j = init_dirac_halfspinor(); if (j != 0) { fprintf(stderr, "Not enough memory for halffield! Aborting...\n"); exit(-1); } if (g_sloppy_precision_flag == 1) { j = init_dirac_halfspinor32(); if (j != 0) { fprintf(stderr, "Not enough memory for 32-bit halffield! Aborting...\n"); exit(-1); } } # if (defined _PERSISTENT) if (even_odd_flag) init_xchange_halffield(); # endif #endif for (j = 0; j < Nmeas; j++) { sprintf(conf_filename, "%s.%.4d", gauge_input_filename, nstore); if (g_cart_id == 0) { printf("#\n# Trying to read gauge field from file %s in %s precision.\n", conf_filename, (gauge_precision_read_flag == 32 ? 
"single" : "double")); fflush(stdout); } if( (i = read_gauge_field(conf_filename)) !=0) { fprintf(stderr, "Error %d while reading gauge field from %s\n Aborting...\n", i, conf_filename); exit(-2); } if (g_cart_id == 0) { printf("# Finished reading gauge field.\n"); fflush(stdout); } #ifdef MPI xchange_gauge(); #endif /*compute the energy of the gauge field*/ plaquette_energy = measure_gauge_action(); if (g_cart_id == 0) { printf("# The computed plaquette value is %e.\n", plaquette_energy / (6.*VOLUME*g_nproc)); fflush(stdout); } if (use_stout_flag == 1){ params_smear.rho = stout_rho; params_smear.iterations = stout_no_iter; if (stout_smear((su3_tuple*)(g_gauge_field[0]), ¶ms_smear, (su3_tuple*)(g_gauge_field[0])) != 0) exit(1) ; g_update_gauge_copy = 1; g_update_gauge_energy = 1; g_update_rectangle_energy = 1; plaquette_energy = measure_gauge_action(); if (g_cart_id == 0) { printf("# The plaquette value after stouting is %e\n", plaquette_energy / (6.*VOLUME*g_nproc)); fflush(stdout); } } if (reweighting_flag == 1) { reweighting_factor(reweighting_samples, nstore); } /* Compute minimal eigenvalues, if wanted */ if (compute_evs != 0) { eigenvalues(&no_eigenvalues, 5000, eigenvalue_precision, 0, compute_evs, nstore, even_odd_flag); } if (phmc_compute_evs != 0) { #ifdef MPI MPI_Finalize(); #endif return(0); } /* Compute the mode number or topological susceptibility using spectral projectors, if wanted*/ if(compute_modenumber != 0 || compute_topsus !=0){ s_ = calloc(no_sources_z2*VOLUMEPLUSRAND+1, sizeof(spinor)); s = calloc(no_sources_z2, sizeof(spinor*)); if(s_ == NULL) { printf("Not enough memory in %s: %d",__FILE__,__LINE__); exit(42); } if(s == NULL) { printf("Not enough memory in %s: %d",__FILE__,__LINE__); exit(42); } for(i = 0; i < no_sources_z2; i++) { #if (defined SSE3 || defined SSE2 || defined SSE) s[i] = (spinor*)(((unsigned long int)(s_)+ALIGN_BASE)&~ALIGN_BASE)+i*VOLUMEPLUSRAND; #else s[i] = s_+i*VOLUMEPLUSRAND; #endif z2_random_spinor_field(s[i], 
VOLUME); /* what is this here needed for?? */ /* spinor *aux_,*aux; */ /* #if ( defined SSE || defined SSE2 || defined SSE3 ) */ /* aux_=calloc(VOLUMEPLUSRAND+1, sizeof(spinor)); */ /* aux = (spinor *)(((unsigned long int)(aux_)+ALIGN_BASE)&~ALIGN_BASE); */ /* #else */ /* aux_=calloc(VOLUMEPLUSRAND, sizeof(spinor)); */ /* aux = aux_; */ /* #endif */ if(g_proc_id == 0) { printf("source %d \n", i); } if(compute_modenumber != 0){ mode_number(s[i], mstarsq); } if(compute_topsus !=0) { top_sus(s[i], mstarsq); } } free(s); free(s_); } /* move to operators as well */ if (g_dflgcr_flag == 1) { /* set up deflation blocks */ init_blocks(nblocks_t, nblocks_x, nblocks_y, nblocks_z); /* the can stay here for now, but later we probably need */ /* something like init_dfl_solver called somewhere else */ /* create set of approximate lowest eigenvectors ("global deflation subspace") */ /* g_mu = 0.; */ /* boundary(0.125); */ generate_dfl_subspace(g_N_s, VOLUME); /* boundary(g_kappa); */ /* g_mu = g_mu1; */ /* Compute little Dirac operators */ /* alt_block_compute_little_D(); */ if (g_debug_level > 0) { check_projectors(); check_local_D(); } if (g_debug_level > 1) { check_little_D_inversion(); } } if(SourceInfo.type == 1) { index_start = 0; index_end = 1; } g_precWS=NULL; if(use_preconditioning == 1){ /* todo load fftw wisdom */ #if (defined HAVE_FFTW ) && !( defined MPI) loadFFTWWisdom(g_spinor_field[0],g_spinor_field[1],T,LX); #else use_preconditioning=0; #endif } if (g_cart_id == 0) { fprintf(stdout, "#\n"); /*Indicate starting of the operator part*/ } for(op_id = 0; op_id < no_operators; op_id++) { boundary(operator_list[op_id].kappa); g_kappa = operator_list[op_id].kappa; g_mu = 0.; if(use_preconditioning==1 && PRECWSOPERATORSELECT[operator_list[op_id].solver]!=PRECWS_NO ){ printf("# Using preconditioning with treelevel preconditioning operator: %s \n", precWSOpToString(PRECWSOPERATORSELECT[operator_list[op_id].solver])); /* initial preconditioning workspace */ 
operator_list[op_id].precWS=(spinorPrecWS*)malloc(sizeof(spinorPrecWS)); spinorPrecWS_Init(operator_list[op_id].precWS, operator_list[op_id].kappa, operator_list[op_id].mu/2./operator_list[op_id].kappa, -(0.5/operator_list[op_id].kappa-4.), PRECWSOPERATORSELECT[operator_list[op_id].solver]); g_precWS = operator_list[op_id].precWS; if(PRECWSOPERATORSELECT[operator_list[op_id].solver] == PRECWS_D_DAGGER_D) { fitPrecParams(op_id); } } for(isample = 0; isample < no_samples; isample++) { for (ix = index_start; ix < index_end; ix++) { if (g_cart_id == 0) { fprintf(stdout, "#\n"); /*Indicate starting of new index*/ } /* we use g_spinor_field[0-7] for sources and props for the moment */ /* 0-3 in case of 1 flavour */ /* 0-7 in case of 2 flavours */ prepare_source(nstore, isample, ix, op_id, read_source_flag, source_location); operator_list[op_id].inverter(op_id, index_start); } } if(use_preconditioning==1 && operator_list[op_id].precWS!=NULL ){ /* free preconditioning workspace */ spinorPrecWS_Free(operator_list[op_id].precWS); free(operator_list[op_id].precWS); } if(operator_list[op_id].type == OVERLAP){ free_Dov_WS(); } } nstore += Nsave; } #ifdef MPI MPI_Finalize(); #endif free_blocks(); free_dfl_subspace(); free_gauge_field(); free_geometry_indices(); free_spinor_field(); free_moment_field(); free_chi_spinor_field(); return(0); #ifdef _KOJAK_INST #pragma pomp inst end(main) #endif }
int main(int argc,char *argv[]) { FILE *parameterfile=NULL; int c, j, is=0, ic=0; int x, X, y, Y, z, Z, t, tt, i, sum; char * filename = NULL; char datafilename[50]; char parameterfilename[50]; char conf_filename[50]; char * input_filename = NULL; double plaquette_energy, nrm; double * norm; struct stout_parameters params_smear; #ifdef _GAUGE_COPY int kb=0; #endif #ifdef MPI double atime=0., etime=0.; #endif #ifdef _KOJAK_INST #pragma pomp inst init #pragma pomp inst begin(main) #endif DUM_DERI = 6; /* DUM_DERI + 2 is enough (not 7) */ DUM_SOLVER = DUM_DERI+2; DUM_MATRIX = DUM_SOLVER+6; /* DUM_MATRIX + 2 is enough (not 6) */ NO_OF_SPINORFIELDS = DUM_MATRIX+2; verbose = 0; g_use_clover_flag = 0; g_nr_of_psf = 1; #ifdef MPI MPI_Init(&argc, &argv); #endif while ((c = getopt(argc, argv, "h?f:o:")) != -1) { switch (c) { case 'f': input_filename = calloc(200, sizeof(char)); strcpy(input_filename,optarg); break; case 'o': filename = calloc(200, sizeof(char)); strcpy(filename,optarg); break; case 'h': case '?': default: usage(); break; } } if(input_filename == NULL){ input_filename = "hmc.input"; } if(filename == NULL){ filename = "output"; } /* Read the input file */ read_input(input_filename); /* here we want no even/odd preconditioning */ even_odd_flag = 0; /* this DBW2 stuff is not needed for the inversion ! */ g_rgi_C1 = 0; if(Nsave == 0){ Nsave = 1; } tmlqcd_mpi_init(argc, argv); g_dbw2rand = 0; #ifndef MPI g_dbw2rand = 0; #endif #ifdef _GAUGE_COPY j = init_gauge_field(VOLUMEPLUSRAND, 1); #else j = init_gauge_field(VOLUMEPLUSRAND, 0); #endif if ( j!= 0) { fprintf(stderr, "Not enough memory for gauge_fields! Aborting...\n"); exit(-1); } j = init_geometry_indices(VOLUMEPLUSRAND); if ( j!= 0) { fprintf(stderr, "Not enough memory for geometry indices! 
Aborting...\n"); exit(-1); } if(even_odd_flag) { j = init_spinor_field(VOLUMEPLUSRAND/2, NO_OF_SPINORFIELDS); } else { j = init_spinor_field(VOLUMEPLUSRAND, NO_OF_SPINORFIELDS); } if ( j!= 0) { fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n"); exit(-1); } g_mu = g_mu1; if(g_proc_id == 0){ /*construct the filenames for the observables and the parameters*/ strcpy(datafilename,filename); strcat(datafilename,".data"); strcpy(parameterfilename,filename); strcat(parameterfilename,".para"); parameterfile=fopen(parameterfilename, "w"); write_first_messages(parameterfile, 0, 1); } /* define the geometry */ geometry(); /* define the boundary conditions for the fermion fields */ boundary(); #ifdef _USE_HALFSPINOR j = init_dirac_halfspinor(); if ( j!= 0) { fprintf(stderr, "Not enough memory for halffield! Aborting...\n"); exit(-1); } if(g_sloppy_precision_flag == 1) { j = init_dirac_halfspinor32(); if ( j!= 0) { fprintf(stderr, "Not enough memory for 32-Bit halffield! Aborting...\n"); exit(-1); } } # if (defined _PERSISTENT) init_xchange_halffield(); # endif #endif norm = (double*)calloc(3.*LX/2.+T/2., sizeof(double)); for(j=0;j<Nmeas; j++) { sprintf(conf_filename,"%s.%.4d", gauge_input_filename, nstore); if (g_proc_id == 0){ printf("Reading Gauge field from file %s\n", conf_filename); fflush(stdout); } read_lime_gauge_field(conf_filename); if (g_proc_id == 0){ printf("done!\n"); fflush(stdout); } #ifdef MPI xchange_gauge(g_gauge_field); #endif /* Compute minimal eigenvalues, if wanted */ if(compute_evs != 0) { eigenvalues(&no_eigenvalues, 1000, eigenvalue_precision, 0, compute_evs, nstore, even_odd_flag); } /*compute the energy of the gauge field*/ plaquette_energy = measure_gauge_action(g_gauge_field); if(g_proc_id == 0) { printf("The plaquette value is %e\n", plaquette_energy/(6.*VOLUME*g_nproc)); fflush(stdout); } if (use_stout_flag == 1){ params_smear.rho = stout_rho; params_smear.iterations = stout_no_iter; if 
(stout_smear((su3_tuple*)(g_gauge_field[0]), ¶ms_smear, (su3_tuple*)(g_gauge_field[0])) != 0) exit(1) ; g_update_gauge_copy = 1; g_update_gauge_energy = 1; g_update_rectangle_energy = 1; plaquette_energy = measure_gauge_action(g_gauge_field); if (g_proc_id == 0) { printf("# The plaquette value after stouting is %e\n", plaquette_energy / (6.*VOLUME*g_nproc)); fflush(stdout); } } source_spinor_field(g_spinor_field[0], g_spinor_field[1], 0, 0); convert_eo_to_lexic(g_spinor_field[DUM_DERI], g_spinor_field[0], g_spinor_field[1]); D_psi(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI]); if(even_odd_flag) { i = invert_eo(g_spinor_field[2], g_spinor_field[3], g_spinor_field[0], g_spinor_field[1], solver_precision, max_solver_iterations, solver_flag, g_relative_precision_flag, sub_evs_cg_flag, even_odd_flag, 0, NULL, -1); convert_eo_to_lexic(g_spinor_field[DUM_DERI+1], g_spinor_field[2], g_spinor_field[3]); } for(i = 0; i < 3*LX/2+T/2; i++){ norm[i] = 0.; } for(x = 0; x < LX; x++){ if(x > LX/2) X = LX-x; else X = x; for(y = 0; y < LY; y++){ if(y > LY/2) Y = LY-y; else Y = y; for(z = 0; z < LZ; z++){ if(z > LZ/2) Z = LZ-z; else Z = z; for(t = 0; t < T; t++){ if(t > T/2) tt = T - t; else tt = t; sum = X + Y + Z + tt; _spinor_norm_sq(nrm, g_spinor_field[DUM_DERI+1][ g_ipt[t][x][y][z] ]); /* _spinor_norm_sq(nrm, qprop[0][0][1][ g_ipt[t][x][y][z] ]); */ printf("%e %e\n", creal(g_spinor_field[DUM_DERI+1][ g_ipt[t][x][y][z] ].s0.c0), cimag(g_spinor_field[DUM_DERI+1][ g_ipt[t][x][y][z] ].s0.c0)); nrm = sqrt( nrm ); printf("%1.12e\n", nrm); if(nrm > norm[sum]) norm[sum] = nrm; } } } } for(i = 0; i < 3*L/2+T/2; i++){ printf("%d %1.12e\n", i, norm[i]); } printf("\n"); nstore+=Nsave; } #ifdef MPI MPI_Finalize(); #endif free_gauge_field(); free_geometry_indices(); free_spinor_field(); free_moment_field(); return(0); #ifdef _KOJAK_INST #pragma pomp inst end(main) #endif }
int main(int argc, char **argv) { int c, i, mu, nu; int count = 0; int filename_set = 0; int dims[4] = {0,0,0,0}; int l_LX_at, l_LXstart_at; int x0, x1, x2, x3, ix; int sid; double *disc = (double*)NULL; double *work = (double*)NULL; double *disc_diag = (double*)NULL; double phase[4]; int verbose = 0; int do_gt = 0; char filename[100]; double ratime, retime; double plaq; double spinor1[24], spinor2[24], U_[18]; complex w, w1, psi1[4], psi2[4]; FILE *ofs; fftw_complex *in=(fftw_complex*)NULL; #ifdef MPI fftwnd_mpi_plan plan_p, plan_m; int *status; #else fftwnd_plan plan_p, plan_m; #endif #ifdef MPI MPI_Init(&argc, &argv); #endif while ((c = getopt(argc, argv, "h?vgf:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'g': do_gt = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'h': case '?': default: usage(); break; } } /* set the default values */ set_default_input_values(); if(filename_set==0) strcpy(filename, "cvc.input"); /* read the input file */ read_input(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } if(g_kappa == 0.) 
{ if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n"); usage(); } /* initialize MPI parameters */ mpi_init(argc, argv); #ifdef MPI if((status = (int*)calloc(g_nproc, sizeof(int))) == (int*)NULL) { MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); exit(7); } #endif /* initialize fftw */ dims[0]=T_global; dims[1]=LX; dims[2]=LY; dims[3]=LZ; #ifdef MPI plan_p = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_BACKWARD, FFTW_MEASURE); plan_m = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_FORWARD, FFTW_MEASURE); fftwnd_mpi_local_sizes(plan_p, &T, &Tstart, &l_LX_at, &l_LXstart_at, &FFTW_LOC_VOLUME); #else plan_p = fftwnd_create_plan(4, dims, FFTW_BACKWARD, FFTW_MEASURE | FFTW_IN_PLACE); plan_m = fftwnd_create_plan(4, dims, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE); T = T_global; Tstart = 0; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; #endif fprintf(stdout, "# [%2d] fftw parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] l_LX_at = %3d\n"\ "# [%2d] l_LXstart_at = %3d\n"\ "# [%2d] FFTW_LOC_VOLUME = %3d\n", g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at, g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME); #ifdef MPI if(T==0) { fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id); MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); exit(2); } #endif if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(1); } geometry(); /* read the gauge field */ alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND); sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf); if(g_cart_id==0) fprintf(stdout, "reading gauge field from file %s\n", filename); read_lime_gauge_field_doubleprec(filename); #ifdef MPI xchange_gauge(); #endif /* measure the plaquette */ plaquette(&plaq); if(g_cart_id==0) fprintf(stdout, "measured plaquette value: %25.16e\n", plaq); /* allocate memory for the spinor fields */ no_fields = 2; g_spinor_field = 
(double**)calloc(no_fields, sizeof(double*)); for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND); /* allocate memory for the contractions */ disc = (double*)calloc(8*VOLUME, sizeof(double)); work = (double*)calloc(20*VOLUME, sizeof(double)); if( (disc==(double*)NULL) || (work==(double*)NULL) ) { fprintf(stderr, "could not allocate memory for disc/work\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(3); } for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.; if(g_subtract == 1) { /* allocate memory for disc_diag */ disc_diag = (double*)calloc(20*VOLUME, sizeof(double)); if( disc_diag == (double*)NULL ) { fprintf(stderr, "could not allocate memory for disc_diag\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(8); } for(ix=0; ix<20*VOLUME; ix++) disc_diag[ix] = 0.; } /* prepare Fourier transformation arrays */ in = (fftw_complex*)malloc(FFTW_LOC_VOLUME*sizeof(fftw_complex)); if(in==(fftw_complex*)NULL) { #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(4); } if(g_resume==1) { /* read current disc from file */ sprintf(filename, ".outcvc_current.%.4d", Nconf); c = read_contraction(disc, &count, filename, 4); if( (g_subtract==1) && (c==0) ) { sprintf(filename, ".outcvc_diag_current.%.4d", Nconf); c = read_contraction(disc_diag, (int*)NULL, filename, 10); } #ifdef MPI MPI_Gather(&c, 1, MPI_INT, status, 1, MPI_INT, 0, g_cart_grid); if(g_cart_id==0) { /* check the entries in status */ for(i=0; i<g_nproc; i++) if(status[i]!=0) { status[0] = 1; break; } } MPI_Bcast(status, 1, MPI_INT, 0, g_cart_grid); if(status[0]==1) { for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.; count = 0; } #else if(c != 0) { fprintf(stdout, "could not read current disc; start new\n"); for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.; if(g_subtract==1) for(ix=0; ix<20*VOLUME; ix++) disc_diag[ix] = 0.; count = 0; } #endif if(g_cart_id==0) fprintf(stdout, "starting with count = %d\n", count); } /* of g_resume == 1 */ /* 
start loop on source id.s */ for(sid=g_sourceid; sid<=g_sourceid2; sid++) { /* read the new propagator */ /* sprintf(filename, "%s.%.4d.%.2d", filename_prefix, Nconf, sid); */ sprintf(filename, "source.%.4d.%.2d.inverted", Nconf, sid); if(format==0) { if(read_lime_spinor(g_spinor_field[1], filename, 0) != 0) break; } else if(format==1) { if(read_cmi(g_spinor_field[1], filename) != 0) break; } count++; xchange_field(g_spinor_field[1]); /* calculate the source: apply Q_phi_tbc */ Q_phi_tbc(g_spinor_field[0], g_spinor_field[1]); xchange_field(g_spinor_field[0]); /* sprintf(filename, "%s.source.%.2d", filename, g_cart_id); ofs = fopen(filename, "w"); printf_spinor_field(g_spinor_field[0], ofs); fclose(ofs); */ /* add new contractions to (existing) disc */ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif for(ix=0; ix<VOLUME; ix++) { /* loop on lattice sites */ for(mu=0; mu<4; mu++) { /* loop on Lorentz index of the current */ _cm_eq_cm_ti_co(U_, &g_gauge_field[_GGI(ix, mu)], &co_phase_up[mu]); /* first contribution */ _fv_eq_cm_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(g_iup[ix][mu])]); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_mi_eq_fv(spinor2, spinor1); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(ix)], spinor2); disc[_GJI(ix, mu) ] -= 0.25 * w.re; disc[_GJI(ix, mu)+1] -= 0.25 * w.im; if(g_subtract==1) { work[_GWI(mu,ix,VOLUME) ] = -0.25 * w.re; work[_GWI(mu,ix,VOLUME)+1] = -0.25 * w.im; } /* second contribution */ _fv_eq_cm_dag_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(ix)]); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_pl_eq_fv(spinor2, spinor1); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(g_iup[ix][mu])], spinor2); disc[_GJI(ix, mu) ] -= 0.25 * w.re; disc[_GJI(ix, mu)+1] -= 0.25 * w.im; if(g_subtract==1) { work[_GWI(mu,ix,VOLUME) ] -= 0.25 * w.re; work[_GWI(mu,ix,VOLUME)+1] -= 0.25 * w.im; } } } #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif fprintf(stdout, "[%2d] 
contractions in %e seconds\n", g_cart_id, retime-ratime); if(g_subtract==1) { /* add current contribution to disc_diag */ for(mu=0; mu<4; mu++) { for(i=0; i<4; i++) phase[i] = (double)(i==mu); memcpy((void*)in, (void*)&work[_GWI(mu,0,VOLUME)], 2*VOLUME*sizeof(double)); #ifdef MPI fftwnd_mpi(plan_m, 1, in, NULL, FFTW_NORMAL_ORDER); #else fftwnd_one(plan_m, in, NULL); #endif for(x0=0; x0<T; x0++) { for(x1=0; x1<LX; x1++) { for(x2=0; x2<LY; x2++) { for(x3=0; x3<LZ; x3++) { ix = g_ipt[x0][x1][x2][x3]; w.re = cos( M_PI * (phase[0]*(double)(Tstart+x0)/(double)T_global + phase[1]*(double)x1/(double)LX + phase[2]*(double)x2/(double)LY + phase[3]*(double)x3/(double)LZ) ); w.im = -sin( M_PI * (phase[0]*(double)(Tstart+x0)/(double)T_global + phase[1]*(double)x1/(double)LX + phase[2]*(double)x2/(double)LY + phase[3]*(double)x3/(double)LZ) ); _co_eq_co_ti_co(&w1, &in[ix], &w); work[_GWI(4+mu,ix,VOLUME) ] = w1.re; work[_GWI(4+mu,ix,VOLUME)+1] = w1.im; } } } } memcpy((void*)in, (void*)&work[_GWI(mu,0,VOLUME)], 2*VOLUME*sizeof(double)); #ifdef MPI fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER); #else fftwnd_one(plan_p, in, NULL); #endif for(x0=0; x0<T; x0++) { for(x1=0; x1<LX; x1++) { for(x2=0; x2<LY; x2++) { for(x3=0; x3<LZ; x3++) { ix = g_ipt[x0][x1][x2][x3]; w.re = cos( M_PI * (phase[0]*(double)(Tstart+x0)/(double)T_global + phase[1]*(double)x1/(double)LX + phase[2]*(double)x2/(double)LY + phase[3]*(double)x3/(double)LZ) ); w.im = sin( M_PI * (phase[0]*(double)(Tstart+x0)/(double)T_global + phase[1]*(double)x1/(double)LX + phase[2]*(double)x2/(double)LY + phase[3]*(double)x3/(double)LZ) ); _co_eq_co_ti_co(&w1, &in[ix], &w); work[_GWI(mu,ix,VOLUME) ] = w1.re; work[_GWI(mu,ix,VOLUME)+1] = w1.im; } } } } } /* of mu */ for(ix=0; ix<VOLUME; ix++) { i=-1; for(mu=0; mu<4; mu++) { for(nu=mu; nu<4; nu++) { i++; _co_eq_co_ti_co(&w, (complex*)&work[_GWI(mu,ix,VOLUME)], (complex*)&work[_GWI(4+nu,ix,VOLUME)]); disc_diag[_GWI(ix,i,10) ] += w.re; disc_diag[_GWI(ix,i,10)+1] += w.im; } } } 
} /* of g_subtract == 1 */ /* save results for count = multiple of Nsave */ if(count%Nsave == 0) { #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id == 0) fprintf(stdout, "save results for count = %d\n", count); /* save the result in position space */ sprintf(filename, "outcvc_X.%.4d.%.4d", Nconf, count); write_contraction(disc, NULL, filename, 4, 1, 0); /* Fourier transform data, copy to work */ for(mu=0; mu<4; mu++) { for(i=0; i<4; i++) phase[i] = (double)(i==mu); for(ix=0; ix<VOLUME; ix++) { in[ix].re = disc[_GJI(ix,mu) ]; in[ix].im = disc[_GJI(ix,mu)+1]; } #ifdef MPI fftwnd_mpi(plan_m, 1, in, NULL, FFTW_NORMAL_ORDER); #else fftwnd_one(plan_m, in, NULL); #endif for(x0=0; x0<T; x0++) { for(x1=0; x1<LX; x1++) { for(x2=0; x2<LY; x2++) { for(x3=0; x3<LZ; x3++) { ix = g_ipt[x0][x1][x2][x3]; w.re = cos( M_PI * (phase[0]*(double)(Tstart+x0)/(double)T_global + phase[1]*(double)x1/(double)LX + phase[2]*(double)x2/(double)LY + phase[3]*(double)x3/(double)LZ) ); w.im = -sin( M_PI * (phase[0]*(double)(Tstart+x0)/(double)T_global + phase[1]*(double)x1/(double)LX + phase[2]*(double)x2/(double)LY + phase[3]*(double)x3/(double)LZ) ); _co_eq_co_ti_co(&w1, &in[ix], &w); work[_GWI(ix,4+mu,8) ] = w1.re / (double)count; work[_GWI(ix,4+mu,8)+1] = w1.im / (double)count; } } } } for(ix=0; ix<VOLUME; ix++) { in[ix].re = disc[_GJI(ix, mu) ]; in[ix].im = disc[_GJI(ix, mu)+1]; } #ifdef MPI fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER); #else fftwnd_one(plan_p, in, NULL); #endif for(x0=0; x0<T; x0++) { for(x1=0; x1<LX; x1++) { for(x2=0; x2<LY; x2++) { for(x3=0; x3<LZ; x3++) { ix = g_ipt[x0][x1][x2][x3]; w.re = cos( M_PI * (phase[0]*(double)(Tstart+x0)/(double)T_global + phase[1]*(double)x1/(double)LX + phase[2]*(double)x2/(double)LY + phase[3]*(double)x3/(double)LZ) ); w.im = sin( M_PI * (phase[0]*(double)(Tstart+x0)/(double)T_global + phase[1]*(double)x1/(double)LX + phase[2]*(double)x2/(double)LY + phase[3]*(double)x3/(double)LZ) ); 
_co_eq_co_ti_co(&w1, &in[ix], &w); work[_GWI(ix,mu,8) ] = w1.re / (double)count; work[_GWI(ix,mu,8)+1] = w1.im / (double)count; } } } } } /* of mu =0 ,..., 3*/ /* save the result in momentum space */ sprintf(filename, "outcvc_P.%.4d.%.4d", Nconf, count); write_contraction(work, NULL, filename, 8, 1, 0); /* calculate the correlations 00, 01, 02, 03, 11, 12, ..., 23, 33 */ for(ix=VOLUME-1; ix>=0; ix--) { /* copy current data to auxilliary vector */ memcpy((void*)psi1, (void*)&work[_GWI(ix,0,8)], 8*sizeof(double)); memcpy((void*)psi2, (void*)&work[_GWI(ix,4,8)], 8*sizeof(double)); i = -1; for(mu=0; mu<4; mu++) { for(nu=mu; nu<4; nu++) { i++; _co_eq_co_ti_co(&w,&psi1[mu],&psi2[nu]); if(g_subtract !=1 ) { work[_GWI(ix,i,10) ] = w.re / (double)(T_global*LX*LY*LZ); work[_GWI(ix,i,10)+1] = w.im / (double)(T_global*LX*LY*LZ); } else { work[_GWI(ix,i,10) ] = ( w.re - disc_diag[_GWI(ix,i,10) ]/(double)(count*count) ) / (double)(T_global*LX*LY*LZ); work[_GWI(ix,i,10)+1] = ( w.im - disc_diag[_GWI(ix,i,10)+1]/(double)(count*count) ) / (double)(T_global*LX*LY*LZ); } } } } /* save current results to file */ sprintf(filename, "outcvc_final.%.4d.%.4d", Nconf, count); write_contraction(work, (int*)NULL, filename, 10, 1, 0); #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif fprintf(stdout, "[%2d] time to save results: %e seconds\n", g_cart_id, retime-ratime); } /* of count % Nsave == 0 */ } /* of loop on sid */ /* write current disc to file */ sprintf(filename, ".outcvc_current.%.4d", Nconf); write_contraction(disc, &count, filename, 4, 0, 0); if(g_subtract == 1) { /* write current disc_diag to file */ sprintf(filename, ".outcvc_diag_current.%.4d", Nconf); write_contraction(disc_diag, (int*)NULL, filename, 10, 0, 0); } /* free the allocated memory, finalize */ free(g_gauge_field); g_gauge_field=(double*)NULL; for(i=0; i<no_fields; i++) { free(g_spinor_field[i]); g_spinor_field[i] = (double*)NULL; } free(g_spinor_field); 
g_spinor_field=(double**)NULL; free_geometry(); fftw_free(in); free(disc); free(work); if(g_subtract==1) free(disc_diag); #ifdef MPI fftwnd_mpi_destroy_plan(plan_p); fftwnd_mpi_destroy_plan(plan_m); free(status); MPI_Finalize(); #else fftwnd_destroy_plan(plan_p); fftwnd_destroy_plan(plan_m); #endif return(0); }
int main(int argc, char **argv) { int c, i, mu, status; int ispin, icol, isc; int n_c = 3; int n_s = 4; int count = 0; int filename_set = 0; int dims[4] = {0,0,0,0}; int l_LX_at, l_LXstart_at; int x0, x1, x2, x3, ix, iix, iy; int sl0, sl1, sl2, sl3, have_source_flag=0; int source_proc_coords[4], lsl0, lsl1, lsl2, lsl3, source_proc_id; int check_residuum = 0; unsigned int VOL3; int do_gt = 0; int full_orbit = 0; char filename[200], source_filename[200]; double ratime, retime; double plaq_r=0., plaq_m=0., norm, norm2; // double spinor1[24], spinor2[24]; double *gauge_qdp[4], *gauge_field_timeslice=NULL, *gauge_field_smeared=NULL; double _1_2_kappa, _2_kappa, phase; FILE *ofs; int mu_trans[4] = {3, 0, 1, 2}; int threadid, nthreads; int timeslice; char rng_file_in[100], rng_file_out[100]; int *source_momentum=NULL; int source_momentum_class = -1; int source_momentum_no = 0; int source_momentum_runs = 1; int imom; /************************************************/ int qlatt_nclass; int *qlatt_id=NULL, *qlatt_count=NULL, **qlatt_rep=NULL, **qlatt_map=NULL; double **qlatt_list=NULL; /************************************************/ /*********************************************** * QUDA parameters ***********************************************/ QudaPrecision cpu_prec = QUDA_DOUBLE_PRECISION; QudaPrecision cuda_prec = QUDA_DOUBLE_PRECISION; QudaPrecision cuda_prec_sloppy = QUDA_DOUBLE_PRECISION; QudaGaugeParam gauge_param = newQudaGaugeParam(); QudaInvertParam inv_param = newQudaInvertParam(); #ifdef MPI MPI_Init(&argc, &argv); #endif while ((c = getopt(argc, argv, "och?vgf:p:")) != -1) { switch (c) { case 'v': g_verbose = 1; break; case 'g': do_gt = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'c': check_residuum = 1; fprintf(stdout, "# [invert_quda] will check residuum again\n"); break; case 'p': n_c = atoi(optarg); fprintf(stdout, "# [invert_quda] will use number of colors = %d\n", n_c); break; case 'o': full_orbit = 1; fprintf(stdout, "# 
[invert_quda] will invert for full orbit, if source momentum set\n"); break; case 'h': case '?': default: usage(); break; } } // get the time stamp g_the_time = time(NULL); /************************************** * set the default values, read input **************************************/ if(filename_set==0) strcpy(filename, "cvc.input"); if(g_proc_id==0) fprintf(stdout, "# Reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stderr, "[invert_quda] Error, T and L's must be set\n"); usage(); } if(g_kappa == 0.) { if(g_proc_id==0) fprintf(stderr, "[invert_quda] Error, kappa should be > 0.n"); usage(); } // set number of openmp threads #ifdef OPENMP omp_set_num_threads(g_num_threads); #else fprintf(stdout, "[invert_quda_cg] Warning, resetting global number of threads to 1\n"); g_num_threads = 1; #endif /* initialize MPI parameters */ mpi_init(argc, argv); // the volume of a timeslice VOL3 = LX*LY*LZ; fprintf(stdout, "# [%2d] parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n",\ g_cart_id, g_cart_id, T, g_cart_id, Tstart); #ifdef MPI if(T==0) { fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id); MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); exit(2); } #endif if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(1); } geometry(); /************************************** * initialize the QUDA library **************************************/ fprintf(stdout, "# [invert_quda] initializing quda\n"); initQuda(g_gpu_device_number); /************************************** * prepare the gauge field **************************************/ // read the gauge field from file alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND); if(strcmp( gaugefilename_prefix, "identity")==0 ) { if(g_cart_id==0) fprintf(stdout, "# [invert_quda] Setting up unit gauge 
field\n"); for(ix=0;ix<VOLUME; ix++) { for(mu=0;mu<4;mu++) { _cm_eq_id(g_gauge_field+_GGI(ix,mu)); } } } else { if(g_gauge_file_format == 0) { // ILDG sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf); if(g_cart_id==0) fprintf(stdout, "# Reading gauge field from file %s\n", filename); status = read_lime_gauge_field_doubleprec(filename); } else if(g_gauge_file_format == 1) { // NERSC sprintf(filename, "%s.%.5d", gaugefilename_prefix, Nconf); if(g_cart_id==0) fprintf(stdout, "# Reading gauge field from file %s\n", filename); status = read_nersc_gauge_field(g_gauge_field, filename, &plaq_r); } if(status != 0) { fprintf(stderr, "[invert_quda] Error, could not read gauge field"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 12); MPI_Finalize(); #endif exit(12); } } #ifdef MPI xchange_gauge(); #endif // measure the plaquette plaquette(&plaq_m); if(g_cart_id==0) fprintf(stdout, "# Measured plaquette value: %25.16e\n", plaq_m); if(g_cart_id==0) fprintf(stdout, "# Read plaquette value : %25.16e\n", plaq_r); // allocate the smeared / qdp ordered gauge field alloc_gauge_field(&gauge_field_smeared, VOLUME); for(i=0;i<4;i++) { gauge_qdp[i] = gauge_field_smeared + i*18*VOLUME; } // transcribe the gauge field #ifdef OPENMP omp_set_num_threads(g_num_threads); #pragma omp parallel for private(ix,iy,mu) #endif for(ix=0;ix<VOLUME;ix++) { iy = g_lexic2eot[ix]; for(mu=0;mu<4;mu++) { _cm_eq_cm(gauge_qdp[mu_trans[mu]]+18*iy, g_gauge_field+_GGI(ix,mu)); } } // multiply timeslice T-1 with factor of -1 (antiperiodic boundary condition) #ifdef OPENMP omp_set_num_threads(g_num_threads); #pragma omp parallel for private(ix,iy) #endif for(ix=0;ix<VOL3;ix++) { iix = (T-1)*VOL3 + ix; iy = g_lexic2eot[iix]; _cm_ti_eq_re(gauge_qdp[mu_trans[0]]+18*iy, -1.); } // QUDA gauge parameters gauge_param.X[0] = LX_global; gauge_param.X[1] = LY_global; gauge_param.X[2] = LZ_global; gauge_param.X[3] = T_global; gauge_param.anisotropy = 1.0; gauge_param.type = QUDA_WILSON_LINKS; gauge_param.gauge_order = 
QUDA_QDP_GAUGE_ORDER; gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T; gauge_param.cpu_prec = cpu_prec; gauge_param.cuda_prec = cuda_prec; gauge_param.reconstruct = QUDA_RECONSTRUCT_12; gauge_param.cuda_prec_sloppy = cuda_prec_sloppy; gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12; gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO; gauge_param.ga_pad = 0; // load the gauge field fprintf(stdout, "# [invert_quda] loading gauge field\n"); loadGaugeQuda((void*)gauge_qdp, &gauge_param); gauge_qdp[0] = NULL; gauge_qdp[1] = NULL; gauge_qdp[2] = NULL; gauge_qdp[3] = NULL; /********************************************* * APE smear the gauge field *********************************************/ memcpy(gauge_field_smeared, g_gauge_field, 72*VOLUME*sizeof(double)); if(N_ape>0) { fprintf(stdout, "# [invert_quda] APE smearing gauge field with paramters N_APE=%d, alpha_APE=%e\n", N_ape, alpha_ape); #ifdef OPENMP APE_Smearing_Step_threads(gauge_field_smeared, N_ape, alpha_ape); #else for(i=0; i<N_ape; i++) { APE_Smearing_Step(gauge_field_smeared, alpha_ape); } #endif } /* allocate memory for the spinor fields */ no_fields = 3; g_spinor_field = (double**)calloc(no_fields, sizeof(double*)); for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND); /* the source locaton */ sl0 = g_source_location / (LX_global*LY_global*LZ); sl1 = ( g_source_location % (LX_global*LY_global*LZ) ) / ( LY_global*LZ); sl2 = ( g_source_location % ( LY_global*LZ) ) / ( LZ); sl3 = g_source_location % LZ; if(g_cart_id==0) fprintf(stdout, "# [invert_quda] global sl = (%d, %d, %d, %d)\n", sl0, sl1, sl2, sl3); source_proc_coords[0] = sl0 / T; source_proc_coords[1] = sl1 / LX; source_proc_coords[2] = sl2 / LY; source_proc_coords[3] = sl3 / LZ; #ifdef MPI MPI_Cart_rank(g_cart_grid, source_proc_coords, &source_proc_id); #else source_proc_id = 0; #endif have_source_flag = source_proc_id == g_cart_id; lsl0 = sl0 % T; lsl1 = sl1 % LX; lsl2 = sl2 % LY; lsl3 = sl3 % LZ; if(have_source_flag) { 
fprintf(stdout, "# [invert_quda] process %d has the source at (%d, %d, %d, %d)\n", g_cart_id, lsl0, lsl1, lsl2, lsl3); } // QUDA inverter parameters inv_param.dslash_type = QUDA_WILSON_DSLASH; // inv_param.inv_type = QUDA_BICGSTAB_INVERTER; inv_param.inv_type = QUDA_CG_INVERTER; inv_param.kappa = g_kappa; inv_param.tol = solver_precision; inv_param.maxiter = niter_max; inv_param.reliable_delta = reliable_delta; inv_param.solution_type = QUDA_MAT_SOLUTION; // inv_param.solve_type = QUDA_DIRECT_PC_SOLVE; inv_param.solve_type = QUDA_NORMEQ_PC_SOLVE; inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN; // QUDA_MATPC_EVEN_EVEN; inv_param.dagger = QUDA_DAG_NO; inv_param.mass_normalization = QUDA_KAPPA_NORMALIZATION; //;QUDA_MASS_NORMALIZATION; inv_param.cpu_prec = cpu_prec; inv_param.cuda_prec = cuda_prec; inv_param.cuda_prec_sloppy = cuda_prec_sloppy; inv_param.preserve_source = QUDA_PRESERVE_SOURCE_NO; inv_param.dirac_order = QUDA_DIRAC_ORDER; inv_param.sp_pad = 0; inv_param.cl_pad = 0; inv_param.verbosity = QUDA_VERBOSE; // write initial rng state to file if(g_source_type==2 && g_coherent_source==2) { sprintf(rng_file_out, "%s.0", g_rng_filename); if( init_rng_stat_file (g_seed, rng_file_out) != 0 ) { fprintf(stderr, "[invert_quda] Error, could not write rng status\n"); exit(210); } } else if(g_source_type==3 || g_source_type==4) { if( init_rng_state(g_seed, &g_rng_state) != 0 ) { fprintf(stderr, "[invert_quda] Error, could initialize rng state\n"); exit(211); } } // check the source momenta if(g_source_momentum_set) { source_momentum = (int*)malloc(3*sizeof(int)); if(g_source_momentum[0]<0) g_source_momentum[0] += LX; if(g_source_momentum[1]<0) g_source_momentum[1] += LY; if(g_source_momentum[2]<0) g_source_momentum[2] += LZ; fprintf(stdout, "# [invert_quda] using final source momentum ( %d, %d, %d )\n", g_source_momentum[0], g_source_momentum[1], g_source_momentum[2]); if(full_orbit) { status = make_qcont_orbits_3d_parity_avg( &qlatt_id, &qlatt_count, &qlatt_list, 
&qlatt_nclass, &qlatt_rep, &qlatt_map); if(status != 0) { fprintf(stderr, "\n[invert_quda] Error while creating O_3-lists\n"); exit(4); } source_momentum_class = qlatt_id[g_ipt[0][g_source_momentum[0]][g_source_momentum[1]][g_source_momentum[2]]]; source_momentum_no = qlatt_count[source_momentum_class]; source_momentum_runs = source_momentum_class==0 ? 1 : source_momentum_no + 1; fprintf(stdout, "# [] source momentum belongs to class %d with %d members, which means %d runs\n", source_momentum_class, source_momentum_no, source_momentum_runs); } } /*********************************************** * loop on spin-color-index ***********************************************/ for(isc=g_source_index[0]; isc<=g_source_index[1]; isc++) { ispin = isc / n_c; icol = isc % n_c; for(imom=0; imom<source_momentum_runs; imom++) { /*********************************************** * set source momentum ***********************************************/ if(g_source_momentum_set) { if(imom == 0) { if(full_orbit) { source_momentum[0] = 0; source_momentum[1] = 0; source_momentum[2] = 0; } else { source_momentum[0] = g_source_momentum[0]; source_momentum[1] = g_source_momentum[1]; source_momentum[2] = g_source_momentum[2]; } } else { source_momentum[0] = qlatt_map[source_momentum_class][imom-1] / (LY*LZ); source_momentum[1] = ( qlatt_map[source_momentum_class][imom-1] % (LY*LZ) ) / LZ; source_momentum[2] = qlatt_map[source_momentum_class][imom-1] % LZ; } fprintf(stdout, "# [] run no. 
%d, source momentum (%d, %d, %d)\n", imom, source_momentum[0], source_momentum[1], source_momentum[2]); } /*********************************************** * prepare the souce ***********************************************/ if(g_read_source == 0) { // create source switch(g_source_type) { case 0: // point source fprintf(stdout, "# [invert_quda] Creating point source\n"); for(ix=0;ix<24*VOLUME;ix++) g_spinor_field[0][ix] = 0.; if(have_source_flag) { if(g_source_momentum_set) { phase = 2*M_PI*( source_momentum[0]*lsl1/(double)LX + source_momentum[1]*lsl2/(double)LY + source_momentum[2]*lsl3/(double)LZ ); g_spinor_field[0][_GSI(g_source_location) + 2*(n_c*ispin+icol) ] = cos(phase); g_spinor_field[0][_GSI(g_source_location) + 2*(n_c*ispin+icol)+1] = sin(phase); } else { g_spinor_field[0][_GSI(g_source_location) + 2*(n_c*ispin+icol) ] = 1.; } } if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.qx%.2dqy%.2dqz%.2d", filename_prefix, Nconf, sl0, sl1, sl2, sl3, n_c*ispin+icol, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d", filename_prefix, Nconf, sl0, sl1, sl2, sl3, n_c*ispin+icol); } break; case 2: // timeslice source if(g_coherent_source==1) { fprintf(stdout, "# [invert_quda] Creating coherent timeslice source\n"); status = prepare_coherent_timeslice_source(g_spinor_field[0], gauge_field_smeared, g_coherent_source_base, g_coherent_source_delta, VOLUME, g_rng_filename, NULL); if(status != 0) { fprintf(stderr, "[invert_quda] Error from prepare source, status was %d\n", status); exit(123); } timeslice = g_coherent_source_base; } else { if(g_coherent_source==2) { strcpy(rng_file_in, rng_file_out); if(isc == g_source_index[1]) { strcpy(rng_file_out, g_rng_filename); } else { sprintf(rng_file_out, "%s.%d", g_rng_filename, isc+1); } timeslice = (g_coherent_source_base+isc*g_coherent_source_delta)%T_global; fprintf(stdout, "# [invert_quda] Creating timeslice 
source\n"); status = prepare_timeslice_source(g_spinor_field[0], gauge_field_smeared, timeslice, VOLUME, rng_file_in, rng_file_out); if(status != 0) { fprintf(stderr, "[invert_quda] Error from prepare source, status was %d\n", status); exit(123); } } else { fprintf(stdout, "# [invert_quda] Creating timeslice source\n"); status = prepare_timeslice_source(g_spinor_field[0], gauge_field_smeared, g_source_timeslice, VOLUME, g_rng_filename, g_rng_filename); if(status != 0) { fprintf(stderr, "[invert_quda] Error from prepare source, status was %d\n", status); exit(124); } timeslice = g_source_timeslice; } } if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.%.2d.%.5d.qx%.2dqy%.2dqz%.2d", filename_prefix, Nconf, timeslice, isc, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.%.2d.%.5d", filename_prefix, Nconf, timeslice, isc); } break; case 3: // timeslice sources for one-end trick (spin dilution) fprintf(stdout, "# [invert_quda] Creating timeslice source for one-end-trick\n"); status = prepare_timeslice_source_one_end(g_spinor_field[0], gauge_field_smeared, g_source_timeslice, source_momentum, isc%n_s, g_rng_state, \ ( isc%n_s==(n_s-1) && imom==source_momentum_runs-1 ) ); if(status != 0) { fprintf(stderr, "[invert_quda] Error from prepare source, status was %d\n", status); exit(125); } c = N_Jacobi > 0 ? 
isc%n_s + n_s : isc%n_s; if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.%.2d.%.2d.qx%.2dqy%.2dqz%.2d", filename_prefix, Nconf, g_source_timeslice, c, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.%.2d.%.2d", filename_prefix, Nconf, g_source_timeslice, c); } break; case 4: // timeslice sources for one-end trick (spin and color dilution ) fprintf(stdout, "# [invert_quda] Creating timeslice source for one-end-trick\n"); status = prepare_timeslice_source_one_end_color(g_spinor_field[0], gauge_field_smeared, g_source_timeslice, source_momentum,\ isc%(n_s*n_c), g_rng_state, ( isc%(n_s*n_c)==(n_s*n_c-1) && imom==source_momentum_runs-1 ) ); if(status != 0) { fprintf(stderr, "[invert_quda] Error from prepare source, status was %d\n", status); exit(126); } c = N_Jacobi > 0 ? isc%(n_s*n_c) + (n_s*n_c) : isc%(n_s*n_c); if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.%.2d.%.2d.qx%.2dqy%.2dqz%.2d", filename_prefix, Nconf, g_source_timeslice, c, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.%.2d.%.2d", filename_prefix, Nconf, g_source_timeslice, c); } break; default: fprintf(stderr, "\nError, unrecognized source type\n"); exit(32); break; } } else { // read source switch(g_source_type) { case 0: // point source if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.qx%.2dqy%.2dqz%.2d", \ filename_prefix2, Nconf, sl0, sl1, sl2, sl3, isc, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d", filename_prefix2, Nconf, sl0, sl1, sl2, sl3, isc); } fprintf(stdout, "# [invert_quda] reading source from file %s\n", source_filename); status = read_lime_spinor(g_spinor_field[0], source_filename, 0); if(status != 0) { fprintf(stderr, "# [invert_quda] Errro, could not read source from file %s\n", source_filename); exit(115); } break; case 2: 
// timeslice source if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.%.2d.%.5d.qx%.2dqy%.2dqz%.2d", filename_prefix2, Nconf, g_source_timeslice, isc, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.%.2d.%.5d", filename_prefix2, Nconf, g_source_timeslice, isc); } fprintf(stdout, "# [invert_quda] reading source from file %s\n", source_filename); status = read_lime_spinor(g_spinor_field[0], source_filename, 0); if(status != 0) { fprintf(stderr, "# [invert_quda] Errro, could not read source from file %s\n", source_filename); exit(115); } break; default: fprintf(stderr, "[] Error, unrecognized source type for reading\n"); exit(104); break; } } // of if g_read_source //sprintf(filename, "%s.ascii", source_filename); //ofs = fopen(filename, "w"); //printf_spinor_field(g_spinor_field[0], ofs); //fclose(ofs); if(g_write_source) { status = write_propagator(g_spinor_field[0], source_filename, 0, g_propagator_precision); if(status != 0) { fprintf(stderr, "Error from write_propagator, status was %d\n", status); exit(27); } } // smearing if(N_Jacobi > 0) { #ifdef OPENMP Jacobi_Smearing_Step_one_threads(gauge_field_smeared, g_spinor_field[0], g_spinor_field[1], N_Jacobi, kappa_Jacobi); #else for(c=0; c<N_Jacobi; c++) { Jacobi_Smearing_Step_one(gauge_field_smeared, g_spinor_field[0], g_spinor_field[1], kappa_Jacobi); } #endif } // multiply with g2 for(ix=0;ix<VOLUME;ix++) { _fv_eq_gamma_ti_fv(g_spinor_field[1]+_GSI(ix), 2, g_spinor_field[0]+_GSI(ix)); } // transcribe the spinor field to even-odd ordering with coordinates (x,y,z,t) for(ix=0;ix<VOLUME;ix++) { iy = g_lexic2eot[ix]; _fv_eq_fv(g_spinor_field[2]+_GSI(iy), g_spinor_field[1]+_GSI(ix)); } /*********************************************** * perform the inversion ***********************************************/ fprintf(stdout, "# [invert_quda] starting inversion\n"); ratime = (double)clock() / CLOCKS_PER_SEC; for(ix=0;ix<VOLUME;ix++) { 
_fv_eq_zero(g_spinor_field[1]+_GSI(ix) ); } invertQuda(g_spinor_field[1], g_spinor_field[2], &inv_param); retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# [invert_quda] inversion done in %e seconds\n", retime-ratime); fprintf(stdout, "# [invert_quda] Device memory used:\n\tSpinor: %f GiB\n\tGauge: %f GiB\n", inv_param.spinorGiB, gauge_param.gaugeGiB); if(inv_param.mass_normalization == QUDA_KAPPA_NORMALIZATION) { _2_kappa = 2. * g_kappa; for(ix=0;ix<VOLUME;ix++) { _fv_ti_eq_re(g_spinor_field[1]+_GSI(ix), _2_kappa ); } } // transcribe the spinor field to lexicographical order with (t,x,y,z) for(ix=0;ix<VOLUME;ix++) { iy = g_lexic2eot[ix]; _fv_eq_fv(g_spinor_field[2]+_GSI(ix), g_spinor_field[1]+_GSI(iy)); } // multiply with g2 for(ix=0;ix<VOLUME;ix++) { _fv_eq_gamma_ti_fv(g_spinor_field[1]+_GSI(ix), 2, g_spinor_field[2]+_GSI(ix)); } /*********************************************** * check residuum ***********************************************/ if(check_residuum) { // apply the Wilson Dirac operator in the gamma-basis defined in cvc_linalg, // which uses the tmLQCD conventions (same as in contractions) // without explicit boundary conditions Q_Wilson_phi(g_spinor_field[2], g_spinor_field[1]); for(ix=0;ix<VOLUME;ix++) { _fv_mi_eq_fv(g_spinor_field[2]+_GSI(ix), g_spinor_field[0]+_GSI(ix)); } spinor_scalar_product_re(&norm, g_spinor_field[2], g_spinor_field[2], VOLUME); spinor_scalar_product_re(&norm2, g_spinor_field[0], g_spinor_field[0], VOLUME); fprintf(stdout, "\n# [invert_quda] absolut residuum squared: %e; relative residuum %e\n", norm, sqrt(norm/norm2) ); } /*********************************************** * write the solution ***********************************************/ sprintf(filename, "%s.inverted", source_filename); fprintf(stdout, "# [invert_quda] writing propagator to file %s\n", filename); status = write_propagator(g_spinor_field[1], filename, 0, g_propagator_precision); if(status != 0) { fprintf(stderr, "Error from write_propagator, 
status was %d\n", status); exit(22); } } // of loop on momenta } // of isc /*********************************************** * free the allocated memory, finalize ***********************************************/ // finalize the QUDA library fprintf(stdout, "# [invert_quda] finalizing quda\n"); endQuda(); free(g_gauge_field); free(gauge_field_smeared); for(i=0; i<no_fields; i++) free(g_spinor_field[i]); free(g_spinor_field); free_geometry(); if(g_source_momentum_set && full_orbit) { finalize_q_orbits(&qlatt_id, &qlatt_count, &qlatt_list, &qlatt_rep); if(qlatt_map != NULL) { free(qlatt_map[0]); free(qlatt_map); } } if(source_momentum != NULL) free(source_momentum); #ifdef MPI MPI_Finalize(); #endif if(g_cart_id==0) { g_the_time = time(NULL); fprintf(stdout, "\n# [invert_quda] %s# [invert_quda] end of run\n", ctime(&g_the_time)); fprintf(stderr, "\n# [invert_quda] %s# [invert_quda] end of run\n", ctime(&g_the_time)); } return(0); }
int main(int argc, char *argv[]) { FILE *parameterfile = NULL; int j, i, ix = 0, isample = 0, op_id = 0; char datafilename[206]; char parameterfilename[206]; char conf_filename[50]; char * input_filename = NULL; char * filename = NULL; double plaquette_energy; struct stout_parameters params_smear; #ifdef _KOJAK_INST #pragma pomp inst init #pragma pomp inst begin(main) #endif #if (defined SSE || defined SSE2 || SSE3) signal(SIGILL, &catch_ill_inst); #endif DUM_DERI = 8; DUM_MATRIX = DUM_DERI + 5; NO_OF_SPINORFIELDS = DUM_MATRIX + 4; //4 extra fields (corresponding to DUM_MATRIX+0..5) for deg. and ND matrix mult. NO_OF_SPINORFIELDS_32 = 6; verbose = 0; g_use_clover_flag = 0; process_args(argc,argv,&input_filename,&filename); set_default_filenames(&input_filename, &filename); init_parallel_and_read_input(argc, argv, input_filename); /* this DBW2 stuff is not needed for the inversion ! */ if (g_dflgcr_flag == 1) { even_odd_flag = 0; } g_rgi_C1 = 0; if (Nsave == 0) { Nsave = 1; } if (g_running_phmc) { NO_OF_SPINORFIELDS = DUM_MATRIX + 8; } tmlqcd_mpi_init(argc, argv); g_dbw2rand = 0; /* starts the single and double precision random number */ /* generator */ start_ranlux(rlxd_level, random_seed^nstore); /* we need to make sure that we don't have even_odd_flag = 1 */ /* if any of the operators doesn't use it */ /* in this way even/odd can still be used by other operators */ for(j = 0; j < no_operators; j++) if(!operator_list[j].even_odd_flag) even_odd_flag = 0; #ifndef TM_USE_MPI g_dbw2rand = 0; #endif #ifdef _GAUGE_COPY j = init_gauge_field(VOLUMEPLUSRAND, 1); j += init_gauge_field_32(VOLUMEPLUSRAND, 1); #else j = init_gauge_field(VOLUMEPLUSRAND, 0); j += init_gauge_field_32(VOLUMEPLUSRAND, 0); #endif if (j != 0) { fprintf(stderr, "Not enough memory for gauge_fields! Aborting...\n"); exit(-1); } j = init_geometry_indices(VOLUMEPLUSRAND); if (j != 0) { fprintf(stderr, "Not enough memory for geometry indices! 
Aborting...\n"); exit(-1); } if (no_monomials > 0) { if (even_odd_flag) { j = init_monomials(VOLUMEPLUSRAND / 2, even_odd_flag); } else { j = init_monomials(VOLUMEPLUSRAND, even_odd_flag); } if (j != 0) { fprintf(stderr, "Not enough memory for monomial pseudo fermion fields! Aborting...\n"); exit(-1); } } if (even_odd_flag) { j = init_spinor_field(VOLUMEPLUSRAND / 2, NO_OF_SPINORFIELDS); j += init_spinor_field_32(VOLUMEPLUSRAND / 2, NO_OF_SPINORFIELDS_32); } else { j = init_spinor_field(VOLUMEPLUSRAND, NO_OF_SPINORFIELDS); j += init_spinor_field_32(VOLUMEPLUSRAND, NO_OF_SPINORFIELDS_32); } if (j != 0) { fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n"); exit(-1); } if (g_running_phmc) { j = init_chi_spinor_field(VOLUMEPLUSRAND / 2, 20); if (j != 0) { fprintf(stderr, "Not enough memory for PHMC Chi fields! Aborting...\n"); exit(-1); } } g_mu = g_mu1; if (g_cart_id == 0) { /*construct the filenames for the observables and the parameters*/ strncpy(datafilename, filename, 200); strcat(datafilename, ".data"); strncpy(parameterfilename, filename, 200); strcat(parameterfilename, ".para"); parameterfile = fopen(parameterfilename, "w"); write_first_messages(parameterfile, "invert", git_hash); fclose(parameterfile); } /* define the geometry */ geometry(); /* define the boundary conditions for the fermion fields */ boundary(g_kappa); phmc_invmaxev = 1.; init_operators(); /* list and initialize measurements*/ if(g_proc_id == 0) { printf("\n"); for(int j = 0; j < no_measurements; j++) { printf("# measurement id %d, type = %d\n", j, measurement_list[j].type); } } init_measurements(); /* this could be maybe moved to init_operators */ #ifdef _USE_HALFSPINOR j = init_dirac_halfspinor(); if (j != 0) { fprintf(stderr, "Not enough memory for halffield! Aborting...\n"); exit(-1); } /* for mixed precision solvers, the 32 bit halfspinor field must always be there */ j = init_dirac_halfspinor32(); if (j != 0) { fprintf(stderr, "Not enough memory for 32-bit halffield! 
Aborting...\n"); exit(-1); } # if (defined _PERSISTENT) if (even_odd_flag) init_xchange_halffield(); # endif #endif for (j = 0; j < Nmeas; j++) { sprintf(conf_filename, "%s.%.4d", gauge_input_filename, nstore); if (g_cart_id == 0) { printf("#\n# Trying to read gauge field from file %s in %s precision.\n", conf_filename, (gauge_precision_read_flag == 32 ? "single" : "double")); fflush(stdout); } if( (i = read_gauge_field(conf_filename,g_gauge_field)) !=0) { fprintf(stderr, "Error %d while reading gauge field from %s\n Aborting...\n", i, conf_filename); exit(-2); } if (g_cart_id == 0) { printf("# Finished reading gauge field.\n"); fflush(stdout); } #ifdef TM_USE_MPI xchange_gauge(g_gauge_field); #endif /*Convert to a 32 bit gauge field, after xchange*/ convert_32_gauge_field(g_gauge_field_32, g_gauge_field, VOLUMEPLUSRAND); /*compute the energy of the gauge field*/ plaquette_energy = measure_plaquette( (const su3**) g_gauge_field); if (g_cart_id == 0) { printf("# The computed plaquette value is %e.\n", plaquette_energy / (6.*VOLUME*g_nproc)); fflush(stdout); } if (use_stout_flag == 1){ params_smear.rho = stout_rho; params_smear.iterations = stout_no_iter; /* if (stout_smear((su3_tuple*)(g_gauge_field[0]), ¶ms_smear, (su3_tuple*)(g_gauge_field[0])) != 0) */ /* exit(1) ; */ g_update_gauge_copy = 1; plaquette_energy = measure_plaquette( (const su3**) g_gauge_field); if (g_cart_id == 0) { printf("# The plaquette value after stouting is %e\n", plaquette_energy / (6.*VOLUME*g_nproc)); fflush(stdout); } } /* if any measurements are defined in the input file, do them here */ measurement * meas; for(int imeas = 0; imeas < no_measurements; imeas++){ meas = &measurement_list[imeas]; if (g_proc_id == 0) { fprintf(stdout, "#\n# Beginning online measurement.\n"); } meas->measurefunc(nstore, imeas, even_odd_flag); } if (reweighting_flag == 1) { reweighting_factor(reweighting_samples, nstore); } /* Compute minimal eigenvalues, if wanted */ if (compute_evs != 0) { 
eigenvalues(&no_eigenvalues, 5000, eigenvalue_precision, 0, compute_evs, nstore, even_odd_flag); } if (phmc_compute_evs != 0) { #ifdef TM_USE_MPI MPI_Finalize(); #endif return(0); } /* Compute the mode number or topological susceptibility using spectral projectors, if wanted*/ if(compute_modenumber != 0 || compute_topsus !=0){ invert_compute_modenumber(); } // set up blocks if Deflation is used if (g_dflgcr_flag) init_blocks(nblocks_t, nblocks_x, nblocks_y, nblocks_z); if(SourceInfo.type == SRC_TYPE_VOL || SourceInfo.type == SRC_TYPE_PION_TS || SourceInfo.type == SRC_TYPE_GEN_PION_TS) { index_start = 0; index_end = 1; } g_precWS=NULL; if(use_preconditioning == 1){ /* todo load fftw wisdom */ #if (defined HAVE_FFTW ) && !( defined TM_USE_MPI) loadFFTWWisdom(g_spinor_field[0],g_spinor_field[1],T,LX); #else use_preconditioning=0; #endif } if (g_cart_id == 0) { fprintf(stdout, "#\n"); /*Indicate starting of the operator part*/ } for(op_id = 0; op_id < no_operators; op_id++) { boundary(operator_list[op_id].kappa); g_kappa = operator_list[op_id].kappa; g_mu = operator_list[op_id].mu; g_c_sw = operator_list[op_id].c_sw; // DFLGCR and DFLFGMRES if(operator_list[op_id].solver == DFLGCR || operator_list[op_id].solver == DFLFGMRES) { generate_dfl_subspace(g_N_s, VOLUME, reproduce_randomnumber_flag); } if(use_preconditioning==1 && PRECWSOPERATORSELECT[operator_list[op_id].solver]!=PRECWS_NO ){ printf("# Using preconditioning with treelevel preconditioning operator: %s \n", precWSOpToString(PRECWSOPERATORSELECT[operator_list[op_id].solver])); /* initial preconditioning workspace */ operator_list[op_id].precWS=(spinorPrecWS*)malloc(sizeof(spinorPrecWS)); spinorPrecWS_Init(operator_list[op_id].precWS, operator_list[op_id].kappa, operator_list[op_id].mu/2./operator_list[op_id].kappa, -(0.5/operator_list[op_id].kappa-4.), PRECWSOPERATORSELECT[operator_list[op_id].solver]); g_precWS = operator_list[op_id].precWS; if(PRECWSOPERATORSELECT[operator_list[op_id].solver] == 
PRECWS_D_DAGGER_D) { fitPrecParams(op_id); } } for(isample = 0; isample < no_samples; isample++) { for (ix = index_start; ix < index_end; ix++) { if (g_cart_id == 0) { fprintf(stdout, "#\n"); /*Indicate starting of new index*/ } /* we use g_spinor_field[0-7] for sources and props for the moment */ /* 0-3 in case of 1 flavour */ /* 0-7 in case of 2 flavours */ prepare_source(nstore, isample, ix, op_id, read_source_flag, source_location, random_seed); //randmize initial guess for eigcg if needed-----experimental if( (operator_list[op_id].solver == INCREIGCG) && (operator_list[op_id].solver_params.eigcg_rand_guess_opt) ){ //randomize the initial guess gaussian_volume_source( operator_list[op_id].prop0, operator_list[op_id].prop1,isample,ix,0); //need to check this } operator_list[op_id].inverter(op_id, index_start, 1); } } if(use_preconditioning==1 && operator_list[op_id].precWS!=NULL ){ /* free preconditioning workspace */ spinorPrecWS_Free(operator_list[op_id].precWS); free(operator_list[op_id].precWS); } if(operator_list[op_id].type == OVERLAP){ free_Dov_WS(); } } nstore += Nsave; } #ifdef TM_USE_OMP free_omp_accumulators(); #endif free_blocks(); free_dfl_subspace(); free_gauge_field(); free_gauge_field_32(); free_geometry_indices(); free_spinor_field(); free_spinor_field_32(); free_moment_field(); free_chi_spinor_field(); free(filename); free(input_filename); free(SourceInfo.basename); free(PropInfo.basename); #ifdef TM_USE_QUDA _endQuda(); #endif #ifdef TM_USE_MPI MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); #endif return(0); #ifdef _KOJAK_INST #pragma pomp inst end(main) #endif }
#if defined(HAVE_GPU) && defined(TEMPORALGAUGE)
/*
 * Applies the gauge transformation routine `apply_trafo` to `field` using
 * g_trafo and prints, on process 0, the square norm of the field over
 * VOLUME/2 sites before and after the call.
 * A macro rather than a function because the transformation routines and
 * g_trafo are declared elsewhere and are only passed through here.
 */
#  define GTRAFO_SPINOR_WITH_NORMS(apply_trafo, field)                           \
  do {                                                                           \
    const double norm_pre = square_norm((field), VOLUME/2, 1);                   \
    apply_trafo((field), g_trafo);                                               \
    const double norm_post = square_norm((field), VOLUME/2, 1);                  \
    if (g_proc_id == 0) {                                                        \
      printf("\tsquare norm before gauge fixing: %.16e\n", norm_pre);           \
      printf("\tsquare norm after gauge fixing: %.16e\n", norm_post);           \
    }                                                                            \
  } while (0)
#endif

/*
 * Even/odd preconditioned inversion for the flavour doublet.
 *
 * Even_s/Odd_s and Even_c/Odd_c hold the even and odd halves of the two
 * source flavour components; the solution is written to Even_new_* and
 * Odd_new_*.  The odd sites are obtained from a solver run on the squared
 * hermitean operator (cg_her_nd with Qtm_pm_ndpsi, or mixedsolve_eo_nd when
 * the GPU path is compiled in and usegpu_flag is set), followed by
 * Qtm_dagger_ndpsi; the even sites are then reconstructed from the odd ones.
 * g_spinor_field[DUM_DERI .. DUM_DERI+3] are used as scratch space.
 *
 * With HAVE_GPU and TEMPORALGAUGE the gauge field and the sources are
 * transformed before the solve; afterwards the gauge field is restored from
 * g_tempgauge_field and sources and solutions are transformed back.
 *
 * precision, max_iter and rel_prec are forwarded to the solver; solver_flag
 * is not used in this routine.
 *
 * Returns the iteration count reported by the solver.
 */
int invert_doublet_eo(spinor * const Even_new_s, spinor * const Odd_new_s,
                      spinor * const Even_new_c, spinor * const Odd_new_c,
                      spinor * const Even_s, spinor * const Odd_s,
                      spinor * const Even_c, spinor * const Odd_c,
                      const double precision, const int max_iter,
                      const int solver_flag, const int rel_prec)
{
  int n_iter = 0;

#ifdef HAVE_GPU
#  ifdef TEMPORALGAUGE
  /* initialize temporal gauge here */
  double plaq_pre = 0.0;
  double plaq_post = 0.0;

  if (usegpu_flag) {
    /* need VOLUME here (not N=VOLUME/2); this initializes the transformation
     * matrices and g_tempgauge_field as a copy of g_gauge_field */
    if (init_temporalgauge_trafo(VOLUME, g_gauge_field) != 0) {
      if (g_proc_id == 0) {
        printf("Error while gauge fixing to temporal gauge. Aborting...\n");
      }
      exit(200);
    }

    /* transform the gauge field */
    plaq_pre = measure_plaquette(g_gauge_field);
    apply_gtrafo(g_gauge_field, g_trafo);
    plaq_post = measure_plaquette(g_gauge_field);
    if (g_proc_id == 0) {
      printf("\tPlaquette before gauge fixing: %.16e\n", plaq_pre/6./VOLUME);
      printf("\tPlaquette after gauge fixing:  %.16e\n", plaq_post/6./VOLUME);
    }

    /* transform the four source halves: odd strange, odd charm, even strange, even charm */
    GTRAFO_SPINOR_WITH_NORMS(apply_gtrafo_spinor_odd, Odd_s);
    GTRAFO_SPINOR_WITH_NORMS(apply_gtrafo_spinor_odd, Odd_c);
    GTRAFO_SPINOR_WITH_NORMS(apply_gtrafo_spinor_even, Even_s);
    GTRAFO_SPINOR_WITH_NORMS(apply_gtrafo_spinor_even, Even_c);

#    ifdef MPI
    xchange_gauge(g_gauge_field);
#    endif
  }
#  endif
#endif /* HAVE_GPU*/

  /* here comes the inversion using even/odd preconditioning */
  if (g_proc_id == 0) {
    printf("# Using even/odd preconditioning!\n");
    fflush(stdout);
  }

  M_ee_inv_ndpsi(Even_new_s, Even_new_c,
                 Even_s, Even_c,
                 g_mubar, g_epsbar);
  Hopping_Matrix(OE, g_spinor_field[DUM_DERI], Even_new_s);
  Hopping_Matrix(OE, g_spinor_field[DUM_DERI+1], Even_new_c);

  /* The sign is plus, since in Hopping_Matrix the minus is missing */
  assign_mul_add_r(g_spinor_field[DUM_DERI], +1., Odd_s, VOLUME/2);
  assign_mul_add_r(g_spinor_field[DUM_DERI+1], +1., Odd_c, VOLUME/2);

  /* Do the inversion with the preconditioned matrix to get the odd sites; */
  /* here we invert the hermitean operator squared                        */
  if (g_proc_id == 0) {
    printf("# Using CG for TMWILSON flavour doublet!\n");
    fflush(stdout);
  }
  gamma5(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI], VOLUME/2);
  gamma5(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+1], VOLUME/2);

#ifdef HAVE_GPU
  if (usegpu_flag) {
    /* GPU, mixed precision solver: available only when MPI and PARALLELT are
     * either both defined or both undefined */
#  if (defined(MPI) && defined(PARALLELT)) || (!defined(MPI) && !defined(PARALLELT))
    n_iter = mixedsolve_eo_nd(Odd_new_s, Odd_new_c,
                              g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
                              max_iter, precision, rel_prec);
#  else
    printf("MPI and/or PARALLELT are not appropriately set for the GPU implementation. Aborting...\n");
    exit(-1);
#  endif
  }
  else
#endif
  {
    /* CPU, conjugate gradient */
    n_iter = cg_her_nd(Odd_new_s, Odd_new_c,
                       g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
                       max_iter, precision, rel_prec,
                       VOLUME/2, &Qtm_pm_ndpsi);
  }

  Qtm_dagger_ndpsi(Odd_new_s, Odd_new_c,
                   Odd_new_s, Odd_new_c);

  /* Reconstruct the even sites */
  Hopping_Matrix(EO, g_spinor_field[DUM_DERI], Odd_new_s);
  Hopping_Matrix(EO, g_spinor_field[DUM_DERI+1], Odd_new_c);
  M_ee_inv_ndpsi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3],
                 g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
                 g_mubar, g_epsbar);

  /* The sign is plus, since in Hopping_Matrix the minus is missing */
  assign_add_mul_r(Even_new_s, g_spinor_field[DUM_DERI+2], +1., VOLUME/2);
  assign_add_mul_r(Even_new_c, g_spinor_field[DUM_DERI+3], +1., VOLUME/2);

#ifdef HAVE_GPU
  /* return from temporal gauge again */
#  ifdef TEMPORALGAUGE
  if (usegpu_flag) {
    /* The gauge field is not transformed back (apply_inv_gtrafo is not used);
     * instead the saved original located in g_tempgauge_field is copied back,
     * which makes an update of the gauge copy necessary. */
    plaq_pre = measure_plaquette(g_gauge_field);
    copy_gauge_field(g_gauge_field, g_tempgauge_field);
    g_update_gauge_copy = 1;
    plaq_post = measure_plaquette(g_gauge_field);
    if (g_proc_id == 0) {
      printf("\tPlaquette before inverse gauge fixing: %.16e\n", plaq_pre/6./VOLUME);
      printf("\tPlaquette after inverse gauge fixing:  %.16e\n", plaq_post/6./VOLUME);
    }

    /* undo the transformation of the sources */
    GTRAFO_SPINOR_WITH_NORMS(apply_inv_gtrafo_spinor_even, Even_s);
    GTRAFO_SPINOR_WITH_NORMS(apply_inv_gtrafo_spinor_even, Even_c);
    GTRAFO_SPINOR_WITH_NORMS(apply_inv_gtrafo_spinor_odd, Odd_s);
    GTRAFO_SPINOR_WITH_NORMS(apply_inv_gtrafo_spinor_odd, Odd_c);

    /* transform the solutions back as well */
    GTRAFO_SPINOR_WITH_NORMS(apply_inv_gtrafo_spinor_even, Even_new_s);
    GTRAFO_SPINOR_WITH_NORMS(apply_inv_gtrafo_spinor_even, Even_new_c);
    GTRAFO_SPINOR_WITH_NORMS(apply_inv_gtrafo_spinor_odd, Odd_new_s);
    GTRAFO_SPINOR_WITH_NORMS(apply_inv_gtrafo_spinor_odd, Odd_new_c);

    finalize_temporalgauge();

#    ifdef MPI
    xchange_gauge(g_gauge_field);
#    endif
  }
#  endif
#endif

  return(n_iter);
}

#if defined(HAVE_GPU) && defined(TEMPORALGAUGE)
#  undef GTRAFO_SPINOR_WITH_NORMS
#endif
int main(int argc,char *argv[]) { int j,j_max,k,k_max = 2; paramsXlfInfo *xlfInfo; int ix, n, *nn,*mm,i; double delta, deltamax; spinor rsp; int status = 0; #ifdef MPI DUM_DERI = 6; DUM_SOLVER = DUM_DERI+2; DUM_MATRIX = DUM_SOLVER+6; NO_OF_SPINORFIELDS = DUM_MATRIX+2; MPI_Init(&argc, &argv); #endif g_rgi_C1 = 1.; /* Read the input file */ read_input("hopping_test.input"); tmlqcd_mpi_init(argc, argv); if(g_proc_id==0) { #ifdef SSE printf("# The code was compiled with SSE instructions\n"); #endif #ifdef SSE2 printf("# The code was compiled with SSE2 instructions\n"); #endif #ifdef SSE3 printf("# The code was compiled with SSE3 instructions\n"); #endif #ifdef P4 printf("# The code was compiled for Pentium4\n"); #endif #ifdef OPTERON printf("# The code was compiled for AMD Opteron\n"); #endif #ifdef _GAUGE_COPY printf("# The code was compiled with -D_GAUGE_COPY\n"); #endif #ifdef BGL printf("# The code was compiled for Blue Gene/L\n"); #endif #ifdef BGP printf("# The code was compiled for Blue Gene/P\n"); #endif #ifdef _USE_HALFSPINOR printf("# The code was compiled with -D_USE_HALFSPINOR\n"); #endif #ifdef _USE_SHMEM printf("# the code was compiled with -D_USE_SHMEM\n"); # ifdef _PERSISTENT printf("# the code was compiled for persistent MPI calls (halfspinor only)\n"); # endif #endif #ifdef _INDEX_INDEP_GEOM printf("# the code was compiled with index independent geometry\n"); #endif #ifdef MPI # ifdef _NON_BLOCKING printf("# the code was compiled for non-blocking MPI calls (spinor and gauge)\n"); # endif # ifdef _USE_TSPLITPAR printf("# the code was compiled with tsplit parallelization\n"); # endif #endif printf("\n"); fflush(stdout); } #ifdef _GAUGE_COPY init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1); #else init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0); #endif init_geometry_indices(VOLUMEPLUSRAND + g_dbw2rand); if(even_odd_flag) { j = init_spinor_field(VOLUMEPLUSRAND/2, 2*k_max+1); } else { j = init_spinor_field(VOLUMEPLUSRAND, 2*k_max); } if ( j!= 0) { 
fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n"); exit(0); } j = init_moment_field(VOLUME, VOLUMEPLUSRAND); if ( j!= 0) { fprintf(stderr, "Not enough memory for moment fields! Aborting...\n"); exit(0); } if(g_proc_id == 0) { fprintf(stdout,"The number of processes is %d \n",g_nproc); printf("# The lattice size is %d x %d x %d x %d\n", (int)(T*g_nproc_t), (int)(LX*g_nproc_x), (int)(LY*g_nproc_y), (int)(g_nproc_z*LZ)); printf("# The local lattice size is %d x %d x %d x %d\n", (int)(T), (int)(LX), (int)(LY),(int) LZ); if(even_odd_flag) { printf("# testinging the even/odd preconditioned Dirac operator\n"); } else { printf("# testinging the standard Dirac operator\n"); } fflush(stdout); } /* define the geometry */ geometry(); /* define the boundary conditions for the fermion fields */ boundary(g_kappa); #ifdef _USE_HALFSPINOR j = init_dirac_halfspinor(); if ( j!= 0) { fprintf(stderr, "Not enough memory for halfspinor fields! Aborting...\n"); exit(0); } if(g_sloppy_precision_flag == 1) { g_sloppy_precision = 1; j = init_dirac_halfspinor32(); if ( j!= 0) { fprintf(stderr, "Not enough memory for 32-Bit halfspinor fields! Aborting...\n"); exit(0); } } # if (defined _PERSISTENT) init_xchange_halffield(); # endif #endif status = check_geometry(); if (status != 0) { fprintf(stderr, "Checking of geometry failed. 
Unable to proceed.\nAborting....\n"); exit(1); } #if (defined MPI && !(defined _USE_SHMEM)) check_xchange(); #endif start_ranlux(1, 123456); xlfInfo = construct_paramsXlfInfo(0.5, 0); random_gauge_field(reproduce_randomnumber_flag); if ( startoption == 2 ) { /* restart */ write_gauge_field(gauge_input_filename,gauge_precision_write_flag,xlfInfo); } else if ( startoption == 0 ) { /* cold */ unit_g_gauge_field(); } else if (startoption == 3 ) { /* continue */ read_gauge_field(gauge_input_filename); } else if ( startoption == 1 ) { /* hot */ } #ifdef MPI /*For parallelization: exchange the gaugefield */ xchange_gauge(g_gauge_field); #endif if(even_odd_flag) { /*initialize the pseudo-fermion fields*/ j_max=1; for (k = 0; k < k_max; k++) { random_spinor_field(g_spinor_field[k], VOLUME/2, 0); } if (read_source_flag == 2) { /* save */ /* even first, odd second */ write_spinorfield_cm_single(g_spinor_field[0],g_spinor_field[1],SourceInfo.basename); } else if (read_source_flag == 1) { /* yes */ /* even first, odd second */ read_spinorfield_cm_single(g_spinor_field[0],g_spinor_field[1],SourceInfo.basename,-1,0); # if (!defined MPI) if (write_cp_flag == 1) { strcat(SourceInfo.basename,".2"); read_spinorfield_cm_single(g_spinor_field[2],g_spinor_field[3],SourceInfo.basename,-1,0); nn=(int*)calloc(VOLUME,sizeof(int)); if((void*)nn == NULL) return(100); mm=(int*)calloc(VOLUME,sizeof(int)); if((void*)mm == NULL) return(100); n=0; deltamax=0.0; for(ix=0;ix<VOLUME/2;ix++){ (rsp.s0).c0 = (g_spinor_field[2][ix].s0).c0 - (g_spinor_field[0][ix].s0).c0; (rsp.s0).c1 = (g_spinor_field[2][ix].s0).c1 - (g_spinor_field[0][ix].s0).c1; (rsp.s0).c2 = (g_spinor_field[2][ix].s0).c2 - (g_spinor_field[0][ix].s0).c2; (rsp.s1).c0 = (g_spinor_field[2][ix].s1).c0 - (g_spinor_field[0][ix].s1).c0; (rsp.s1).c1 = (g_spinor_field[2][ix].s1).c1 - (g_spinor_field[0][ix].s1).c1; (rsp.s1).c2 = (g_spinor_field[2][ix].s1).c2 - (g_spinor_field[0][ix].s1).c2; (rsp.s2).c0 = (g_spinor_field[2][ix].s2).c0 - 
(g_spinor_field[0][ix].s2).c0; (rsp.s2).c1 = (g_spinor_field[2][ix].s2).c1 - (g_spinor_field[0][ix].s2).c1; (rsp.s2).c2 = (g_spinor_field[2][ix].s2).c2 - (g_spinor_field[0][ix].s2).c2; (rsp.s3).c0 = (g_spinor_field[2][ix].s3).c0 - (g_spinor_field[0][ix].s3).c0; (rsp.s3).c1 = (g_spinor_field[2][ix].s3).c1 - (g_spinor_field[0][ix].s3).c1; (rsp.s3).c2 = (g_spinor_field[2][ix].s3).c2 - (g_spinor_field[0][ix].s3).c2; _spinor_norm_sq(delta,rsp); if (delta > 1.0e-12) { nn[n] = g_eo2lexic[ix]; mm[n]=ix; n++; } if(delta>deltamax) deltamax=delta; } if (n>0){ printf("mismatch in even spincolorfield in %d points:\n",n); for(i=0; i< MIN(n,1000); i++){ printf("%d,(%d,%d,%d,%d):%f vs. %f\n",nn[i],g_coord[nn[i]][0],g_coord[nn[i]][1],g_coord[nn[i]][2],g_coord[nn[i]][3],creal((g_spinor_field[2][mm[i]].s0).c0), creal((g_spinor_field[0][mm[i]].s0).c0));fflush(stdout); } } n = 0; for(ix=0;ix<VOLUME/2;ix++){ (rsp.s0).c0 = (g_spinor_field[3][ix].s0).c0 - (g_spinor_field[1][ix].s0).c0; (rsp.s0).c1 = (g_spinor_field[3][ix].s0).c1 - (g_spinor_field[1][ix].s0).c1; (rsp.s0).c2 = (g_spinor_field[3][ix].s0).c2 - (g_spinor_field[1][ix].s0).c2; (rsp.s1).c0 = (g_spinor_field[3][ix].s1).c0 - (g_spinor_field[1][ix].s1).c0; (rsp.s1).c1 = (g_spinor_field[3][ix].s1).c1 - (g_spinor_field[1][ix].s1).c1; (rsp.s1).c2 = (g_spinor_field[3][ix].s1).c2 - (g_spinor_field[1][ix].s1).c2; (rsp.s2).c0 = (g_spinor_field[3][ix].s2).c0 - (g_spinor_field[1][ix].s2).c0; (rsp.s2).c1 = (g_spinor_field[3][ix].s2).c1 - (g_spinor_field[1][ix].s2).c1; (rsp.s2).c2 = (g_spinor_field[3][ix].s2).c2 - (g_spinor_field[1][ix].s2).c2; (rsp.s3).c0 = (g_spinor_field[3][ix].s3).c0 - (g_spinor_field[1][ix].s3).c0; (rsp.s3).c1 = (g_spinor_field[3][ix].s3).c1 - (g_spinor_field[1][ix].s3).c1; (rsp.s3).c2 = (g_spinor_field[3][ix].s3).c2 - (g_spinor_field[1][ix].s3).c2; _spinor_norm_sq(delta,rsp); if (delta > 1.0e-12) { nn[n]=g_eo2lexic[ix+(VOLUME+RAND)/2]; mm[n]=ix; n++; } if(delta>deltamax) deltamax=delta; } if (n>0){ printf("mismatch in 
odd spincolorfield in %d points:\n",n); for(i=0; i< MIN(n,1000); i++){ printf("%d,(%d,%d,%d,%d):%f vs. %f\n",nn[i],g_coord[nn[i]][0],g_coord[nn[i]][1],g_coord[nn[i]][2],g_coord[nn[i]][3],creal(g_spinor_field[3][mm[i]].s0.c0), creal(g_spinor_field[1][mm[i]].s0.c0));fflush(stdout); } } printf("max delta=%e",deltamax);fflush(stdout); } # endif } if (read_source_flag > 0 && write_cp_flag == 0) { /* read-source yes or nobutsave; checkpoint no */ /* first spinorial arg is output, the second is input */ Hopping_Matrix(1, g_spinor_field[1], g_spinor_field[0]); /*ieo=1 M_{eo}*/ Hopping_Matrix(0, g_spinor_field[0], g_spinor_field[1]); /*ieo=0 M_{oe}*/ strcat(SourceInfo.basename,".out"); write_spinorfield_cm_single(g_spinor_field[0],g_spinor_field[1],SourceInfo.basename); printf("Check-field printed. Exiting...\n"); fflush(stdout); } #ifdef MPI MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); #endif } free_gauge_field(); free_geometry_indices(); free_spinor_field(); free_moment_field(); return(0); }
int main(int argc, char **argv) { int c, i, mu, nu; int count = 0; int filename_set = 0; int dims[4] = {0,0,0,0}; int l_LX_at, l_LXstart_at; int x0, x1, x2, x3, ix, iix; int sx0, sx1, sx2, sx3; int sid; double *disc = (double*)NULL; double *disc2 = (double*)NULL; double *work = (double*)NULL; double q[4], fnorm; double cvc_lnuy[8]; double *gauge_trafo=(double*)NULL; double unit_trace[2], D_trace[2]; int verbose = 0; int do_gt = 0; char filename[100]; double ratime, retime; double plaq; double spinor1[24], spinor2[24], U_[18]; complex w, w1, *cp1, *cp2, *cp3; FILE *ofs; fftw_complex *in=(fftw_complex*)NULL; #ifdef MPI fftwnd_mpi_plan plan_p; int *status; #else fftwnd_plan plan_p; #endif #ifdef MPI MPI_Init(&argc, &argv); #endif while ((c = getopt(argc, argv, "h?vgf:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'g': do_gt = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'h': case '?': default: usage(); break; } } /* set the default values */ set_default_input_values(); if(filename_set==0) strcpy(filename, "cvc.input"); /* read the input file */ read_input(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } if(g_kappa == 0.) 
{ if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n"); usage(); } /* initialize MPI parameters */ mpi_init(argc, argv); #ifdef MPI if((status = (int*)calloc(g_nproc, sizeof(int))) == (int*)NULL) { MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); exit(7); } #endif /* initialize fftw */ dims[0]=T_global; dims[1]=LX; dims[2]=LY; dims[3]=LZ; #ifdef MPI plan_p = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_BACKWARD, FFTW_MEASURE); fftwnd_mpi_local_sizes(plan_p, &T, &Tstart, &l_LX_at, &l_LXstart_at, &FFTW_LOC_VOLUME); #else plan_p = fftwnd_create_plan(4, dims, FFTW_BACKWARD, FFTW_MEASURE | FFTW_IN_PLACE); T = T_global; Tstart = 0; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; #endif fprintf(stdout, "# [%2d] fftw parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] l_LX_at = %3d\n"\ "# [%2d] l_LXstart_at = %3d\n"\ "# [%2d] FFTW_LOC_VOLUME = %3d\n", g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at, g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME); #ifdef MPI if(T==0) { fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id); MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); exit(2); } #endif if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(1); } geometry(); /* read the gauge field */ alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND); sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf); if(g_cart_id==0) fprintf(stdout, "reading gauge field from file %s\n", filename); read_lime_gauge_field_doubleprec(filename); xchange_gauge(); /* measure the plaquette */ plaquette(&plaq); if(g_cart_id==0) fprintf(stdout, "measured plaquette value: %25.16e\n", plaq); /* get the source location coordinates */ sx0 = g_source_location / (LX*LY*LZ ); sx1 = ( g_source_location % (LX*LY*LZ) ) / (LY*LZ); sx2 = ( g_source_location % (LY*LZ) ) / LZ; sx3 = ( g_source_location % LZ ); /* read the data for lnuy */ sprintf(filename, 
"cvc_lnuy_X.%.4d", Nconf); ofs = fopen(filename, "r"); fprintf(stdout, "reading cvc lnuy from file %s\n", filename); for(mu=0; mu<4; mu++) { fscanf(ofs, "%lf%lf", cvc_lnuy+2*mu, cvc_lnuy+2*mu+1); } fclose(ofs); /* allocate memory for the spinor fields */ no_fields = 2; g_spinor_field = (double**)calloc(no_fields, sizeof(double*)); for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND); /**************************************** * allocate memory for the contractions ****************************************/ disc = (double*)calloc(8*VOLUME, sizeof(double)); if( disc == (double*)NULL ) { fprintf(stderr, "could not allocate memory for disc\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(3); } for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.; disc2 = (double*)calloc(8*VOLUME, sizeof(double)); if( disc2 == (double*)NULL ) { fprintf(stderr, "could not allocate memory for disc2\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(3); } for(ix=0; ix<8*VOLUME; ix++) disc2[ix] = 0.; work = (double*)calloc(48*VOLUME, sizeof(double)); if( work == (double*)NULL ) { fprintf(stderr, "could not allocate memory for work\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(3); } /**************************************** * prepare Fourier transformation arrays ****************************************/ in = (fftw_complex*)malloc(FFTW_LOC_VOLUME*sizeof(fftw_complex)); if(in==(fftw_complex*)NULL) { #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(4); } if(g_resume==1) { /* read current disc from file */ sprintf(filename, ".outcvc_current.%.4d", Nconf); c = read_contraction(disc, &count, filename, 8); #ifdef MPI MPI_Gather(&c, 1, MPI_INT, status, 1, MPI_INT, 0, g_cart_grid); if(g_cart_id==0) { /* check the entries in status */ for(i=0; i<g_nproc; i++) if(status[i]!=0) { status[0] = 1; break; } } MPI_Bcast(status, 1, MPI_INT, 0, g_cart_grid); if(status[0]==1) { for(ix=0; 
ix<8*VOLUME; ix++) disc[ix] = 0.; count = 0; } #else if(c != 0) { fprintf(stdout, "could not read current disc; start new\n"); for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.; count = 0; } #endif if(g_cart_id==0) fprintf(stdout, "starting with count = %d\n", count); } /* of g_resume == 1 */ if(do_gt==1) { /*********************************** * initialize gauge transformation ***********************************/ init_gauge_trafo(&gauge_trafo,1.0); fprintf(stdout, "applying gauge trafo to gauge field\n"); apply_gt_gauge(gauge_trafo); plaquette(&plaq); if(g_cart_id==0) fprintf(stdout, "plaquette plaq = %25.16e\n", plaq); } unit_trace[0] = 0.; unit_trace[1] = 0.; D_trace[0] = 0.; D_trace[1] = 0.; /**************************************** * start loop on source id.s ****************************************/ for(sid=g_sourceid; sid<=g_sourceid2; sid++) { /**************************************** * read the new propagator ****************************************/ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif /**************************************** * check: write source before D-appl. 
****************************************/ /* if(format==0) { sprintf(filename, "%s.%.4d.%.2d", filename_prefix, Nconf, sid); read_lime_spinor(g_spinor_field[0], filename, 0); } for(ix=0; ix<12*VOLUME; ix++) { fprintf(stdout, "source: %6d%25.16e%25.16e\n", ix, g_spinor_field[0][2*ix], g_spinor_field[0][2*ix+1]); } */ if(format==0) { sprintf(filename, "%s.%.4d.%.2d.inverted", filename_prefix, Nconf, sid); /* sprintf(filename, "%s.%.4d.%.2d", filename_prefix, Nconf, sid); */ if(read_lime_spinor(g_spinor_field[1], filename, 0) != 0) break; } else if(format==1) { sprintf(filename, "%s.%.4d.%.5d.inverted", filename_prefix, Nconf, sid); if(read_cmi(g_spinor_field[1], filename) != 0) break; } xchange_field(g_spinor_field[1]); if(do_gt==1) { fprintf(stdout, "applying gt on propagators\n"); for(ix=0; ix<VOLUME; ix++) { _fv_eq_cm_ti_fv(spinor1, gauge_trafo+18*ix, g_spinor_field[1]+_GSI(ix)); _fv_eq_fv(g_spinor_field[1]+_GSI(ix), spinor1); } } #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif fprintf(stdout, "time to read prop.: %e seconds\n", retime-ratime); count++; /**************************************** * calculate the source: apply Q_phi_tbc ****************************************/ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif Q_phi_tbc(g_spinor_field[0], g_spinor_field[1]); xchange_field(g_spinor_field[0]); #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "time to calculate source: %e seconds\n", retime-ratime); /**************************************** * check: write source after D-appl. 
****************************************/ /* for(ix=0; ix<12*VOLUME; ix++) { fprintf(stdout, "D_source: %6d%25.16e%25.16e\n", ix, g_spinor_field[0][2*ix], g_spinor_field[0][2*ix+1]); } */ /**************************************** * add new contractions to (existing) disc ****************************************/ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif for(mu=0; mu<4; mu++) { /* loop on Lorentz index of the current */ iix = _GWI(mu,0,VOLUME); for(ix=0; ix<VOLUME; ix++) { /* loop on lattice sites */ _cm_eq_cm_ti_co(U_, &g_gauge_field[_GGI(ix, mu)], &co_phase_up[mu]); /* first contribution */ _fv_eq_cm_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(g_iup[ix][mu])]); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_mi_eq_fv(spinor2, spinor1); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(ix)], spinor2); disc[iix ] -= 0.5 * w.re; disc[iix+1] -= 0.5 * w.im; /* second contribution */ _fv_eq_cm_dag_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(ix)]); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_pl_eq_fv(spinor2, spinor1); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(g_iup[ix][mu])], spinor2); disc2[iix ] -= 0.5 * w.re; disc2[iix+1] -= 0.5 * w.im; iix += 2; } /* of ix */ } /* of mu */ #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif fprintf(stdout, "[%2d] contractions for CVC in %e seconds\n", g_cart_id, retime-ratime); /*************************************************** * check: convergence of trace of unit matrix ***************************************************/ _co_eq_fv_dag_ti_fv(&w, g_spinor_field[0]+_GSI(g_source_location), g_spinor_field[0]+_GSI(g_source_location)); unit_trace[0] += w.re; unit_trace[1] += w.im; fprintf(stdout, "unit_trace: %4d%25.16e%25.16e\n", count, w.re, w.im); _co_eq_fv_dag_ti_fv(&w, g_spinor_field[0]+_GSI(g_source_location), g_spinor_field[0]+_GSI(g_iup[g_source_location][0])); fprintf(stdout, "shift_trace: %4d%25.16e%25.16e\n", count, w.re, w.im); 
/*************************************************** * check: convergence of trace D_u(source_location, source_location) ***************************************************/ Q_phi_tbc(g_spinor_field[1], g_spinor_field[0]); _co_eq_fv_dag_ti_fv(&w, g_spinor_field[0]+_GSI(g_source_location), g_spinor_field[1]+_GSI(g_source_location)); D_trace[0] += w.re; D_trace[1] += w.im; /* fprintf(stdout, "D_trace: %4d%25.16e%25.16e\n", count, D_trace[0]/(double)count, D_trace[1]/(double)count); */ fprintf(stdout, "D_trace: %4d%25.16e%25.16e\n", count, w.re, w.im); /*************************************************** * save results for count = multiple of Nsave ***************************************************/ if(count%Nsave == 0) { if(g_cart_id == 0) fprintf(stdout, "save results for count = %d\n", count); /* save the result in position space */ /* divide by number of propagators */ for(ix=0; ix<8*VOLUME; ix++) work[ix] = disc[ix] / (double)count; sprintf(filename, "outcvc_Xm.%.4d.%.4d", Nconf, count); write_contraction(work, NULL, filename, 4, 2, 0); for(ix=0; ix<8*VOLUME; ix++) work[ix] = disc2[ix] / (double)count; sprintf(filename, "outcvc_Xp.%.4d.%.4d", Nconf, count); write_contraction(work, NULL, filename, 4, 2, 0); for(ix=0; ix<8*VOLUME; ix++) work[ix] = (disc[ix] + disc2[ix]) / (double)count; sprintf(filename, "outcvc_X.%.4d.%.4d", Nconf, count); write_contraction(work, NULL, filename, 4, 2, 0); #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif /**************************************** * Fourier transform data, copy to work ****************************************/ for(mu=0; mu<4; mu++) { memcpy((void*)in, (void*)(work+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double)); #ifdef MPI fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER); #else fftwnd_one(plan_p, in, NULL); #endif memcpy((void*)(work+_GWI(mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double)); } /* of mu =0 ,..., 3*/ /* fnorm = 1. 
/ ((double)count); */ fprintf(stdout, "fnorm = %e\n", fnorm); for(mu=0; mu<4; mu++) { for(nu=0; nu<4; nu++) { cp1 = (complex*)(work+_GWI(mu,0,VOLUME)); cp2 = (complex*)(cvc_lnuy+2*nu); cp3 = (complex*)(work+_GWI(4+4*mu+nu,0,VOLUME)); for(x0=0; x0<T; x0++) { q[0] = (double)(x0+Tstart) / (double)T_global; for(x1=0; x1<LX; x1++) { q[1] = (double)(x1) / (double)LX; for(x2=0; x2<LY; x2++) { q[2] = (double)(x2) / (double)LY; for(x3=0; x3<LZ; x3++) { q[3] = (double)(x3) / (double)LZ; ix = g_ipt[x0][x1][x2][x3]; w.re = cos( M_PI * ( q[mu] - q[nu] - 2.*(sx0*q[0]+sx1*q[1]+sx2*q[2]+sx3*q[3])) ); w.im = sin( M_PI * ( q[mu] - q[nu] - 2.*(sx0*q[0]+sx1*q[1]+sx2*q[2]+sx3*q[3])) ); /* fprintf(stdout, "mu=%3d, nu=%3d, t=%3d, x=%3d, y=%3d, z=%3d, phase= %21.12e + %21.12ei\n", \ mu, nu, x0, x1, x2, x3, w.re, w.im); */ _co_eq_co_ti_co(&w1, cp1, cp2); _co_eq_co_ti_co(cp3, &w1, &w); /* _co_ti_eq_re(cp3, fnorm); */ cp1++; cp3++; } } } } } } /* save the result in momentum space */ sprintf(filename, "outcvc_P.%.4d.%.4d", Nconf, count); write_contraction(work+_GWI(4,0,VOLUME), NULL, filename, 16, 2, 0); #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "time to cvc save results: %e seconds\n", retime-ratime); } /* of count % Nsave == 0 */ } /* of loop on sid */ if(g_resume==1) { /* write current disc to file */ sprintf(filename, ".outcvc_current.%.4d", Nconf); write_contraction(disc, &count, filename, 4, 0, 0); } /************************************** * free the allocated memory, finalize **************************************/ free(g_gauge_field); if(do_gt==1) free(gauge_trafo); for(i=0; i<no_fields; i++) free(g_spinor_field[i]); free(g_spinor_field); free_geometry(); fftw_free(in); free(disc); free(disc2); free(work); #ifdef MPI fftwnd_mpi_destroy_plan(plan_p); free(status); MPI_Finalize(); #else fftwnd_destroy_plan(plan_p); #endif return(0); }
int main(int argc, char **argv) { int c, i, mu, K=16; int count = 0; int filename_set = 0; int x0; int estat; // exit status unsigned long int ix, idx; unsigned long int x1, VOL3, index_min; int sid; double *disc = (double*)NULL, *buffer=NULL, *buffer2=NULL; int verbose = 0; char filename[100]; double ratime, retime; double plaq; double spinor1[24]; double *gauge_field_f=NULL, *gauge_field_timeslice=NULL; double v4norm = 0., vvnorm = 0.; double *psi0 = NULL, *psi1 = NULL, *psi2 = NULL, *psi3 = NULL; complex w; FILE *ofs[4]; double addreal, addimag; /* Initialise all the gamma matrix combinations g5, g1, g2, g3, ig0g5, ig0gi, -1, -g5gi, g0 -g5g0gi */ int gindex[] = {5, 1, 2, 3, 6, 7, 8, 9, 4, 10, 11, 12, 0, 13, 14, 15}; double gsign[] = {-1., 1., 1., 1., -1., 1., 1., 1., 1., 1., 1., 1., 1., 1., -1., 1.}; #ifdef MPI MPI_Status status; #endif #ifdef MPI MPI_Init(&argc, &argv); #endif while ((c = getopt(argc, argv, "h?vgf:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'h': case '?': default: usage(); break; } } // local time stamp g_the_time = time; if(g_cart_id == 0) { fprintf(stdout, "\n# [disc] using global time stamp %s", ctime(&g_the_time)); } /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } if(g_kappa == 0.) { if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n"); usage(); } /* initialize MPI parameters */ mpi_init(argc, argv); #ifdef MPI # if ! 
( (defined PARALLELTX) || (defined PARALLELTXY) ) T = T_global / g_nproc; Tstart = g_cart_id * T; # endif #else T = T_global; Tstart = 0; #endif VOL3 = LX*LY*LZ; fprintf(stdout, "# [%2d] parameters:\n"\ "# [%2d] T_global = %3d\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] LX_global = %3d\n"\ "# [%2d] LX = %3d\n"\ "# [%2d] LXstart = %3d\n", g_cart_id, g_cart_id, T_global, g_cart_id, T, g_cart_id, Tstart, g_cart_id, LX_global, g_cart_id, LX, g_cart_id, LXstart); if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(1); } geometry(); /********************************************** * read the gauge field **********************************************/ // if(N_ape>0 || Nlong>0) { alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND); sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf); if(g_cart_id==0) fprintf(stdout, "reading gauge field from file %s\n", filename); read_lime_gauge_field_doubleprec(filename); xchange_gauge(); plaquette(&plaq); if(g_cart_id==0) fprintf(stdout, "# measured plaquette value: %25.16e\n", plaq); // } else { // g_gauge_field = (double*)NULL; // } if(Nlong > 0) { // N_ape = 1; // alpha_ape = 0.4; if(g_cart_id==0) fprintf(stdout, "# apply fuzzing of gauge field and propagators with parameters:\n"\ "# Nlong = %d\n# N_ape = %d\n# alpha_ape = %f\n", Nlong, N_ape, alpha_ape); alloc_gauge_field(&gauge_field_f, VOLUMEPLUSRAND); #if !( (defined PARALLELTX) || (defined PARALLELTXY) ) gauge_field_timeslice = (double*)calloc(72*VOL3, sizeof(double)); if( gauge_field_timeslice == (double*)NULL ) { fprintf(stderr, "Error, could not allocate mem for gauge_field_timeslice\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(2); } for(x0=0; x0<T; x0++) { memcpy((void*)gauge_field_timeslice, (void*)(g_gauge_field+_GGI(g_ipt[x0][0][0][0],0)), 72*VOL3*sizeof(double)); for(i=0; i<N_ape; i++) { fprintf(stdout, "# [] APE smearing time slice 
%d step %d\n", x0, i); APE_Smearing_Step_Timeslice(gauge_field_timeslice, alpha_ape); } if(Nlong > 0) { fuzzed_links_Timeslice(gauge_field_f, gauge_field_timeslice, Nlong, x0); } else { memcpy(gauge_field_f+_GGI(g_ipt[x0][0][0][0], 0), gauge_field_timeslice, 72*VOL3*sizeof(double)); } } free(gauge_field_timeslice); #else for(i=0; i<N_ape; i++) { APE_Smearing_Step(g_gauge_field, alpha_ape); xchange_gauge_field_timeslice(g_gauge_field); } if ( Nlong > 0 ) { if(g_cart_id==0) fprintf(stdout, "\n# [hdisc] fuzzing gauge field ...\n"); fuzzed_links2(gauge_field_f, g_gauge_field, Nlong); } else { memcpy(gauge_field_f, g_gauge_field, 72*VOLUMEPLUSRAND*sizeof(double)); } xchange_gauge_field(gauge_field_f); read_lime_gauge_field_doubleprec(filename); xchange_gauge(); #endif /* for(ix=0; ix<VOLUME; ix++) { for(mu=0; mu<4; mu++) { for(i=0; i<9; i++) { fprintf(stdout, "%6d%3d%3d%25.16e%25.16e%25.16e%25.16e\n", ix, mu, i, gauge_field_f[_GGI(ix,mu)+2*i], gauge_field_f[_GGI(ix,mu)+2*i+1], g_gauge_field[_GGI(ix,mu)+2*i], g_gauge_field[_GGI(ix,mu)+2*i+1]); }} } */ } // of if Nlong > 0 /* allocate memory for the spinor fields */ no_fields = 8; g_spinor_field = (double**)calloc(no_fields, sizeof(double*)); for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUME+RAND); /* allocate memory for the contractions */ /* #ifdef PARALLELTX if(g_xs_id==0) {idx = 4 * 4 * K * T_global * 2;} else {idx = 4 * 4 * K * T * 2;} #else if(g_cart_id==0) {idx = 4 * 4 * K * T_global * 2;} else {idx = 4 * 4 * K* T * 2;} #endif */ disc = (double*)calloc(32*K*T, sizeof(double)); if( disc==(double*)NULL ) { fprintf(stderr, "could not allocate memory for disc\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(3); } buffer = (double*)calloc(32*K*T, sizeof(double)); if( buffer==(double*)NULL ) { fprintf(stderr, "could not allocate memory for buffer\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(4); } buffer2 = (double*)calloc(32*K*T_global, 
sizeof(double)); if( buffer2==(double*)NULL ) { fprintf(stderr, "could not allocate memory for buffer2\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(5); } if(g_cart_id==0) { sprintf(filename, "hdisc-ss.k0v4.%.4d", Nconf); ofs[0] = fopen(filename, "w"); sprintf(filename, "hdisc-sc.k0v4.%.4d", Nconf); ofs[1] = fopen(filename, "w"); sprintf(filename, "hdisc-cs.k0v4.%.4d", Nconf); ofs[2] = fopen(filename, "w"); sprintf(filename, "hdisc-cc.k0v4.%.4d", Nconf); ofs[3] = fopen(filename, "w"); if(ofs[0]==(FILE*)NULL || ofs[1]==(FILE*)NULL || ofs[2]==(FILE*)NULL || ofs[3]==(FILE*)NULL) { fprintf(stderr, "Error, could not open files for writing.\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(6); } } /***************************************** * HPE coefficients *****************************************/ /* if(format==1) { */ addimag = 2*g_kappa*g_musigma/sqrt(1 + 4*g_kappa*g_kappa*(g_musigma*g_musigma-g_mudelta*g_mudelta)) * LX*LY*LZ*3*4*2.*g_kappa*g_kappa*4; // addreal = (1.+2*g_kappa*g_mudelta)/sqrt(1 + 4*g_kappa*g_kappa*(g_musigma*g_musigma-g_mudelta*g_mudelta)) * // LX*LY*LZ*3*4*2.*g_kappa*g_kappa*4; addreal = (1.- 2*g_kappa*g_mudelta)/sqrt(1 + 4*g_kappa*g_kappa*(g_musigma*g_musigma-g_mudelta*g_mudelta)) * LX*LY*LZ*3*4*2.*g_kappa*g_kappa*4; v4norm = 1. / ( 8. * g_kappa * g_kappa ); vvnorm = 1. / ( 8. * g_kappa * g_kappa ); /* } else { addimag = 2*g_kappa*g_musigma/sqrt(1 + 4*g_kappa*g_kappa*(g_musigma*g_musigma-g_mudelta*g_mudelta)) * LX*LY*LZ*3*4*2.*g_kappa*2; addreal = (1.+2*g_kappa*g_mudelta)/sqrt(1 + 4*g_kappa*g_kappa*(g_musigma*g_musigma-g_mudelta*g_mudelta)) * LX*LY*LZ*3*4*2.*g_kappa*2; v4norm = 1. / ( 4. * g_kappa ); vvnorm = 1. / ( 4. 
* g_kappa ); } */ if(g_cart_id==0) fprintf(stdout, "# addimag = %25.16e;\t addreal = %25.16e\n"\ "# v4norm = %25.16e;\t vvnorm = %25.16e\n", addimag, addreal, v4norm, vvnorm); /****************************************** * start loop on source id.s ******************************************/ count = -1; for(sid=g_sourceid; sid<=g_sourceid2; sid+=g_sourceid_step) { for(ix=0; ix<32*K*T; ix++) disc[ix] = 0.; for(ix=0; ix<32*K*T; ix++) buffer[ix] = 0.; for(ix=0; ix<32*K*T_global; ix++) buffer2[ix] = 0.; /* read the new propagator */ sprintf(filename, "%s.%.4d.%.5d.hinverted", filename_prefix, Nconf, sid); /* sprintf(filename, "%s.%.4d.%.2d.inverted", filename_prefix, Nconf, sid); */ estat = read_lime_spinor(g_spinor_field[2], filename, 0); if( estat != 0 ) { fprintf(stderr, "[%2d] Error, could not read from file %s at position 0\n", g_cart_id, filename); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(7); } estat = read_lime_spinor(g_spinor_field[3], filename, 1); if( estat != 0 ) { fprintf(stderr, "[%2d] Error, could not read from file %s at position 1\n", g_cart_id, filename); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(7); } count++; xchange_field(g_spinor_field[2]); xchange_field(g_spinor_field[3]); /* calculate the source: apply Q_phi_tbc */ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif Q_h_phi(g_spinor_field[0], g_spinor_field[1], g_spinor_field[2], g_spinor_field[3]); xchange_field(g_spinor_field[0]); xchange_field(g_spinor_field[1]); // print the sources /* for(ix=0; ix<VOLUME; ix++) { for(mu=0; mu<12; mu++) { fprintf(stdout, "%6d%3d%25.16e%25.16e%25.16e%25.16e\n", ix, mu, g_spinor_field[0][_GSI(ix)+2*mu], g_spinor_field[0][_GSI(ix)+2*mu+1], g_spinor_field[1][_GSI(ix)+2*mu], g_spinor_field[1][_GSI(ix)+2*mu+1]); } } */ #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "\n# [hdisc] time for applying 
Q_tm_h: %e seconds\n", retime-ratime); /* apply gamma5_BdagH4_gamma5 */ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif gamma5_B_h_dagH4_gamma5(g_spinor_field[4], g_spinor_field[5], g_spinor_field[0], g_spinor_field[1], g_spinor_field[6], g_spinor_field[7]); /* for(ix=0; ix<VOLUME; ix++) { for(mu=0; mu<12; mu++) { fprintf(stdout, "%6d%3d%25.16e%25.16e%25.16e%25.16e\n", ix, mu, g_spinor_field[4][_GSI(ix)+2*mu], g_spinor_field[4][_GSI(ix)+2*mu+1], g_spinor_field[5][_GSI(ix)+2*mu], g_spinor_field[5][_GSI(ix)+2*mu+1]); } } */ #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "# time for applying noise reduction: %e seconds\n", retime-ratime); #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif if(Nlong>0) { if(g_cart_id==0) fprintf(stdout, "# fuzzing propagator with Nlong = %d\n", Nlong); memcpy((void*)g_spinor_field[6], (void*)g_spinor_field[2], 24*(VOLUME+RAND)*sizeof(double)); /* xchange_field_timeslice(g_spinor_field[6]); */ Fuzz_prop3(gauge_field_f, g_spinor_field[6], g_spinor_field[0], Nlong); xchange_field_timeslice(g_spinor_field[6]); memcpy((void*)g_spinor_field[7], (void*)g_spinor_field[3], 24*(VOLUME+RAND)*sizeof(double)); /* xchange_field_timeslice(g_spinor_field[7]); */ Fuzz_prop3(gauge_field_f, g_spinor_field[7], g_spinor_field[1], Nlong); xchange_field_timeslice(g_spinor_field[7]); } else { for(ix=0;ix<VOLUME;ix++) { _fv_eq_zero(g_spinor_field[6]+_GSI(ix)); } for(ix=0;ix<VOLUME;ix++) { _fv_eq_zero(g_spinor_field[7]+_GSI(ix)); } } /* for(ix=0; ix<VOLUME; ix++) { for(mu=0; mu<12; mu++) { fprintf(stdout, "%6d%3d%25.16e%25.16e%25.16e%25.16e\n", ix, mu, g_spinor_field[6][_GSI(ix)+2*mu], g_spinor_field[6][_GSI(ix)+2*mu+1], g_spinor_field[7][_GSI(ix)+2*mu], g_spinor_field[7][_GSI(ix)+2*mu+1]); } } */ // recalculate the sources --- they are changed in Fuzz_prop3 Q_h_phi(g_spinor_field[0], g_spinor_field[1], 
g_spinor_field[2], g_spinor_field[3]); xchange_field(g_spinor_field[0]); xchange_field(g_spinor_field[1]); #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "# time for fuzzing: %e seconds\n", retime-ratime); /******************************** * add new contractions to disc ********************************/ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif for(c=0; c<4; c++) { if(c==0) { psi0 = g_spinor_field[2]; psi1 = g_spinor_field[4]; psi2 = g_spinor_field[6]; psi3 = g_spinor_field[0]; } if(c==1) { psi0 = g_spinor_field[2]; psi1 = g_spinor_field[5]; psi2 = g_spinor_field[6]; psi3 = g_spinor_field[1]; } if(c==2) { psi0 = g_spinor_field[3]; psi1 = g_spinor_field[4]; psi2 = g_spinor_field[7]; psi3 = g_spinor_field[0]; } if(c==3) { psi0 = g_spinor_field[3]; psi1 = g_spinor_field[5]; psi2 = g_spinor_field[7]; psi3 = g_spinor_field[1]; } /* for(ix=0; ix<VOLUME; ix++) { for(mu=0; mu<12; mu++) { fprintf(stdout, "%6d%3d%16.7e%16.7e%16.7e%16.7e%16.7e%16.7e%16.7e%16.7e\n", ix, mu, psi0[_GSI(ix)+2*mu], psi0[_GSI(ix)+2*mu+1], psi1[_GSI(ix)+2*mu], psi1[_GSI(ix)+2*mu+1], psi2[_GSI(ix)+2*mu], psi2[_GSI(ix)+2*mu+1], psi3[_GSI(ix)+2*mu], psi3[_GSI(ix)+2*mu+1]); } } */ for(x0=0; x0<T; x0++) { for(mu=0; mu<16; mu++) { index_min = x0 * K + mu + c * 4 * K * T; for(x1=0; x1<VOL3; x1++) { ix = x0*VOL3 + x1; idx = _GSI( ix ); _fv_eq_gamma_ti_fv(spinor1, mu, psi0+idx); _co_eq_fv_dag_ti_fv(&w, psi1+idx, spinor1); disc[2*( index_min) ] += w.re; disc[2*( index_min)+1] += w.im; if(Nlong>0) { _fv_eq_gamma_ti_fv(spinor1, mu, psi2+idx); _co_eq_fv_dag_ti_fv(&w, psi1+idx, spinor1); disc[2*( K*T + index_min) ] += w.re; disc[2*( K*T + index_min)+1] += w.im; } _fv_eq_gamma_ti_fv(spinor1, mu, psi0+idx); _co_eq_fv_dag_ti_fv(&w, psi3+idx, spinor1); disc[2*(2*K*T + index_min) ] += w.re; disc[2*(2*K*T + index_min)+1] += w.im; if(Nlong>0) { _fv_eq_gamma_ti_fv(spinor1, mu, psi2+idx); 
_co_eq_fv_dag_ti_fv(&w, psi3+idx, spinor1); disc[2*(3*K*T + index_min) ] += w.re; disc[2*(3*K*T + index_min)+1] += w.im; } } } } // of loop on x0 for(x0=0; x0<T; x0++) { disc[2*( x0*K+4 + 4*c*K*T) ] += addreal; disc[2*( x0*K+5 + 4*c*K*T)+1] -= addimag; if(Nlong>0) { disc[2*(K*T + x0*K+4 + 4*c*K*T) ] += addreal; disc[2*(K*T + x0*K+5 + 4*c*K*T)+1] -= addimag; } } } // of c=0,...,4 #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "# time for contracting: %e seconds\n", retime-ratime); #ifdef MPI /* collect results to disc */ #if (defined PARALLELTX) || (defined PARALLELTXY) MPI_Allreduce(disc, buffer, 32*K*T, MPI_DOUBLE, MPI_SUM, g_ts_comm); MPI_Allgather(buffer, 32*K*T, MPI_DOUBLE, buffer2, 32*K*T, MPI_DOUBLE, g_xs_comm); # else MPI_Gather(disc, 32*K*T, MPI_DOUBLE, buffer2, 32*K*T, MPI_DOUBLE, 0, g_cart_grid); # endif #else memcpy((void*)buffer2, (void*)disc, 32*K*T_global*sizeof(double)); #endif /* write current disc to file */ if(g_cart_id==0) { for(c=0; c<4; c++) { if(sid==g_sourceid) fprintf(ofs[c], "#%6d%3d%3d%3d%3d\t%f\t%f\t%f\t%f\n", Nconf, T_global, LX_global, LY_global, LZ, g_kappa, g_mu, g_musigma, g_mudelta); for(x0=0; x0<T_global; x0++) { for(mu=0; mu<16; mu++) { idx = gindex[mu]; ix = K*(x0%T) + idx + 16*K*T*(x0/T) + c*4*K*T; fprintf(ofs[c], "%6d%3d%4d%4d%25.16e%25.16e%25.16e%25.16e%25.16e%25.16e%25.16e%25.16e\n", Nconf, mu, x0, sid, gsign[mu]*buffer2[2*( ix)]*v4norm, gsign[mu]*buffer2[2*( ix)+1]*v4norm, gsign[mu]*buffer2[2*( K*T+ix)]*v4norm, gsign[mu]*buffer2[2*( K*T+ix)+1]*v4norm, gsign[mu]*buffer2[2*(2*K*T+ix)]*vvnorm, gsign[mu]*buffer2[2*(2*K*T+ix)+1]*vvnorm, gsign[mu]*buffer2[2*(3*K*T+ix)]*vvnorm, gsign[mu]*buffer2[2*(3*K*T+ix)+1]*vvnorm); } } } } if(g_cart_id==0) fprintf(stdout, "# finished all sid %d\n", sid); } /* of loop on sid */ if(g_cart_id==0) { fclose(ofs[0]); fclose(ofs[1]); fclose(ofs[2]); fclose(ofs[3]); } /* free the allocated memory, finalize */ 
free(g_gauge_field); if(no_fields>0) { for(i=0; i<no_fields; i++) free(g_spinor_field[i]); free(g_spinor_field); } free_geometry(); free(disc); free(buffer); free(buffer2); if(Nlong>0) free(gauge_field_f); if(g_cart_id == 0) { g_the_time = time(NULL); fprintf(stdout, "\n# [disc] %s# [disc] end of run\n", ctime(&g_the_time)); fprintf(stderr, "\n# [disc] %s# [disc] end of run\n", ctime(&g_the_time)); } #ifdef MPI MPI_Finalize(); #endif return(0); }
int main(int argc, char **argv) { const int n_c = 3; // number of colors int c, i, j, mu, nu, ir, is, ia, imunu; int filename_set = 0; int dims[4] = {0,0,0,0}; int l_LX_at, l_LXstart_at; int source_location, have_source_flag = 0; int x0, x1, x2, x3, ix; int sx0, sx1, sx2, sx3; int isimag[4]; int gperm[5][4], gperm2[4][4]; int check_position_space_WI=0; int num_threads = 1, nthreads=-1, threadid=-1; int exitstatus; int write_ascii=0; int mms = 0, mass_id = -1; int outfile_prefix_set = 0; int source_proc_coords[4], source_proc_id = -1; int ud_single_file = 0; double gperm_sign[5][4], gperm2_sign[4][4]; double *conn = NULL; double *conn2 = NULL; double contact_term[8]; double *work=NULL; int verbose = 0; int do_gt = 0, status; char filename[100], contype[400], outfile_prefix[400]; double ratime, retime; double plaq; double spinor1[24], spinor2[24], U_[18]; double *gauge_trafo=(double*)NULL; double *phi=NULL, *chi=NULL; complex w; double Usourcebuff[72], *Usource[4]; FILE *ofs; #ifdef MPI int *status; #endif #ifdef MPI MPI_Init(&argc, &argv); #endif while ((c = getopt(argc, argv, "swah?vgf:t:m:o:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'g': do_gt = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'w': check_position_space_WI = 1; fprintf(stdout, "\n# [avc_exact2_lowmem_xspace] will check Ward identity in position space\n"); break; case 't': num_threads = atoi(optarg); fprintf(stdout, "\n# [avc_exact2_lowmem_xspace] will use %d threads in spacetime loops\n", num_threads); break; case 'a': write_ascii = 1; fprintf(stdout, "\n# [avc_exact2_lowmem_xspace] will write data in ASCII format too\n"); break; case 'm': mms = 1; mass_id = atoi(optarg); fprintf(stdout, "\n# [avc_exact2_lowmem_xspace] will read propagators in MMS format with mass id %d\n", mass_id); break; case 'o': strcpy(outfile_prefix, optarg); fprintf(stdout, "\n# [avc_exact2_lowmem_xspace] will use prefix %s for output filenames\n", outfile_prefix); 
outfile_prefix_set = 1; break; case 's': ud_single_file = 1; fprintf(stdout, "\n# [avc_exact2_lowmem_xspace] will read up and down propagator from same file\n"); break; case 'h': case '?': default: usage(); break; } } if(g_cart_id==0) { g_the_time = time(NULL); fprintf(stdout, "\n# [avc_exact2_lowmem_xspace] using global time stamp %s", ctime(&g_the_time)); } /********************************* * set number of openmp threads *********************************/ #ifdef OPENMP omp_set_num_threads(num_threads); #endif /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# Reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stderr, "\n[avc_exact2_lowmem_xspace] T and L's must be set\n"); usage(); } if(g_kappa == 0.) { if(g_proc_id==0) fprintf(stderr, "\n[avc_exact2_lowmem_xspace] kappa should be > 0.n"); usage(); } /* initialize MPI parameters */ mpi_init(argc, argv); #ifdef MPI if((status = (int*)calloc(g_nproc, sizeof(int))) == (int*)NULL) { MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); exit(7); } #endif dims[0]=T_global; dims[1]=LX; dims[2]=LY; dims[3]=LZ; #ifndef MPI T = T_global; Tstart = 0; l_LX_at = LX; l_LXstart_at = 0; #endif fprintf(stdout, "# [%2d] parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] l_LX_at = %3d\n"\ "# [%2d] l_LXstart_at = %3d\n", g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at, g_cart_id, l_LXstart_at); #ifdef MPI if(T==0) { fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id); MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); exit(2); } #endif if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(1); } geometry(); alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND); if(!(strcmp(gaugefilename_prefix,"identity")==0)) { /* read the gauge field */ 
sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf); if(g_cart_id==0) fprintf(stdout, "reading gauge field from file %s\n", filename); read_lime_gauge_field_doubleprec(filename); } else { /* initialize unit matrices */ if(g_cart_id==0) fprintf(stdout, "\n# [avc_exact] initializing unit matrices\n"); for(ix=0;ix<VOLUME;ix++) { _cm_eq_id( g_gauge_field + _GGI(ix, 0) ); _cm_eq_id( g_gauge_field + _GGI(ix, 1) ); _cm_eq_id( g_gauge_field + _GGI(ix, 2) ); _cm_eq_id( g_gauge_field + _GGI(ix, 3) ); } } #ifdef MPI xchange_gauge(); #endif /* measure the plaquette */ plaquette(&plaq); if(g_cart_id==0) fprintf(stdout, "measured plaquette value: %25.16e\n", plaq); /* sprintf(filename, "gauge.%.2d", g_cart_id); ofs = fopen(filename, "w"); for(x0=0;x0<T;x0++) { for(x1=0;x1<LX;x1++) { for(x2=0;x2<LY;x2++) { for(x3=0;x3<LZ;x3++) { ix = g_ipt[x0][x1][x2][x3]; for(mu=0;mu<4;mu++) { for(i=0;i<9;i++) { fprintf(ofs, "%8d%3d%3d%3d%3d%3d%3d%25.16e%25.16e\n", ix, x0+Tstart, x1+LXstart, x2+LYstart, x3, mu, i, g_gauge_field[_GGI(ix,mu)+2*i], g_gauge_field[_GGI(ix,mu)+2*i+1]); } } }}}} fclose(ofs); if(g_cart_id==0) fprintf(stdout, "\nWarning: forced exit\n"); fflush(stdout); fflush(stderr); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 255); MPI_Finalize(); #endif exit(255); */ /* allocate memory for the spinor fields */ no_fields = 2; if(mms) no_fields++; g_spinor_field = (double**)calloc(no_fields, sizeof(double*)); for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND); if(mms) { work = g_spinor_field[no_fields-1]; } /* allocate memory for the contractions */ conn = (double*)calloc(2 * 16 * VOLUME, sizeof(double)); if( conn==(double*)NULL ) { fprintf(stderr, "could not allocate memory for contr. 
fields\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 3); MPI_Finalize(); #endif exit(3); } #ifdef OPENMP #pragma omp parallel for #endif for(ix=0; ix<32*VOLUME; ix++) conn[ix] = 0.; conn2 = (double*)calloc(2 * 16 * VOLUME, sizeof(double)); if( conn2 == NULL ) { fprintf(stderr, "could not allocate memory for contr. fields\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 3); MPI_Finalize(); #endif exit(3); } #ifdef OPENMP #pragma omp parallel for #endif for(ix=0; ix<32*VOLUME; ix++) conn2[ix] = 0.; /*********************************************************** * determine source coordinates, find out, if source_location is in this process ***********************************************************/ #if (defined PARALLELTX) || (defined PARALLELTXY) sx0 = g_source_location / (LX_global*LY_global*LZ); sx1 = (g_source_location%(LX_global*LY_global*LZ)) / (LY_global*LZ); sx2 = (g_source_location%(LY_global*LZ)) / LZ; sx3 = (g_source_location%LZ); source_proc_coords[0] = sx0 / T; source_proc_coords[1] = sx1 / LX; source_proc_coords[2] = sx2 / LY; source_proc_coords[3] = 0; MPI_Cart_rank(g_cart_grid, source_proc_coords, &source_proc_id); have_source_flag = (int)(g_cart_id == source_proc_id); if(have_source_flag==1) { fprintf(stdout, "\n# process %2d has source location\n", source_proc_id); fprintf(stdout, "\n# global source coordinates: (%3d,%3d,%3d,%3d)\n", sx0, sx1, sx2, sx3); fprintf(stdout, "\n# source proc coordinates: (%3d,%3d,%3d,%3d)\n", source_proc_coords[0], source_proc_coords[1], source_proc_coords[2], source_proc_coords[3]); } sx0 = sx0 % T; sx1 = sx1 % LX; sx2 = sx2 % LY; sx3 = sx3 % LZ; # else have_source_flag = (int)(g_source_location/(LX*LY*LZ)>=Tstart && g_source_location/(LX*LY*LZ)<(Tstart+T)); if(have_source_flag==1) fprintf(stdout, "process %2d has source location\n", g_cart_id); sx0 = g_source_location/(LX*LY*LZ)-Tstart; sx1 = (g_source_location%(LX*LY*LZ)) / (LY*LZ); sx2 = (g_source_location%(LY*LZ)) / LZ; sx3 = (g_source_location%LZ); #endif 
if(have_source_flag==1) { fprintf(stdout, "local source coordinates: (%3d,%3d,%3d,%3d)\n", sx0, sx1, sx2, sx3); source_location = g_ipt[sx0][sx1][sx2][sx3]; } #ifdef MPI # if (defined PARALLELTX) || (defined PARALLELTXY) have_source_flag = source_proc_id; MPI_Bcast(Usourcebuff, 72, MPI_DOUBLE, have_source_flag, g_cart_grid); # else MPI_Gather(&have_source_flag, 1, MPI_INT, status, 1, MPI_INT, 0, g_cart_grid); if(g_cart_id==0) { for(mu=0; mu<g_nproc; mu++) fprintf(stdout, "status[%1d]=%d\n", mu,status[mu]); } if(g_cart_id==0) { for(have_source_flag=0; status[have_source_flag]!=1; have_source_flag++); fprintf(stdout, "have_source_flag= %d\n", have_source_flag); } MPI_Bcast(&have_source_flag, 1, MPI_INT, 0, g_cart_grid); # endif fprintf(stdout, "[%2d] have_source_flag = %d\n", g_cart_id, have_source_flag); #else have_source_flag = 0; #endif /* if(g_cart_id==0) fprintf(stdout, "\nWarning: forced exit\n"); fflush(stdout); fflush(stderr); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 255); MPI_Finalize(); #endif exit(255); */ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif /*********************************************************** * initialize the Gamma matrices ***********************************************************/ // gamma_5: gperm[4][0] = gamma_permutation[5][ 0] / 6; gperm[4][1] = gamma_permutation[5][ 6] / 6; gperm[4][2] = gamma_permutation[5][12] / 6; gperm[4][3] = gamma_permutation[5][18] / 6; gperm_sign[4][0] = gamma_sign[5][ 0]; gperm_sign[4][1] = gamma_sign[5][ 6]; gperm_sign[4][2] = gamma_sign[5][12]; gperm_sign[4][3] = gamma_sign[5][18]; // gamma_nu gamma_5 for(nu=0;nu<4;nu++) { // permutation gperm[nu][0] = gamma_permutation[6+nu][ 0] / 6; gperm[nu][1] = gamma_permutation[6+nu][ 6] / 6; gperm[nu][2] = gamma_permutation[6+nu][12] / 6; gperm[nu][3] = gamma_permutation[6+nu][18] / 6; // is imaginary ? 
isimag[nu] = gamma_permutation[6+nu][0] % 2; // (overall) sign gperm_sign[nu][0] = gamma_sign[6+nu][ 0]; gperm_sign[nu][1] = gamma_sign[6+nu][ 6]; gperm_sign[nu][2] = gamma_sign[6+nu][12]; gperm_sign[nu][3] = gamma_sign[6+nu][18]; // write to stdout if(g_cart_id == 0) { fprintf(stdout, "# gamma_%d5 = (%f %d, %f %d, %f %d, %f %d)\n", nu, gperm_sign[nu][0], gperm[nu][0], gperm_sign[nu][1], gperm[nu][1], gperm_sign[nu][2], gperm[nu][2], gperm_sign[nu][3], gperm[nu][3]); } } // gamma_nu for(nu=0;nu<4;nu++) { // permutation gperm2[nu][0] = gamma_permutation[nu][ 0] / 6; gperm2[nu][1] = gamma_permutation[nu][ 6] / 6; gperm2[nu][2] = gamma_permutation[nu][12] / 6; gperm2[nu][3] = gamma_permutation[nu][18] / 6; // (overall) sign gperm2_sign[nu][0] = gamma_sign[nu][ 0]; gperm2_sign[nu][1] = gamma_sign[nu][ 6]; gperm2_sign[nu][2] = gamma_sign[nu][12]; gperm2_sign[nu][3] = gamma_sign[nu][18]; // write to stdout if(g_cart_id == 0) { fprintf(stdout, "# gamma_%d = (%f %d, %f %d, %f %d, %f %d)\n", nu, gperm2_sign[nu][0], gperm2[nu][0], gperm2_sign[nu][1], gperm2[nu][1], gperm2_sign[nu][2], gperm2[nu][2], gperm2_sign[nu][3], gperm2[nu][3]); } } /********************************************************** ********************************************************** ** ** first contribution ** ********************************************************** **********************************************************/ /********************************************** * loop on the Lorentz index nu at source **********************************************/ for(ia=0; ia<n_c; ia++) { for(nu=0; nu<4; nu++) //for(nu=0; nu<4; nu++) { // fprintf(stdout, "\n# [avc_exact2_lowmem_xspace] 1st part, processing nu = %d ...\n", nu); for(ir=0; ir<4; ir++) { // read 1 up-type propagator color components for spinor index ir if(!mms) { get_filename(filename, 0, 3*ir+ia, 1); exitstatus = read_lime_spinor(g_spinor_field[0], filename, 0); if(exitstatus != 0) { fprintf(stderr, "\n# [avc_exact2_lowmem_xspace] Error 
from read_lime_spinor\n"); exit(111); } xchange_field(g_spinor_field[0]); } else { sprintf(filename, "%s.%.4d.00.%.2d.cgmms.%.2d.inverted", filename_prefix, Nconf, 3*ir+ia, mass_id); exitstatus = read_lime_spinor(work, filename, 0); if(exitstatus != 0) { fprintf(stderr, "\n# [avc_exact2_lowmem_xspace] Error from read_lime_spinor\n"); exit(111); } xchange_field(work); Qf5(g_spinor_field[0], work, -g_mu); xchange_field(g_spinor_field[0]); } // read 1 dn-type propagator color components for spinor index gamma_perm ( ir ) if(!mms) { if(ud_single_file) { get_filename(filename, 0, 3*gperm[nu][ir]+ia, 1); exitstatus = read_lime_spinor(g_spinor_field[1], filename, 1); } else { get_filename(filename, 0, 3*gperm[nu][ir]+ia, -1); exitstatus = read_lime_spinor(g_spinor_field[1], filename, 0); } if(exitstatus != 0) { fprintf(stderr, "\n# [avc_exact2_lowmem_xspace] Error from read_lime_spinor\n"); exit(111); } xchange_field(g_spinor_field[1]); } else { sprintf(filename, "%s.%.4d.%.2d.%.2d.cgmms.%.2d.inverted", filename_prefix, Nconf, 4, 3*gperm[nu][ir]+ia, mass_id); exitstatus = read_lime_spinor(work, filename, 0); if(exitstatus != 0) { fprintf(stderr, "\n# [avc_exact2_lowmem_xspace] Error from read_lime_spinor\n"); exit(111); } xchange_field(work); Qf5(g_spinor_field[1], work, g_mu); xchange_field(g_spinor_field[1]); } phi = g_spinor_field[0]; chi = g_spinor_field[1]; //fprintf(stdout, "\n# [nu5] spin index pair (%d, %d); col index %d\n", ir, gperm[nu][ir], ia); // 1) gamma_nu gamma_5 x U for(mu=0; mu<4; mu++) //for(mu=0; mu<1; mu++) { imunu = 4*mu+nu; #ifdef OPENMP #pragma omp parallel for private(ix, spinor1, spinor2, U_, w) shared(imunu, ia, nu, mu) #endif for(ix=0; ix<VOLUME; ix++) { /* threadid = omp_get_thread_num(); nthreads = omp_get_num_threads(); fprintf(stdout, "[thread%d] number of threads = %d\n", threadid, nthreads); */ _cm_eq_cm_ti_co(U_, &g_gauge_field[_GGI(ix,mu)], &co_phase_up[mu]); _fv_eq_cm_ti_fv(spinor1, U_, phi+_GSI(g_iup[ix][mu])); 
_fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_mi_eq_fv(spinor2, spinor1); _fv_eq_gamma_ti_fv(spinor1, 5, spinor2); _co_eq_fv_dag_ti_fv(&w, chi+_GSI(ix), spinor1); if(!isimag[nu]) { conn[_GWI(imunu,ix,VOLUME) ] += gperm_sign[nu][ir] * w.re; conn[_GWI(imunu,ix,VOLUME)+1] += gperm_sign[nu][ir] * w.im; } else { conn[_GWI(imunu,ix,VOLUME) ] += gperm_sign[nu][ir] * w.im; conn[_GWI(imunu,ix,VOLUME)+1] -= gperm_sign[nu][ir] * w.re; } } // of ix #ifdef OPENMP #pragma omp parallel for private(ix, spinor1, spinor2, U_, w) shared(imunu, ia, nu, mu) #endif for(ix=0; ix<VOLUME; ix++) { _cm_eq_cm_ti_co(U_, &g_gauge_field[_GGI(ix,mu)], &co_phase_up[mu]); _fv_eq_cm_dag_ti_fv(spinor1, U_, phi+_GSI(ix)); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_pl_eq_fv(spinor2, spinor1); _fv_eq_gamma_ti_fv(spinor1, 5, spinor2); _co_eq_fv_dag_ti_fv(&w, chi+_GSI(g_iup[ix][mu]), spinor1); if(!isimag[nu]) { conn[_GWI(imunu,ix,VOLUME) ] += gperm_sign[nu][ir] * w.re; conn[_GWI(imunu,ix,VOLUME)+1] += gperm_sign[nu][ir] * w.im; } else { conn[_GWI(imunu,ix,VOLUME) ] += gperm_sign[nu][ir] * w.im; conn[_GWI(imunu,ix,VOLUME)+1] -= gperm_sign[nu][ir] * w.re; } } // of ix // contribution to local-local correlator #ifdef OPENMP #pragma omp parallel for private(ix, spinor1, spinor2, U_, w) shared(imunu, ia, nu, mu) #endif for(ix=0; ix<VOLUME; ix++) { _fv_eq_gamma_ti_fv(spinor2, mu, phi+_GSI(ix) ); _fv_eq_gamma_ti_fv(spinor1, 5, spinor2); _co_eq_fv_dag_ti_fv(&w, chi+_GSI(ix), spinor1); if(!isimag[nu]) { conn2[_GWI(imunu,ix,VOLUME) ] += gperm_sign[nu][ir] * w.re; conn2[_GWI(imunu,ix,VOLUME)+1] += gperm_sign[nu][ir] * w.im; } else { conn2[_GWI(imunu,ix,VOLUME) ] += gperm_sign[nu][ir] * w.im; conn2[_GWI(imunu,ix,VOLUME)+1] -= gperm_sign[nu][ir] * w.re; } } // of ix } // of mu } // of ir } // of nu } // of ia loop on colors // normalisation of contractions #ifdef OPENMP #pragma omp parallel for #endif for(ix=0; ix<32*VOLUME; ix++) conn[ix] *= -0.5; #ifdef OPENMP #pragma omp parallel for #endif for(ix=0; 
ix<32*VOLUME; ix++) conn2[ix] *= -1.; #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "contractions in %e seconds\n", retime-ratime); // save results #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif if(outfile_prefix_set) { sprintf(filename, "%s/cvc_lvc_x.%.4d.t%.2dx%.2dy%.2dz%.2d", outfile_prefix, Nconf, sx0, sx1, sx2, sx3); } else { sprintf(filename, "cvc_lvc_x.%.4d.t%.2dx%.2dy%.2dz%.2d", Nconf, sx0, sx1, sx2, sx3); } sprintf(contype, "cvc - lvc in position space, all 16 components"); status = write_lime_contraction(conn, filename, 64, 16, contype, Nconf, 0); if(status != 0) { fprintf(stderr, "[] Error from write_lime_contractions, status was %d\n", status); exit(16); } if(outfile_prefix_set) { sprintf(filename, "%s/lvc_lvc_x.%.4d.t%.2dx%.2dy%.2dz%.2d", outfile_prefix, Nconf, sx0, sx1, sx2, sx3); } else { sprintf(filename, "lvc_lvc_x.%.4d.t%.2dx%.2dy%.2dz%.2d", Nconf, sx0, sx1, sx2, sx3); } sprintf(contype, "lvc - lvc in position space, all 16 components"); status = write_lime_contraction(conn2, filename, 64, 16, contype, Nconf, 0); if(status != 0) { fprintf(stderr, "[] Error from write_lime_contractions, status was %d\n", status); exit(17); } #ifndef MPI if(write_ascii) { if(outfile_prefix_set) { sprintf(filename, "%s/cvc_lvc_x.%.4d.ascii", outfile_prefix, Nconf); } else { sprintf(filename, "cvc_lvc_x.%.4d.ascii", Nconf); } write_contraction(conn, NULL, filename, 16, 2, 0); if(outfile_prefix_set) { sprintf(filename, "%s/lvc_lvc_x.%.4d.ascii", outfile_prefix, Nconf); } else { sprintf(filename, "lvc_lvc_x.%.4d.ascii", Nconf); } write_contraction(conn2, NULL, filename, 16, 2, 0); } #endif #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "saved position space results in %e seconds\n", retime-ratime); #ifndef MPI // check the Ward identity in position space 
if(check_position_space_WI) { sprintf(filename, "WI_X.%.4d", Nconf); ofs = fopen(filename,"w"); fprintf(stdout, "\n# [avc_exact2_lowmem_xspace] checking Ward identity in position space ...\n"); for(x0=0; x0<T; x0++) { for(x1=0; x1<LX; x1++) { for(x2=0; x2<LY; x2++) { for(x3=0; x3<LZ; x3++) { fprintf(ofs, "# t=%2d x=%2d y=%2d z=%2d\n", x0, x1, x2, x3); ix=g_ipt[x0][x1][x2][x3]; for(nu=0; nu<4; nu++) { w.re = conn[_GWI(4*0+nu,ix,VOLUME)] + conn[_GWI(4*1+nu,ix,VOLUME)] + conn[_GWI(4*2+nu,ix,VOLUME)] + conn[_GWI(4*3+nu,ix,VOLUME)] - conn[_GWI(4*0+nu,g_idn[ix][0],VOLUME)] - conn[_GWI(4*1+nu,g_idn[ix][1],VOLUME)] - conn[_GWI(4*2+nu,g_idn[ix][2],VOLUME)] - conn[_GWI(4*3+nu,g_idn[ix][3],VOLUME)]; w.im = conn[_GWI(4*0+nu,ix,VOLUME)+1] + conn[_GWI(4*1+nu,ix,VOLUME)+1] + conn[_GWI(4*2+nu,ix,VOLUME)+1] + conn[_GWI(4*3+nu,ix,VOLUME)+1] - conn[_GWI(4*0+nu,g_idn[ix][0],VOLUME)+1] - conn[_GWI(4*1+nu,g_idn[ix][1],VOLUME)+1] - conn[_GWI(4*2+nu,g_idn[ix][2],VOLUME)+1] - conn[_GWI(4*3+nu,g_idn[ix][3],VOLUME)+1]; fprintf(ofs, "\t%3d%25.16e%25.16e\n", nu, w.re, w.im); } }}}} fclose(ofs); } #endif /**************************************** * free the allocated memory, finalize ****************************************/ free(g_gauge_field); for(i=0; i<no_fields; i++) free(g_spinor_field[i]); free(g_spinor_field); free_geometry(); if(conn != NULL) free(conn); if(conn2 != NULL) free(conn2); #ifdef MPI free(status); MPI_Finalize(); #endif if(g_cart_id==0) { g_the_time = time(NULL); fprintf(stdout, "\n# [cvc_lvc_exact2_lowmem_xspace] %s# [cvc_lvc_exact2_lowmem_xspace] end of run\n", ctime(&g_the_time)); fprintf(stderr, "\n# [cvc_lvc_exact2_lowmem_xspace] %s# [cvc_lvc_exact2_lowmem_xspace] end of run\n", ctime(&g_the_time)); } return(0); }
int main(int argc,char *argv[]) { int j,j_max,k,k_max = 1; #ifdef HAVE_LIBLEMON paramsXlfInfo *xlfInfo; #endif int status = 0; static double t1,t2,dt,sdt,dts,qdt,sqdt; double antioptaway=0.0; #ifdef MPI static double dt2; DUM_DERI = 6; DUM_SOLVER = DUM_DERI+2; DUM_MATRIX = DUM_SOLVER+6; NO_OF_SPINORFIELDS = DUM_MATRIX+2; # ifdef OMP int mpi_thread_provided; MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_thread_provided); # else MPI_Init(&argc, &argv); # endif MPI_Comm_rank(MPI_COMM_WORLD, &g_proc_id); #else g_proc_id = 0; #endif g_rgi_C1 = 1.; /* Read the input file */ if((status = read_input("benchmark.input")) != 0) { fprintf(stderr, "Could not find input file: benchmark.input\nAborting...\n"); exit(-1); } #ifdef OMP if(omp_num_threads > 0) { omp_set_num_threads(omp_num_threads); } else { if( g_proc_id == 0 ) printf("# No value provided for OmpNumThreads, running in single-threaded mode!\n"); omp_num_threads = 1; omp_set_num_threads(omp_num_threads); } init_omp_accumulators(omp_num_threads); #endif tmlqcd_mpi_init(argc, argv); if(g_proc_id==0) { #ifdef SSE printf("# The code was compiled with SSE instructions\n"); #endif #ifdef SSE2 printf("# The code was compiled with SSE2 instructions\n"); #endif #ifdef SSE3 printf("# The code was compiled with SSE3 instructions\n"); #endif #ifdef P4 printf("# The code was compiled for Pentium4\n"); #endif #ifdef OPTERON printf("# The code was compiled for AMD Opteron\n"); #endif #ifdef _GAUGE_COPY printf("# The code was compiled with -D_GAUGE_COPY\n"); #endif #ifdef BGL printf("# The code was compiled for Blue Gene/L\n"); #endif #ifdef BGP printf("# The code was compiled for Blue Gene/P\n"); #endif #ifdef _USE_HALFSPINOR printf("# The code was compiled with -D_USE_HALFSPINOR\n"); #endif #ifdef _USE_SHMEM printf("# The code was compiled with -D_USE_SHMEM\n"); # ifdef _PERSISTENT printf("# The code was compiled for persistent MPI calls (halfspinor only)\n"); # endif #endif #ifdef MPI # ifdef _NON_BLOCKING printf("# 
The code was compiled for non-blocking MPI calls (spinor and gauge)\n"); # endif #endif printf("\n"); fflush(stdout); } #ifdef _GAUGE_COPY init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1); #else init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0); #endif init_geometry_indices(VOLUMEPLUSRAND + g_dbw2rand); if(even_odd_flag) { j = init_spinor_field(VOLUMEPLUSRAND/2, 2*k_max+1); } else { j = init_spinor_field(VOLUMEPLUSRAND, 2*k_max); } if ( j!= 0) { fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n"); exit(0); } j = init_moment_field(VOLUME, VOLUMEPLUSRAND + g_dbw2rand); if ( j!= 0) { fprintf(stderr, "Not enough memory for moment fields! Aborting...\n"); exit(0); } if(g_proc_id == 0) { fprintf(stdout,"# The number of processes is %d \n",g_nproc); printf("# The lattice size is %d x %d x %d x %d\n", (int)(T*g_nproc_t), (int)(LX*g_nproc_x), (int)(LY*g_nproc_y), (int)(g_nproc_z*LZ)); printf("# The local lattice size is %d x %d x %d x %d\n", (int)(T), (int)(LX), (int)(LY),(int) LZ); if(even_odd_flag) { printf("# benchmarking the even/odd preconditioned Dirac operator\n"); } else { printf("# benchmarking the standard Dirac operator\n"); } fflush(stdout); } /* define the geometry */ geometry(); /* define the boundary conditions for the fermion fields */ boundary(g_kappa); #ifdef _USE_HALFSPINOR j = init_dirac_halfspinor(); if ( j!= 0) { fprintf(stderr, "Not enough memory for halfspinor fields! Aborting...\n"); exit(0); } if(g_sloppy_precision_flag == 1) { g_sloppy_precision = 1; j = init_dirac_halfspinor32(); if ( j!= 0) { fprintf(stderr, "Not enough memory for 32-Bit halfspinor fields! Aborting...\n"); exit(0); } } # if (defined _PERSISTENT) init_xchange_halffield(); # endif #endif status = check_geometry(); if (status != 0) { fprintf(stderr, "Checking of geometry failed. 
Unable to proceed.\nAborting....\n"); exit(1); } #if (defined MPI && !(defined _USE_SHMEM)) check_xchange(); #endif start_ranlux(1, 123456); random_gauge_field(reproduce_randomnumber_flag); #ifdef MPI /*For parallelization: exchange the gaugefield */ xchange_gauge(g_gauge_field); #endif if(even_odd_flag) { /*initialize the pseudo-fermion fields*/ j_max=2048; sdt=0.; for (k = 0; k < k_max; k++) { random_spinor_field(g_spinor_field[k], VOLUME/2, 0); } while(sdt < 30.) { #ifdef MPI MPI_Barrier(MPI_COMM_WORLD); #endif t1 = gettime(); antioptaway=0.0; for (j=0;j<j_max;j++) { for (k=0;k<k_max;k++) { Hopping_Matrix(0, g_spinor_field[k+k_max], g_spinor_field[k]); Hopping_Matrix(1, g_spinor_field[2*k_max], g_spinor_field[k+k_max]); antioptaway+=creal(g_spinor_field[2*k_max][0].s0.c0); } } t2 = gettime(); dt = t2-t1; #ifdef MPI MPI_Allreduce (&dt, &sdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); #else sdt = dt; #endif qdt=dt*dt; #ifdef MPI MPI_Allreduce (&qdt, &sqdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); #else sqdt = qdt; #endif sdt=sdt/((double)g_nproc); sqdt=sqrt(sqdt/g_nproc-sdt*sdt); j_max*=2; } j_max=j_max/2; dts=dt; sdt=1.0e6f*sdt/((double)(k_max*j_max*(VOLUME))); sqdt=1.0e6f*sqdt/((double)(k_max*j_max*(VOLUME))); if(g_proc_id==0) { printf("# The following result is just to make sure that the calculation is not optimized away: %e\n", antioptaway); printf("# Total compute time %e sec, variance of the time %e sec. 
(%d iterations).\n", sdt, sqdt, j_max); printf("# Communication switched on:\n# (%d Mflops [%d bit arithmetic])\n", (int)(1608.0f/sdt),(int)sizeof(spinor)/3); #ifdef OMP printf("# Mflops per OpenMP thread ~ %d\n",(int)(1608.0f/(omp_num_threads*sdt))); #endif printf("\n"); fflush(stdout); } #ifdef MPI /* isolated computation */ t1 = gettime(); antioptaway=0.0; for (j=0;j<j_max;j++) { for (k=0;k<k_max;k++) { Hopping_Matrix_nocom(0, g_spinor_field[k+k_max], g_spinor_field[k]); Hopping_Matrix_nocom(1, g_spinor_field[2*k_max], g_spinor_field[k+k_max]); antioptaway += creal(g_spinor_field[2*k_max][0].s0.c0); } } t2 = gettime(); dt2 = t2-t1; /* compute the bandwidth */ dt=dts-dt2; MPI_Allreduce (&dt, &sdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); sdt=sdt/((double)g_nproc); MPI_Allreduce (&dt2, &dt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); dt=dt/((double)g_nproc); dt=1.0e6f*dt/((double)(k_max*j_max*(VOLUME))); if(g_proc_id==0) { printf("# The following result is printed just to make sure that the calculation is not optimized away: %e\n",antioptaway); printf("# Communication switched off: \n# (%d Mflops [%d bit arithmetic])\n", (int)(1608.0f/dt),(int)sizeof(spinor)/3); #ifdef OMP printf("# Mflops per OpenMP thread ~ %d\n",(int)(1608.0f/(omp_num_threads*dt))); #endif printf("\n"); fflush(stdout); } sdt=sdt/((double)k_max); sdt=sdt/((double)j_max); sdt=sdt/((double)(2*SLICE)); if(g_proc_id==0) { printf("# The size of the package is %d bytes.\n",(SLICE)*192); #ifdef _USE_HALFSPINOR printf("# The bandwidth is %5.2f + %5.2f MB/sec\n", 192./sdt/1024/1024, 192./sdt/1024./1024); #else printf("# The bandwidth is %5.2f + %5.2f MB/sec\n", 2.*192./sdt/1024/1024, 2.*192./sdt/1024./1024); #endif } #endif fflush(stdout); } else { /* the non even/odd case now */ /*initialize the pseudo-fermion fields*/ j_max=1; sdt=0.; for (k=0;k<k_max;k++) { random_spinor_field(g_spinor_field[k], VOLUME, 0); } while(sdt < 3.) 
{ #ifdef MPI MPI_Barrier(MPI_COMM_WORLD); #endif t1 = gettime(); for (j=0;j<j_max;j++) { for (k=0;k<k_max;k++) { D_psi(g_spinor_field[k+k_max], g_spinor_field[k]); antioptaway+=creal(g_spinor_field[k+k_max][0].s0.c0); } } t2 = gettime(); dt=t2-t1; #ifdef MPI MPI_Allreduce (&dt, &sdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); #else sdt = dt; #endif qdt=dt*dt; #ifdef MPI MPI_Allreduce (&qdt, &sqdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); #else sqdt = qdt; #endif sdt=sdt/((double)g_nproc); sqdt=sqrt(sqdt/g_nproc-sdt*sdt); j_max*=2; } j_max=j_max/2; dts=dt; sdt=1.0e6f*sdt/((double)(k_max*j_max*(VOLUME))); sqdt=1.0e6f*sqdt/((double)(k_max*j_max*(VOLUME))); if(g_proc_id==0) { printf("# The following result is just to make sure that the calculation is not optimized away: %e\n", antioptaway); printf("# Total compute time %e sec, variance of the time %e sec. (%d iterations).\n", sdt, sqdt, j_max); printf("\n# (%d Mflops [%d bit arithmetic])\n", (int)(1680.0f/sdt),(int)sizeof(spinor)/3); #ifdef OMP printf("# Mflops per OpenMP thread ~ %d\n",(int)(1680.0f/(omp_num_threads*sdt))); #endif printf("\n"); fflush(stdout); } } #ifdef HAVE_LIBLEMON if(g_proc_id==0) { printf("# Performing parallel IO test ...\n"); } xlfInfo = construct_paramsXlfInfo(0.5, 0); write_gauge_field( "conf.test", 64, xlfInfo); free(xlfInfo); if(g_proc_id==0) { printf("# done ...\n"); } #endif #ifdef MPI MPI_Finalize(); #endif #ifdef OMP free_omp_accumulators(); #endif free_gauge_field(); free_geometry_indices(); free_spinor_field(); free_moment_field(); return(0); }
int main(int argc, char **argv) { int c, i, mu, status; int ispin, icol, isc; int n_c = 3; int n_s = 4; int count = 0; int filename_set = 0; int dims[4] = {0,0,0,0}; int grid_size[4]; int l_LX_at, l_LXstart_at; int x0, x1, x2, x3, ix, iix, iy, is, it, i3; int sl0, sl1, sl2, sl3, have_source_flag=0; int source_proc_coords[4], lsl0, lsl1, lsl2, lsl3; int check_residuum = 0; unsigned int VOL3, V5; int do_gt = 0; int full_orbit = 0; int smear_source = 0; char filename[200], source_filename[200], source_filename_write[200]; double ratime, retime; double plaq_r=0., plaq_m=0., norm, norm2; double spinor1[24]; double *gauge_qdp[4], *gauge_field_timeslice=NULL, *gauge_field_smeared=NULL; double _1_2_kappa, _2_kappa, phase; FILE *ofs; int mu_trans[4] = {3, 0, 1, 2}; int threadid, nthreads; int timeslice, source_timeslice; char rng_file_in[100], rng_file_out[100]; int *source_momentum=NULL; int source_momentum_class = -1; int source_momentum_no = 0; int source_momentum_runs = 1; int imom; int num_gpu_on_node=0, rank; int source_location_5d_iseven; int convert_sign=0; #ifdef HAVE_QUDA int rotate_gamma_basis = 1; #else int rotate_gamma_basis = 0; #endif omp_lock_t *lck = NULL, gen_lck[1]; int key = 0; /****************************************************************************/ /* for smearing parallel to inversion */ double *smearing_spinor_field[] = {NULL,NULL}; int dummy_flag = 0; /****************************************************************************/ /****************************************************************************/ #if (defined HAVE_QUDA) && (defined MULTI_GPU) int x_face_size, y_face_size, z_face_size, t_face_size, pad_size; #endif /****************************************************************************/ /************************************************/ int qlatt_nclass; int *qlatt_id=NULL, *qlatt_count=NULL, **qlatt_rep=NULL, **qlatt_map=NULL; double **qlatt_list=NULL; /************************************************/ 
/************************************************/ double boundary_condition_factor; int boundary_condition_factor_set = 0; /************************************************/ //#ifdef MPI // kernelPackT = true; //#endif /*********************************************** * QUDA parameters ***********************************************/ #ifdef HAVE_QUDA QudaPrecision cpu_prec = QUDA_DOUBLE_PRECISION; QudaPrecision cuda_prec = QUDA_DOUBLE_PRECISION; QudaPrecision cuda_prec_sloppy = QUDA_SINGLE_PRECISION; QudaGaugeParam gauge_param = newQudaGaugeParam(); QudaInvertParam inv_param = newQudaInvertParam(); #endif while ((c = getopt(argc, argv, "soch?vgf:p:b:S:R:")) != -1) { switch (c) { case 'v': g_verbose = 1; break; case 'g': do_gt = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'c': check_residuum = 1; fprintf(stdout, "# [invert_dw_quda] will check residuum again\n"); break; case 'p': n_c = atoi(optarg); fprintf(stdout, "# [invert_dw_quda] will use number of colors = %d\n", n_c); break; case 'o': full_orbit = 1; fprintf(stdout, "# [invert_dw_quda] will invert for full orbit, if source momentum set\n"); case 's': smear_source = 1; fprintf(stdout, "# [invert_dw_quda] will smear the sources if they are read from file\n"); break; case 'b': boundary_condition_factor = atof(optarg); boundary_condition_factor_set = 1; fprintf(stdout, "# [invert_dw_quda] const. 
boundary condition factor set to %e\n", boundary_condition_factor); break; case 'S': convert_sign = atoi(optarg); fprintf(stdout, "# [invert_dw_quda] using convert sign %d\n", convert_sign); break; case 'R': rotate_gamma_basis = atoi(optarg); fprintf(stdout, "# [invert_dw_quda] rotate gamma basis %d\n", rotate_gamma_basis); break; case 'h': case '?': default: usage(); break; } } // get the time stamp g_the_time = time(NULL); /************************************** * set the default values, read input **************************************/ if(filename_set==0) strcpy(filename, "cvc.input"); if(g_proc_id==0) fprintf(stdout, "# Reading input from file %s\n", filename); read_input_parser(filename); #ifdef MPI #ifdef HAVE_QUDA grid_size[0] = g_nproc_x; grid_size[1] = g_nproc_y; grid_size[2] = g_nproc_z; grid_size[3] = g_nproc_t; fprintf(stdout, "# [] g_nproc = (%d,%d,%d,%d)\n", g_nproc_x, g_nproc_y, g_nproc_z, g_nproc_t); initCommsQuda(argc, argv, grid_size, 4); #else MPI_Init(&argc, &argv); #endif #endif #if (defined PARALLELTX) || (defined PARALLELTXY) EXIT_WITH_MSG(1, "[] Error, 2-dim./3-dim. MPI-Version not yet implemented"); #endif // some checks on the input data if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stderr, "[invert_dw_quda] Error, T and L's must be set\n"); usage(); } // set number of openmp threads // initialize MPI parameters mpi_init(argc, argv); // the volume of a timeslice VOL3 = LX*LY*LZ; V5 = T*LX*LY*LZ*L5; g_kappa5d = 0.5 / (5. 
+ g_m5); if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] kappa5d = %e\n", g_kappa5d); fprintf(stdout, "# [%2d] parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] L5 = %3d\n",\ g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, L5); #ifdef MPI if(T==0) { fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id); MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); exit(2); } #endif if(init_geometry() != 0) { fprintf(stderr, "[invert_dw_quda] Error from init_geometry\n"); EXIT(1); } geometry(); if( init_geometry_5d() != 0 ) { fprintf(stderr, "[invert_dw_quda] Error from init_geometry_5d\n"); EXIT(2); } geometry_5d(); /************************************** * initialize the QUDA library **************************************/ if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] initializing quda\n"); #ifdef HAVE_QUDA // cudaGetDeviceCount(&num_gpu_on_node); if(g_gpu_per_node<0) { if(g_cart_id==0) fprintf(stderr, "[] Error, number of GPUs per node not set\n"); EXIT(106); } else { num_gpu_on_node = g_gpu_per_node; } #ifdef MPI rank = comm_rank(); #else rank = 0; #endif g_gpu_device_number = rank % num_gpu_on_node; fprintf(stdout, "# [] process %d/%d uses device %d\n", rank, g_cart_id, g_gpu_device_number); initQuda(g_gpu_device_number); #endif /************************************** * prepare the gauge field **************************************/ // read the gauge field from file alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND); if(strcmp( gaugefilename_prefix, "identity")==0 ) { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] Setting up unit gauge field\n"); for(ix=0;ix<VOLUME; ix++) { for(mu=0;mu<4;mu++) { _cm_eq_id(g_gauge_field+_GGI(ix,mu)); } } } else if(strcmp( gaugefilename_prefix, "random")==0 ) { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] Setting up random gauge field with seed = %d\n", g_seed); init_rng_state(g_seed, &g_rng_state); random_gauge_field(g_gauge_field, 1.); plaquette(&plaq_m); sprintf(filename, "%s.%.4d", 
gaugefilename_prefix, Nconf); check_error(write_lime_gauge_field(filename, plaq_m, Nconf, 64), "write_lime_gauge_field", NULL, 12); } else { if(g_gauge_file_format == 0) { // ILDG sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf); if(g_cart_id==0) fprintf(stdout, "# Reading gauge field from file %s\n", filename); status = read_lime_gauge_field_doubleprec(filename); } else if(g_gauge_file_format == 1) { // NERSC sprintf(filename, "%s.%.5d", gaugefilename_prefix, Nconf); if(g_cart_id==0) fprintf(stdout, "# Reading gauge field from file %s\n", filename); status = read_nersc_gauge_field(g_gauge_field, filename, &plaq_r); //status = read_nersc_gauge_field_3x3(g_gauge_field, filename, &plaq_r); } if(status != 0) { fprintf(stderr, "[invert_dw_quda] Error, could not read gauge field"); EXIT(12); } } #ifdef MPI xchange_gauge(); #endif // measure the plaquette plaquette(&plaq_m); if(g_cart_id==0) fprintf(stdout, "# Measured plaquette value: %25.16e\n", plaq_m); if(g_cart_id==0) fprintf(stdout, "# Read plaquette value : %25.16e\n", plaq_r); #ifndef HAVE_QUDA if(N_Jacobi>0) { #endif // allocate the smeared / qdp ordered gauge field alloc_gauge_field(&gauge_field_smeared, VOLUMEPLUSRAND); for(i=0;i<4;i++) { gauge_qdp[i] = gauge_field_smeared + i*18*VOLUME; } #ifndef HAVE_QUDA } #endif #ifdef HAVE_QUDA // transcribe the gauge field omp_set_num_threads(g_num_threads); #pragma omp parallel for private(ix,iy,mu) for(ix=0;ix<VOLUME;ix++) { iy = g_lexic2eot[ix]; for(mu=0;mu<4;mu++) { _cm_eq_cm(gauge_qdp[mu_trans[mu]]+18*iy, g_gauge_field+_GGI(ix,mu)); } } // multiply timeslice T-1 with factor of -1 (antiperiodic boundary condition) if(g_proc_coords[0]==g_nproc_t-1) { if(!boundary_condition_factor_set) boundary_condition_factor = -1.; fprintf(stdout, "# [] process %d multiplies gauge-field timeslice T_global-1 with boundary condition factor %e\n", g_cart_id, boundary_condition_factor); omp_set_num_threads(g_num_threads); #pragma omp parallel for private(ix,iy) 
for(ix=0;ix<VOL3;ix++) { iix = (T-1)*VOL3 + ix; iy = g_lexic2eot[iix]; _cm_ti_eq_re(gauge_qdp[mu_trans[0]]+18*iy, -1.); } } // QUDA precision parameters switch(g_cpu_prec) { case 0: cpu_prec = QUDA_HALF_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] CPU prec = half\n"); break; case 1: cpu_prec = QUDA_SINGLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] CPU prec = single\n"); break; case 2: cpu_prec = QUDA_DOUBLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] CPU prec = double\n"); break; default: cpu_prec = QUDA_DOUBLE_PRECISION; break; } switch(g_gpu_prec) { case 0: cuda_prec = QUDA_HALF_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU prec = half\n"); break; case 1: cuda_prec = QUDA_SINGLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU prec = single\n"); break; case 2: cuda_prec = QUDA_DOUBLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU prec = double\n"); break; default: cuda_prec = QUDA_DOUBLE_PRECISION; break; } switch(g_gpu_prec_sloppy) { case 0: cuda_prec_sloppy = QUDA_HALF_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU sloppy prec = half\n"); break; case 1: cuda_prec_sloppy = QUDA_SINGLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU sloppy prec = single\n"); break; case 2: cuda_prec_sloppy = QUDA_DOUBLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU sloppy prec = double\n"); break; default: cuda_prec_sloppy = QUDA_SINGLE_PRECISION; break; } // QUDA gauge parameters gauge_param.X[0] = LX; gauge_param.X[1] = LY; gauge_param.X[2] = LZ; gauge_param.X[3] = T; inv_param.Ls = L5; gauge_param.anisotropy = 1.0; gauge_param.type = QUDA_WILSON_LINKS; gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER; gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T; gauge_param.cpu_prec = cpu_prec; gauge_param.cuda_prec = cuda_prec; gauge_param.reconstruct = QUDA_RECONSTRUCT_12; gauge_param.cuda_prec_sloppy = cuda_prec_sloppy; gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12; gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO; 
gauge_param.ga_pad = 0; inv_param.sp_pad = 0; inv_param.cl_pad = 0; // For multi-GPU, ga_pad must be large enough to store a time-slice #ifdef MULTI_GPU x_face_size = inv_param.Ls * gauge_param.X[1]*gauge_param.X[2]*gauge_param.X[3]/2; y_face_size = inv_param.Ls * gauge_param.X[0]*gauge_param.X[2]*gauge_param.X[3]/2; z_face_size = inv_param.Ls * gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[3]/2; t_face_size = inv_param.Ls * gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[2]/2; pad_size = _MAX(x_face_size, y_face_size); pad_size = _MAX(pad_size, z_face_size); pad_size = _MAX(pad_size, t_face_size); gauge_param.ga_pad = pad_size; if(g_cart_id==0) printf("# [invert_dw_quda] pad_size = %d\n", pad_size); #endif // load the gauge field if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] loading gauge field\n"); loadGaugeQuda((void*)gauge_qdp, &gauge_param); gauge_qdp[0] = NULL; gauge_qdp[1] = NULL; gauge_qdp[2] = NULL; gauge_qdp[3] = NULL; #endif /********************************************* * APE smear the gauge field *********************************************/ if(N_Jacobi>0) { memcpy(gauge_field_smeared, g_gauge_field, 72*VOLUMEPLUSRAND*sizeof(double)); fprintf(stdout, "# [invert_dw_quda] APE smearing gauge field with paramters N_APE=%d, alpha_APE=%e\n", N_ape, alpha_ape); APE_Smearing_Step_threads(gauge_field_smeared, N_ape, alpha_ape); xchange_gauge_field(gauge_field_smeared); } // allocate memory for the spinor fields #ifdef HAVE_QUDA no_fields = 3+2; #else no_fields = 6+2; #endif g_spinor_field = (double**)calloc(no_fields, sizeof(double*)); for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND*L5); smearing_spinor_field[0] = g_spinor_field[no_fields-2]; smearing_spinor_field[1] = g_spinor_field[no_fields-1]; switch(g_source_type) { case 0: case 5: // the source locaton sl0 = g_source_location / (LX_global*LY_global*LZ); sl1 = ( g_source_location % (LX_global*LY_global*LZ) ) / ( LY_global*LZ); sl2 = ( g_source_location % ( 
LY_global*LZ) ) / ( LZ); sl3 = g_source_location % LZ; if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] global sl = (%d, %d, %d, %d)\n", sl0, sl1, sl2, sl3); source_proc_coords[0] = sl0 / T; source_proc_coords[1] = sl1 / LX; source_proc_coords[2] = sl2 / LY; source_proc_coords[3] = sl3 / LZ; #ifdef MPI MPI_Cart_rank(g_cart_grid, source_proc_coords, &g_source_proc_id); #else g_source_proc_id = 0; #endif have_source_flag = g_source_proc_id == g_cart_id; lsl0 = sl0 % T; lsl1 = sl1 % LX; lsl2 = sl2 % LY; lsl3 = sl3 % LZ; if(have_source_flag) { fprintf(stdout, "# [invert_dw_quda] process %d has the source at (%d, %d, %d, %d)\n", g_cart_id, lsl0, lsl1, lsl2, lsl3); } break; case 2: case 3: case 4: // the source timeslice #ifdef MPI source_proc_coords[0] = g_source_timeslice / T; source_proc_coords[1] = 0; source_proc_coords[2] = 0; source_proc_coords[3] = 0; MPI_Cart_rank(g_cart_grid, source_proc_coords, &g_source_proc_id); have_source_flag = ( g_source_proc_id == g_cart_id ); source_timeslice = have_source_flag ? 
g_source_timeslice % T : -1; #else g_source_proc_id = 0; have_source_flag = 1; source_timeslice = g_source_timeslice; #endif break; } #ifdef HAVE_QUDA /************************************************************* * QUDA inverter parameters *************************************************************/ inv_param.dslash_type = QUDA_DOMAIN_WALL_DSLASH; if(strcmp(g_inverter_type_name, "cg") == 0) { inv_param.inv_type = QUDA_CG_INVERTER; if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] using cg inverter\n"); } else if(strcmp(g_inverter_type_name, "bicgstab") == 0) { inv_param.inv_type = QUDA_BICGSTAB_INVERTER; if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] using bicgstab inverter\n"); #ifdef MULTI_GPU } else if(strcmp(g_inverter_type_name, "gcr") == 0) { inv_param.inv_type = QUDA_GCR_INVERTER; if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] using gcr inverter\n"); #endif } else { if(g_cart_id==0) fprintf(stderr, "[invert_dw_quda] Error, unrecognized inverter type %s\n", g_inverter_type_name); EXIT(123); } if(inv_param.inv_type == QUDA_CG_INVERTER) { inv_param.solution_type = QUDA_MAT_SOLUTION; inv_param.solve_type = QUDA_NORMEQ_PC_SOLVE; } else if(inv_param.inv_type == QUDA_BICGSTAB_INVERTER) { inv_param.solution_type = QUDA_MAT_SOLUTION; inv_param.solve_type = QUDA_DIRECT_PC_SOLVE; } else { inv_param.solution_type = QUDA_MATPC_SOLUTION; inv_param.solve_type = QUDA_DIRECT_PC_SOLVE; } inv_param.m5 = g_m5; inv_param.kappa = 0.5 / (5. 
+ inv_param.m5); inv_param.mass = g_m0; inv_param.tol = solver_precision; inv_param.maxiter = niter_max; inv_param.reliable_delta = reliable_delta; #ifdef MPI // domain decomposition preconditioner parameters if(inv_param.inv_type == QUDA_GCR_INVERTER) { if(g_cart_id == 0) printf("# [] settup DD parameters\n"); inv_param.gcrNkrylov = 15; inv_param.inv_type_precondition = QUDA_MR_INVERTER; inv_param.tol_precondition = 1e-6; inv_param.maxiter_precondition = 200; inv_param.verbosity_precondition = QUDA_VERBOSE; inv_param.prec_precondition = cuda_prec_sloppy; inv_param.omega = 0.7; } #endif inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN; inv_param.dagger = QUDA_DAG_NO; inv_param.mass_normalization = QUDA_KAPPA_NORMALIZATION; //;QUDA_MASS_NORMALIZATION; inv_param.cpu_prec = cpu_prec; inv_param.cuda_prec = cuda_prec; inv_param.cuda_prec_sloppy = cuda_prec_sloppy; inv_param.verbosity = QUDA_VERBOSE; inv_param.preserve_source = QUDA_PRESERVE_SOURCE_NO; inv_param.dirac_order = QUDA_DIRAC_ORDER; #ifdef MPI inv_param.preserve_dirac = QUDA_PRESERVE_DIRAC_YES; inv_param.prec_precondition = cuda_prec_sloppy; inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS; inv_param.dirac_tune = QUDA_TUNE_NO; #endif #endif /******************************************* * write initial rng state to file *******************************************/ if( g_source_type==2 && g_coherent_source==2 ) { sprintf(rng_file_out, "%s.0", g_rng_filename); status = init_rng_stat_file (g_seed, rng_file_out); if( status != 0 ) { fprintf(stderr, "[invert_dw_quda] Error, could not write rng status\n"); EXIT(210); } } else if( (g_source_type==2 /*&& g_coherent_source==1*/) || g_source_type==3 || g_source_type==4) { if( init_rng_state(g_seed, &g_rng_state) != 0 ) { fprintf(stderr, "[invert_dw_quda] Error, could initialize rng state\n"); EXIT(211); } } /******************************************* * prepare locks for openmp *******************************************/ nthreads = g_num_threads - 1; lck = 
(omp_lock_t*)malloc(nthreads * sizeof(omp_lock_t)); if(lck == NULL) { EXIT_WITH_MSG(97, "[invert_dw_quda] Error, could not allocate lck\n"); } // init locks for(i=0;i<nthreads;i++) { omp_init_lock(lck+i); } omp_init_lock(gen_lck); // check the source momenta if(g_source_momentum_set) { source_momentum = (int*)malloc(3*sizeof(int)); if(g_source_momentum[0]<0) g_source_momentum[0] += LX_global; if(g_source_momentum[1]<0) g_source_momentum[1] += LY_global; if(g_source_momentum[2]<0) g_source_momentum[2] += LZ_global; fprintf(stdout, "# [invert_dw_quda] using final source momentum ( %d, %d, %d )\n", g_source_momentum[0], g_source_momentum[1], g_source_momentum[2]); if(full_orbit) { status = make_qcont_orbits_3d_parity_avg( &qlatt_id, &qlatt_count, &qlatt_list, &qlatt_nclass, &qlatt_rep, &qlatt_map); if(status != 0) { if(g_cart_id==0) fprintf(stderr, "\n[invert_dw_quda] Error while creating O_3-lists\n"); EXIT(4); } source_momentum_class = qlatt_id[g_ipt[0][g_source_momentum[0]][g_source_momentum[1]][g_source_momentum[2]]]; source_momentum_no = qlatt_count[source_momentum_class]; source_momentum_runs = source_momentum_class==0 ? 
1 : source_momentum_no + 1; if(g_cart_id==0) fprintf(stdout, "# [] source momentum belongs to class %d with %d members, which means %d runs\n", source_momentum_class, source_momentum_no, source_momentum_runs); } } if(g_source_type == 5) { if(g_seq_source_momentum_set) { if(g_seq_source_momentum[0]<0) g_seq_source_momentum[0] += LX_global; if(g_seq_source_momentum[1]<0) g_seq_source_momentum[1] += LY_global; if(g_seq_source_momentum[2]<0) g_seq_source_momentum[2] += LZ_global; } else if(g_source_momentum_set) { g_seq_source_momentum[0] = g_source_momentum[0]; g_seq_source_momentum[1] = g_source_momentum[1]; g_seq_source_momentum[2] = g_source_momentum[2]; } fprintf(stdout, "# [invert_dw_quda] using final sequential source momentum ( %d, %d, %d )\n", g_seq_source_momentum[0], g_seq_source_momentum[1], g_seq_source_momentum[2]); } /*********************************************** * loop on spin-color-index ***********************************************/ for(isc=g_source_index[0]; isc<=g_source_index[1]; isc++) // for(isc=g_source_index[0]; isc<=g_source_index[0]; isc++) { ispin = isc / n_c; icol = isc % n_c; for(imom=0; imom<source_momentum_runs; imom++) { /*********************************************** * set source momentum ***********************************************/ if(g_source_momentum_set) { if(imom == 0) { if(full_orbit) { source_momentum[0] = 0; source_momentum[1] = 0; source_momentum[2] = 0; } else { source_momentum[0] = g_source_momentum[0]; source_momentum[1] = g_source_momentum[1]; source_momentum[2] = g_source_momentum[2]; } } else { source_momentum[0] = qlatt_map[source_momentum_class][imom-1] / (LY_global*LZ_global); source_momentum[1] = ( qlatt_map[source_momentum_class][imom-1] % (LY_global*LZ_global) ) / LZ_global; source_momentum[2] = qlatt_map[source_momentum_class][imom-1] % LZ_global; } if(g_cart_id==0) fprintf(stdout, "# [] run no. 
%d, source momentum (%d, %d, %d)\n", imom, source_momentum[0], source_momentum[1], source_momentum[2]); } /*********************************************** * prepare the souce ***********************************************/ if(g_read_source == 0) { // create source switch(g_source_type) { case 0: // point source if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] Creating point source\n"); for(ix=0;ix<L5*VOLUME;ix++) { _fv_eq_zero(g_spinor_field[0]+ix); } if(have_source_flag) { if(g_source_momentum_set) { phase = 2*M_PI*( source_momentum[0]*sl1/(double)LX_global + source_momentum[1]*sl2/(double)LY_global + source_momentum[2]*sl3/(double)LZ_global ); g_spinor_field[0][_GSI(g_ipt[lsl0][lsl1][lsl2][lsl3]) + 2*(n_c*ispin+icol) ] = cos(phase); g_spinor_field[0][_GSI(g_ipt[lsl0][lsl1][lsl2][lsl3]) + 2*(n_c*ispin+icol)+1] = sin(phase); } else { g_spinor_field[0][_GSI(g_ipt[lsl0][lsl1][lsl2][lsl3]) + 2*(n_c*ispin+icol) ] = 1.; } } if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.qx%.2dqy%.2dqz%.2d", filename_prefix, Nconf, sl0, sl1, sl2, sl3, n_c*ispin+icol, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d", filename_prefix, Nconf, sl0, sl1, sl2, sl3, n_c*ispin+icol); } #ifdef HAVE_QUDA // set matpc_tpye source_location_5d_iseven = ( (g_iseven[g_ipt[lsl0][lsl1][lsl2][lsl3]] && ispin<n_s/2) || (!g_iseven[g_ipt[lsl0][lsl1][lsl2][lsl3]] && ispin>=n_s/2) ) ? 
1 : 0; if(source_location_5d_iseven) { inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN; if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] matpc type is MATPC_EVEN_EVEN\n"); } else { inv_param.matpc_type = QUDA_MATPC_ODD_ODD; if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] matpc type is MATPC_ODD_ODD\n"); } #endif break; case 2: // timeslice source if(g_coherent_source==1) { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] Creating coherent timeslice source\n"); status = prepare_coherent_timeslice_source(g_spinor_field[0], gauge_field_smeared, g_coherent_source_base, g_coherent_source_delta, VOLUME, g_rng_state, 1); if(status != 0) { fprintf(stderr, "[invert_dw_quda] Error from prepare source, status was %d\n", status); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 123); MPI_Finalize(); #endif exit(123); } check_error(prepare_coherent_timeslice_source(g_spinor_field[0], gauge_field_smeared, g_coherent_source_base, g_coherent_source_delta, VOLUME, g_rng_state, 1), "prepare_coherent_timeslice_source", NULL, 123); timeslice = g_coherent_source_base; } else { if(g_coherent_source==2) { timeslice = (g_coherent_source_base+isc*g_coherent_source_delta)%T_global; fprintf(stdout, "# [invert_dw_quda] Creating timeslice source\n"); check_error(prepare_timeslice_source(g_spinor_field[0], gauge_field_smeared, timeslice, VOLUME, g_rng_state, 1), "prepare_timeslice_source", NULL, 123); } else { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] Creating timeslice source\n"); check_error(prepare_timeslice_source(g_spinor_field[0], gauge_field_smeared, g_source_timeslice, VOLUME, g_rng_state, 1), "prepare_timeslice_source", NULL, 124); timeslice = g_source_timeslice; } } if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.%.2d.%.5d.qx%.2dqy%.2dqz%.2d", filename_prefix, Nconf, timeslice, isc, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.%.2d.%.5d", filename_prefix, Nconf, timeslice, isc); } break; case 3: // 
timeslice sources for one-end trick (spin dilution) fprintf(stdout, "# [invert_dw_quda] Creating timeslice source for one-end-trick\n"); check_error( prepare_timeslice_source_one_end(g_spinor_field[0], gauge_field_smeared, source_timeslice, source_momentum, isc%n_s, g_rng_state, \ ( isc%n_s==(n_s-1) && imom==source_momentum_runs-1 )), "prepare_timeslice_source_one_end", NULL, 125 ); c = N_Jacobi > 0 ? isc%n_s + n_s : isc%n_s; if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.%.2d.%.2d.qx%.2dqy%.2dqz%.2d", filename_prefix, Nconf, g_source_timeslice, c, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.%.2d.%.2d", filename_prefix, Nconf, g_source_timeslice, c); } break; case 4: // timeslice sources for one-end trick (spin and color dilution ) fprintf(stdout, "# [invert_dw_quda] Creating timeslice source for one-end-trick\n"); check_error(prepare_timeslice_source_one_end_color(g_spinor_field[0], gauge_field_smeared, source_timeslice, source_momentum,\ isc%(n_s*n_c), g_rng_state, ( isc%(n_s*n_c)==(n_s*n_c-1) && imom==source_momentum_runs-1 )), "prepare_timeslice_source_one_end_color", NULL, 126); c = N_Jacobi > 0 ? 
isc%(n_s*n_c) + (n_s*n_c) : isc%(n_s*n_c); if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.%.2d.%.2d.qx%.2dqy%.2dqz%.2d", filename_prefix, Nconf, g_source_timeslice, c, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.%.2d.%.2d", filename_prefix, Nconf, g_source_timeslice, c); } break; case 5: if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] preparing sequential point source\n"); check_error( prepare_sequential_point_source (g_spinor_field[0], isc, sl0, g_seq_source_momentum, smear_source, g_spinor_field[1], gauge_field_smeared), "prepare_sequential_point_source", NULL, 33); sprintf(source_filename, "%s.%.4d.t%.2dx%.2d.y%.2d.z%.2d.%.2d.qx%.2dqy%.2dqz%.2d", filename_prefix2, Nconf, sl0, sl1, sl2, sl3, isc, g_source_momentum[0], g_source_momentum[1], g_source_momentum[2]); break; default: fprintf(stderr, "\nError, unrecognized source type\n"); exit(32); break; } } else { // read source switch(g_source_type) { case 0: // point source if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.qx%.2dqy%.2dqz%.2d", \ filename_prefix2, Nconf, sl0, sl1, sl2, sl3, isc, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d", filename_prefix2, Nconf, sl0, sl1, sl2, sl3, isc); } fprintf(stdout, "# [invert_dw_quda] reading source from file %s\n", source_filename); check_error(read_lime_spinor(g_spinor_field[0], source_filename, 0), "read_lime_spinor", NULL, 115); break; case 2: // timeslice source if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.%.2d.%.5d.qx%.2dqy%.2dqz%.2d", filename_prefix2, Nconf, g_source_timeslice, isc, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.%.2d.%.5d", filename_prefix2, Nconf, g_source_timeslice, isc); } fprintf(stdout, "# [invert_dw_quda] reading source from file %s\n", source_filename); 
check_error(read_lime_spinor(g_spinor_field[0], source_filename, 0), "read_lime_spinor", NULL, 115); break; default: check_error(1, "source type", NULL, 104); break; case -1: // timeslice source sprintf(source_filename, "%s", filename_prefix2); fprintf(stdout, "# [invert_dw_quda] reading source from file %s\n", source_filename); check_error(read_lime_spinor(g_spinor_field[0], source_filename, 0), "read_lime_spinor", NULL, 115); break; } } // of if g_read_source if(g_write_source) { check_error(write_propagator(g_spinor_field[0], source_filename, 0, g_propagator_precision), "write_propagator", NULL, 27); } /*********************************************************************************************** * here threads split: ***********************************************************************************************/ if(dummy_flag==0) strcpy(source_filename_write, source_filename); memcpy((void*)(smearing_spinor_field[0]), (void*)(g_spinor_field[0]), 24*VOLUME*sizeof(double)); if(dummy_flag>0) { // copy only if smearing has been done; otherwise do not copy, do not invert if(g_cart_id==0) fprintf(stdout, "# [] copy smearing field -> g field\n"); memcpy((void*)(g_spinor_field[0]), (void*)(smearing_spinor_field[1]), 24*VOLUME*sizeof(double)); } omp_set_num_threads(g_num_threads); #pragma omp parallel private(threadid, _2_kappa, is, ix, iy, iix, ratime, retime) shared(key,g_read_source, smear_source, N_Jacobi, kappa_Jacobi, smearing_spinor_field, g_spinor_field, nthreads, convert_sign, VOLUME, VOL3, T, L5, isc, rotate_gamma_basis, g_cart_id) firstprivate(inv_param, gauge_param, ofs) { threadid = omp_get_thread_num(); if(threadid < nthreads) { fprintf(stdout, "# [] proc%.2d thread%.2d starting source preparation\n", g_cart_id, threadid); // smearing if( ( !g_read_source || (g_read_source && smear_source ) ) && N_Jacobi > 0 ) { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] smearing source with N_Jacobi=%d, kappa_Jacobi=%e\n", N_Jacobi, kappa_Jacobi); 
Jacobi_Smearing_threaded(gauge_field_smeared, smearing_spinor_field[0], smearing_spinor_field[1], kappa_Jacobi, N_Jacobi, threadid, nthreads); } /*********************************************** * create the 5-dim. source field ***********************************************/ if(convert_sign == 0) { spinor_4d_to_5d_threaded(smearing_spinor_field[0], smearing_spinor_field[0], threadid, nthreads); } else if(convert_sign == 1 || convert_sign == -1) { spinor_4d_to_5d_sign_threaded(smearing_spinor_field[0], smearing_spinor_field[0], convert_sign, threadid, nthreads); } for(is=0; is<L5; is++) { for(it=threadid; it<T; it+=nthreads) { memcpy((void*)(g_spinor_field[0]+_GSI(g_ipt_5d[is][it][0][0][0])), (void*)(smearing_spinor_field[0]+_GSI(g_ipt_5d[is][it][0][0][0])), VOL3*24*sizeof(double)); } } // reorder, multiply with g2 for(is=0; is<L5; is++) { for(it=threadid; it<T; it+=nthreads) { for(i3=0; i3<VOL3; i3++) { ix = (is*T+it)*VOL3 + i3; _fv_eq_zero(smearing_spinor_field[1]+_GSI(ix)); }}} if(rotate_gamma_basis) { for(it=threadid; it<T; it+=nthreads) { for(i3=0; i3<VOL3; i3++) { ix = it * VOL3 + i3; iy = lexic2eot_5d(0, ix); _fv_eq_gamma_ti_fv(smearing_spinor_field[1]+_GSI(iy), 2, smearing_spinor_field[0]+_GSI(ix)); }} for(it=threadid; it<T; it+=nthreads) { for(i3=0; i3<VOL3; i3++) { ix = it * VOL3 + i3; iy = lexic2eot_5d(L5-1, ix); _fv_eq_gamma_ti_fv(smearing_spinor_field[1]+_GSI(iy), 2, smearing_spinor_field[0]+_GSI(ix+(L5-1)*VOLUME)); }} } else { for(it=threadid; it<T; it+=nthreads) { for(i3=0; i3<VOL3; i3++) { ix = it * VOL3 + i3; iy = lexic2eot_5d(0, ix); _fv_eq_fv(smearing_spinor_field[1]+_GSI(iy), smearing_spinor_field[0]+_GSI(ix)); }} for(it=threadid; it<T; it+=nthreads) { for(i3=0; i3<VOL3; i3++) { ix = it * VOL3 + i3; iy = lexic2eot_5d(L5-1, ix); _fv_eq_fv(smearing_spinor_field[1]+_GSI(iy), smearing_spinor_field[0]+_GSI(ix+(L5-1)*VOLUME)); }} } fprintf(stdout, "# [] proc%.2d thread%.2d finished source preparation\n", g_cart_id, threadid); } else if(threadid == 
g_num_threads-1 && dummy_flag > 0) { // else branch on threadid fprintf(stdout, "# [] proc%.2d thread%.2d starting inversion for dummy_flag = %d\n", g_cart_id, threadid, dummy_flag); /*********************************************** * perform the inversion ***********************************************/ if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] starting inversion\n"); xchange_field_5d(g_spinor_field[0]); memset(g_spinor_field[1], 0, (VOLUME+RAND)*L5*24*sizeof(double)); ratime = CLOCK; #ifdef MPI if(inv_param.inv_type == QUDA_BICGSTAB_INVERTER || inv_param.inv_type == QUDA_GCR_INVERTER) { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] calling invertQuda\n"); invertQuda(g_spinor_field[1], g_spinor_field[0], &inv_param); } else if(inv_param.inv_type == QUDA_CG_INVERTER) { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] calling testCG\n"); testCG(g_spinor_field[1], g_spinor_field[0], &inv_param); } else { if(g_cart_id==0) fprintf(stderr, "# [invert_dw_quda] unrecognized inverter\n"); } #else invertQuda(g_spinor_field[1], g_spinor_field[0], &inv_param); #endif retime = CLOCK; if(g_cart_id==0) { fprintf(stdout, "# [invert_dw_quda] QUDA time: %e seconds\n", inv_param.secs); fprintf(stdout, "# [invert_dw_quda] QUDA Gflops: %e\n", inv_param.gflops/inv_param.secs); fprintf(stdout, "# [invert_dw_quda] wall time: %e seconds\n", retime-ratime); fprintf(stdout, "# [invert_dw_quda] Device memory used:\n\tSpinor: %f GiB\n\tGauge: %f GiB\n", inv_param.spinorGiB, gauge_param.gaugeGiB); } } // of if threadid // wait till all threads are here #pragma omp barrier if(inv_param.mass_normalization == QUDA_KAPPA_NORMALIZATION) { _2_kappa = 2. 
* g_kappa5d; for(ix=threadid; ix<VOLUME*L5;ix+=g_num_threads) { _fv_ti_eq_re(g_spinor_field[1]+_GSI(ix), _2_kappa ); } } #pragma omp barrier // reorder, multiply with g2 for(is=0;is<L5;is++) { for(ix=threadid; ix<VOLUME; ix+=g_num_threads) { iy = lexic2eot_5d(is, ix); iix = is*VOLUME + ix; _fv_eq_fv(g_spinor_field[0]+_GSI(iix), g_spinor_field[1]+_GSI(iy)); }} #pragma omp barrier if(rotate_gamma_basis) { for(ix=threadid; ix<VOLUME*L5; ix+=g_num_threads) { _fv_eq_gamma_ti_fv(g_spinor_field[1]+_GSI(ix), 2, g_spinor_field[0]+_GSI(ix)); } } else { for(ix=threadid; ix<VOLUME*L5;ix+=g_num_threads) { _fv_eq_fv(g_spinor_field[1]+_GSI(ix), g_spinor_field[0]+_GSI(ix)); } } if(g_cart_id==0 && threadid==g_num_threads-1) fprintf(stdout, "# [invert_dw_quda] inversion done in %e seconds\n", retime-ratime); #pragma omp single { #ifdef MPI xchange_field_5d(g_spinor_field[1]); #endif /*********************************************** * check residuum ***********************************************/ if(check_residuum && dummy_flag>0) { // apply the Wilson Dirac operator in the gamma-basis defined in cvc_linalg, // which uses the tmLQCD conventions (same as in contractions) // without explicit boundary conditions #ifdef MPI xchange_field_5d(g_spinor_field[2]); xchange_field_5d(g_spinor_field[1]); #endif memset(g_spinor_field[0], 0, 24*(VOLUME+RAND)*L5*sizeof(double)); //sprintf(filename, "%s.inverted.ascii.%.2d", source_filename, g_cart_id); //ofs = fopen(filename, "w"); //printf_spinor_field_5d(g_spinor_field[1], ofs); //fclose(ofs); Q_DW_Wilson_phi(g_spinor_field[0], g_spinor_field[1]); for(ix=0;ix<VOLUME*L5;ix++) { _fv_mi_eq_fv(g_spinor_field[0]+_GSI(ix), g_spinor_field[2]+_GSI(ix)); } spinor_scalar_product_re(&norm2, g_spinor_field[2], g_spinor_field[2], VOLUME*L5); spinor_scalar_product_re(&norm, g_spinor_field[0], g_spinor_field[0], VOLUME*L5); if(g_cart_id==0) fprintf(stdout, "\n# [invert_dw_quda] absolut residuum squared: %e; relative residuum %e\n", norm, sqrt(norm/norm2) ); } 
if(dummy_flag>0) { /*********************************************** * create 4-dim. propagator ***********************************************/ if(convert_sign == 0) { spinor_5d_to_4d(g_spinor_field[1], g_spinor_field[1]); } else if(convert_sign == -1 || convert_sign == +1) { spinor_5d_to_4d_sign(g_spinor_field[1], g_spinor_field[1], convert_sign); } /*********************************************** * write the solution ***********************************************/ sprintf(filename, "%s.inverted", source_filename_write); if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] writing propagator to file %s\n", filename); check_error(write_propagator(g_spinor_field[1], filename, 0, g_propagator_precision), "write_propagator", NULL, 22); //sprintf(filename, "prop.ascii.4d.%.2d.%.2d.%.2d", isc, g_nproc, g_cart_id); //ofs = fopen(filename, "w"); //printf_spinor_field(g_spinor_field[1], ofs); //fclose(ofs); } if(check_residuum) memcpy(g_spinor_field[2], smearing_spinor_field[0], 24*VOLUME*L5*sizeof(double)); } // of omp single } // of omp parallel region if(dummy_flag > 0) strcpy(source_filename_write, source_filename); dummy_flag++; } // of loop on momenta } // of isc #if 0 // last inversion { memcpy(g_spinor_field[0], smearing_spinor_field[1], 24*VOLUME*L5*sizeof(double)); if(g_cart_id==0) fprintf(stdout, "# [] proc%.2d starting last inversion\n", g_cart_id); /*********************************************** * perform the inversion ***********************************************/ if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] starting inversion\n"); xchange_field_5d(g_spinor_field[0]); memset(g_spinor_field[1], 0, (VOLUME+RAND)*L5*24*sizeof(double)); ratime = CLOCK; #ifdef MPI if(inv_param.inv_type == QUDA_BICGSTAB_INVERTER || inv_param.inv_type == QUDA_GCR_INVERTER) { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] calling invertQuda\n"); invertQuda(g_spinor_field[1], g_spinor_field[0], &inv_param); } else if(inv_param.inv_type == QUDA_CG_INVERTER) { 
if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] calling testCG\n"); testCG(g_spinor_field[1], g_spinor_field[0], &inv_param); } else { if(g_cart_id==0) fprintf(stderr, "# [invert_dw_quda] unrecognized inverter\n"); } #else invertQuda(g_spinor_field[1], g_spinor_field[0], &inv_param); #endif retime = CLOCK; if(g_cart_id==0) { fprintf(stdout, "# [invert_dw_quda] QUDA time: %e seconds\n", inv_param.secs); fprintf(stdout, "# [invert_dw_quda] QUDA Gflops: %e\n", inv_param.gflops/inv_param.secs); fprintf(stdout, "# [invert_dw_quda] wall time: %e seconds\n", retime-ratime); fprintf(stdout, "# [invert_dw_quda] Device memory used:\n\tSpinor: %f GiB\n\tGauge: %f GiB\n", inv_param.spinorGiB, gauge_param.gaugeGiB); } omp_set_num_threads(g_num_threads); #pragma omp parallel private(threadid,_2_kappa,is,ix,iy,iix) shared(VOLUME,L5,g_kappa,g_spinor_field,g_num_threads) { threadid = omp_get_thread_num(); if(inv_param.mass_normalization == QUDA_KAPPA_NORMALIZATION) { _2_kappa = 2. * g_kappa5d; for(ix=threadid; ix<VOLUME*L5;ix+=g_num_threads) { _fv_ti_eq_re(g_spinor_field[1]+_GSI(ix), _2_kappa ); } } #pragma omp barrier // reorder, multiply with g2 for(is=0;is<L5;is++) { for(ix=threadid; ix<VOLUME; ix+=g_num_threads) { iy = lexic2eot_5d(is, ix); iix = is*VOLUME + ix; _fv_eq_fv(g_spinor_field[0]+_GSI(iix), g_spinor_field[1]+_GSI(iy)); }} #pragma omp barrier if(rotate_gamma_basis) { for(ix=threadid; ix<VOLUME*L5; ix+=g_num_threads) { _fv_eq_gamma_ti_fv(g_spinor_field[1]+_GSI(ix), 2, g_spinor_field[0]+_GSI(ix)); } } else { for(ix=threadid; ix<VOLUME*L5;ix+=g_num_threads) { _fv_eq_fv(g_spinor_field[1]+_GSI(ix), g_spinor_field[0]+_GSI(ix)); } } } // end of parallel region if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] inversion done in %e seconds\n", retime-ratime); #ifdef MPI xchange_field_5d(g_spinor_field[1]); #endif /*********************************************** * check residuum ***********************************************/ if(check_residuum && dummy_flag>0) { // 
apply the Wilson Dirac operator in the gamma-basis defined in cvc_linalg, // which uses the tmLQCD conventions (same as in contractions) // without explicit boundary conditions #ifdef MPI xchange_field_5d(g_spinor_field[2]); #endif memset(g_spinor_field[0], 0, 24*(VOLUME+RAND)*L5*sizeof(double)); //sprintf(filename, "%s.inverted.ascii.%.2d", source_filename, g_cart_id); //ofs = fopen(filename, "w"); //printf_spinor_field_5d(g_spinor_field[1], ofs); //fclose(ofs); Q_DW_Wilson_phi(g_spinor_field[0], g_spinor_field[1]); for(ix=0;ix<VOLUME*L5;ix++) { _fv_mi_eq_fv(g_spinor_field[0]+_GSI(ix), g_spinor_field[2]+_GSI(ix)); } spinor_scalar_product_re(&norm, g_spinor_field[0], g_spinor_field[0], VOLUME*L5); spinor_scalar_product_re(&norm2, g_spinor_field[2], g_spinor_field[2], VOLUME*L5); if(g_cart_id==0) fprintf(stdout, "\n# [invert_dw_quda] absolut residuum squared: %e; relative residuum %e\n", norm, sqrt(norm/norm2) ); } /*********************************************** * create 4-dim. propagator ***********************************************/ if(convert_sign == 0) { spinor_5d_to_4d(g_spinor_field[1], g_spinor_field[1]); } else if(convert_sign == -1 || convert_sign == +1) { spinor_5d_to_4d_sign(g_spinor_field[1], g_spinor_field[1], convert_sign); } /*********************************************** * write the solution ***********************************************/ sprintf(filename, "%s.inverted", source_filename_write); if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] writing propagator to file %s\n", filename); check_error(write_propagator(g_spinor_field[1], filename, 0, g_propagator_precision), "write_propagator", NULL, 22); //sprintf(filename, "prop.ascii.4d.%.2d.%.2d.%.2d", isc, g_nproc, g_cart_id); //ofs = fopen(filename, "w"); //printf_spinor_field(g_spinor_field[1], ofs); //fclose(ofs); } // of last inversion #endif // of if 0 /*********************************************** * free the allocated memory, finalize ***********************************************/ 
#ifdef HAVE_QUDA // finalize the QUDA library if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] finalizing quda\n"); #ifdef MPI freeGaugeQuda(); #endif endQuda(); #endif if(g_gauge_field != NULL) free(g_gauge_field); if(gauge_field_smeared != NULL) free(gauge_field_smeared); if(no_fields>0) { if(g_spinor_field!=NULL) { for(i=0; i<no_fields; i++) if(g_spinor_field[i]!=NULL) free(g_spinor_field[i]); free(g_spinor_field); } } free_geometry(); if(g_source_momentum_set && full_orbit) { finalize_q_orbits(&qlatt_id, &qlatt_count, &qlatt_list, &qlatt_rep); if(qlatt_map != NULL) { free(qlatt_map[0]); free(qlatt_map); } } if(source_momentum != NULL) free(source_momentum); if(lck != NULL) free(lck); #ifdef MPI #ifdef HAVE_QUDA endCommsQuda(); #else MPI_Finalize(); #endif #endif if(g_cart_id==0) { g_the_time = time(NULL); fprintf(stdout, "\n# [invert_dw_quda] %s# [invert_dw_quda] end of run\n", ctime(&g_the_time)); fprintf(stderr, "\n# [invert_dw_quda] %s# [invert_dw_quda] end of run\n", ctime(&g_the_time)); } return(0); }
int main(int argc, char **argv) { int c, mu, nu, status; int i, j, ncon=-1, ir, is, ic, id; int filename_set = 0; int x0, x1, x2, x3, ix, iix; int y0, y1, y2, y3, iy, iiy; int start_valuet=0, start_valuex=0, start_valuey=0; int num_threads=1, threadid, nthreads; int seed, seed_set=0; double diff1, diff2; /* double *chi=NULL, *psi=NULL; */ double plaq=0., pl_ts, pl_xs, pl_global; double *gauge_field_smeared = NULL; double s[18], t[18], u[18], pl_loc; double spinor1[24], spinor2[24]; double *pl_gather=NULL; double dtmp; complex prod, w, w2; int verbose = 0; char filename[200]; char file1[200]; char file2[200]; FILE *ofs=NULL; double norm, norm2; fermion_propagator_type *prop=NULL, prop2=NULL, seq_prop=NULL, seq_prop2=NULL, prop_aux=NULL, prop_aux2=NULL; int idx, eoflag, shift; float *buffer = NULL; unsigned int VOL3; size_t items, bytes; #ifdef MPI MPI_Init(&argc, &argv); #endif while ((c = getopt(argc, argv, "h?vf:N:c:C:t:s:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'N': ncon = atoi(optarg); break; case 'c': strcpy(file1, optarg); break; case 'C': strcpy(file2, optarg); break; case 't': num_threads = atoi(optarg); break; case 's': seed = atoi(optarg); fprintf(stdout, "# [] use seed value %d\n", seed); seed_set = 1; break; case 'h': case '?': default: usage(); break; } } /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); if(g_cart_id==0) fprintf(stdout, "# Reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } if(g_kappa == 0.) { if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n"); usage(); } /* initialize MPI parameters */ mpi_init(argc, argv); /* initialize T etc. 
*/ fprintf(stdout, "# [%2d] parameters:\n"\ "# [%2d] T_global = %3d\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] LX_global = %3d\n"\ "# [%2d] LX = %3d\n"\ "# [%2d] LXstart = %3d\n"\ "# [%2d] LY_global = %3d\n"\ "# [%2d] LY = %3d\n"\ "# [%2d] LYstart = %3d\n",\ g_cart_id, g_cart_id, T_global, g_cart_id, T, g_cart_id, Tstart, g_cart_id, LX_global, g_cart_id, LX, g_cart_id, LXstart, g_cart_id, LY_global, g_cart_id, LY, g_cart_id, LYstart); if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); exit(101); } geometry(); if(init_geometry_5d() != 0) { fprintf(stderr, "ERROR from init_geometry_5d\n"); exit(102); } geometry_5d(); VOL3 = LX*LY*LZ; /* read the gauge field */ alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND); sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf); if(g_cart_id==0) fprintf(stdout, "# reading gauge field from file %s\n", filename); if(strcmp(gaugefilename_prefix, "identity")==0) { status = unit_gauge_field(g_gauge_field, VOLUME); } else { // status = read_nersc_gauge_field_3x3(g_gauge_field, filename, &plaq); // status = read_ildg_nersc_gauge_field(g_gauge_field, filename); status = read_lime_gauge_field_doubleprec(filename); // status = read_nersc_gauge_field(g_gauge_field, filename, &plaq); // status = 0; } if(status != 0) { fprintf(stderr, "[apply_Dtm] Error, could not read gauge field\n"); exit(11); } xchange_gauge(); // measure the plaquette if(g_cart_id==0) fprintf(stdout, "# read plaquette value 1st field: %25.16e\n", plaq); plaquette(&plaq); if(g_cart_id==0) fprintf(stdout, "# measured plaquette value 1st field: %25.16e\n", plaq); g_kappa5d = 0.5 / (5. 
+ g_m0); fprintf(stdout, "# [] g_kappa5d = %e\n", g_kappa5d); no_fields=4; g_spinor_field = (double**)calloc(no_fields, sizeof(double*)); for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], L5*VOLUMEPLUSRAND); /* items = VOL3 * 288; bytes = items * sizeof(float); if( (buffer = (float*)malloc( bytes ) ) == NULL ) { fprintf(stderr, "[] Error, could not allocate buffer\n"); exit(20); } */ /**************************************** * read read the spinor fields ****************************************/ /* prop = create_fp_field(VOL3); create_fp(&prop2); create_fp(&prop_aux); create_fp(&prop_aux2); create_fp(&seq_prop); create_fp(&seq_prop2); */ #ifdef MPI if(!seed_set) { seed = g_seed; } srand(seed+g_cart_id); for(ix=0;ix<VOLUME*L5;ix++) { for(i=0;i<24;i++) { spinor1[i] = 2* (double)rand() / (double)RAND_MAX - 1.; } _fv_eq_fv(g_spinor_field[0]+_GSI(ix), spinor1 ); } for(i=0;i<g_nproc;i++) { if(g_cart_id==i) { if(i==0) ofs = fopen("source", "w"); else ofs = fopen("source", "a"); for(is=0;is<L5;is++) { for(x0=0;x0<T; x0++) { for(x1=0;x1<LX; x1++) { for(x2=0;x2<LX; x2++) { for(x3=0;x3<LX; x3++) { iix = is*VOLUME*g_nproc + (((x0+g_proc_coords[0]*T)*LX*g_nproc_x+ x1+g_proc_coords[1]*LX )*LY*g_nproc_y + x2+g_proc_coords[2]*LY )*LZ*g_nproc_z + x3+g_proc_coords[3]*LZ; ix = g_ipt_5d[is][x0][x1][x2][x3]; for(c=0;c<24;c++) { fprintf(ofs, "%8d%8d%3d%25.16e\n", iix, ix, c, g_spinor_field[0][_GSI(ix)+c]); } }}}} } fclose(ofs); } #ifdef MPI MPI_Barrier(g_cart_grid); #endif } #else ofs = fopen("source", "r"); for(ix=0;ix<24*VOLUME*L5;ix++) { fscanf(ofs, "%d%d%d%lf", &x1,&x2,&x3, &dtmp); g_spinor_field[0][_GSI(x1)+x3] = dtmp; } fclose(ofs); #endif xchange_field_5d(g_spinor_field[0]); Q_DW_Wilson_dag_phi(g_spinor_field[1], g_spinor_field[0]); xchange_field_5d(g_spinor_field[1]); Q_DW_Wilson_phi(g_spinor_field[2], g_spinor_field[1]); sprintf(filename, "prop_%.2d.%.2d", g_nproc, g_cart_id); ofs = fopen(filename, "w"); printf_spinor_field_5d(g_spinor_field[2], ofs); 
fclose(ofs); // for(ix=0;ix<VOLUME*L5;ix++) { // for(i=0;i<24;i++) { // spinor1[i] = 2* (double)rand() / (double)RAND_MAX - 1.; // } // _fv_eq_fv(g_spinor_field[1]+_GSI(ix), spinor1 ); // } /* xchange_field_5d(g_spinor_field[0]); sprintf(filename, "spinor.%.2d", g_cart_id); ofs = fopen(filename, "w"); printf_spinor_field_5d(g_spinor_field[0], ofs); fclose(ofs); */ /* // 2 = D 0 Q_DW_Wilson_phi(g_spinor_field[2], g_spinor_field[0]); // 3 = D^dagger 1 Q_DW_Wilson_dag_phi(g_spinor_field[3], g_spinor_field[1]); // <1, 2> = <1, D 0 > spinor_scalar_product_co(&w, g_spinor_field[1], g_spinor_field[2], VOLUME*L5); // <3, 0> = < D^dagger 1, 0 > spinor_scalar_product_co(&w2, g_spinor_field[3], g_spinor_field[0], VOLUME*L5); fprintf(stdout, "# [] w = %e + %e*1.i\n", w.re, w.im); fprintf(stdout, "# [] w2 = %e + %e*1.i\n", w2.re, w2.im); fprintf(stdout, "# [] abs difference = %e \n", sqrt(_SQR(w2.re-w.re)+_SQR(w2.im-w.im)) ); */ /* for(i=0;i<12;i++) { fprintf(stdout, "s1[%2d] <- %25.16e + %25.16e*1.i\n", i+1, spinor1[2*i], spinor1[2*i+1]); } for(i=0;i<24;i++) { spinor2[i] = 2* (double)rand() / (double)RAND_MAX - 1.; } for(i=0;i<12;i++) { fprintf(stdout, "s2[%2d] <- %25.16e + %25.16e*1.i\n", i+1, spinor2[2*i], spinor2[2*i+1]); } _fv_mi_eq_PRe_fv(spinor2, spinor1); for(i=0;i<12;i++) { fprintf(stdout, "s3[%2d] <- %25.16e + %25.16e*1.i\n", i+1, spinor2[2*i], spinor2[2*i+1]); } */ /* ofs = fopen("dw_spinor", "w"); Q_DW_Wilson_phi(g_spinor_field[1], g_spinor_field[0]); printf_spinor_field(g_spinor_field[1], ofs); fclose(ofs); g_kappa = g_kappa5d; ofs = fopen("wilson_spinor", "w"); Q_Wilson_phi(g_spinor_field[2], g_spinor_field[0]); printf_spinor_field(g_spinor_field[2], ofs); fclose(ofs); */ #ifdef _UNDEF /******************************************************************* * propagators *******************************************************************/ // for(i=0; i<12;i++) for(i=0; i<1;i++) { //sprintf(file1, "source.%.4d.t00x00y00z00.%.2d.inverted", Nconf, i); sprintf(file1, 
"/home/mpetschlies/quda-0.3.2/tests/prop"); if(g_cart_id==0) fprintf(stdout, "# Reading prop. from file %s\n", file1); fflush(stdout); //if( read_lime_spinor(g_spinor_field[0], file1, 0) != 0 ) { ofs = fopen(file1, "rb"); if( fread(g_spinor_field[0], sizeof(double), 24*L5*VOLUME, ofs) != 24*L5*VOLUME) { fprintf(stderr, "Error, could not read proper amount of data from file %s\n", file1); exit(100); } fclose(ofs); for(ix=0;ix<VOLUME*L5;ix++) { _fv_ti_eq_re(g_spinor_field[0]+_GSI(ix), 2.*g_kappa5d); } /* if( (ofs = fopen("prop_full", "w")) == NULL ) exit(22); for(ix=0;ix<L5;ix++) { fprintf(ofs, "# [] s = %d\n", ix); printf_spinor_field(g_spinor_field[0]+_GSI(ix*VOLUME), ofs); } fclose(ofs); */ // reorder, multiply with g2 for(is=0,iix=0; is<L5; is++) { for(ix=0; ix<VOLUME; ix++) { iiy = lexic2eot_5d (is, ix); _fv_eq_fv(spinor1, g_spinor_field[0]+_GSI(iiy)); _fv_eq_gamma_ti_fv(g_spinor_field[1]+_GSI(iix), 2, spinor1 ); iix++; }} Q_DW_Wilson_phi(g_spinor_field[2], g_spinor_field[1]); // Q_DW_Wilson_dag_phi(g_spinor_field[2], g_spinor_field[1]); fprintf(stdout, "# [] finished application of Dirac operator\n"); fflush(stdout); // reorder, multiply with g2 for(is=0, iix=0;is<L5;is++) { for(ix=0; ix<VOLUME; ix++) { iiy = lexic2eot_5d(is, ix); _fv_eq_fv(spinor1, g_spinor_field[2]+_GSI(iix)); _fv_eq_gamma_ti_fv(g_spinor_field[1]+_GSI(iiy), 2, spinor1 ); iix++; }} if( (ofs = fopen("my_out", "w")) == NULL ) exit(23); for(ix=0;ix<L5;ix++) { fprintf(ofs, "# [] s = %d\n", ix); printf_spinor_field(g_spinor_field[1]+_GSI(ix*VOLUME), ofs); } fclose(ofs); sprintf(file1, "/home/mpetschlies/quda-0.3.2/tests/source"); if(g_cart_id==0) fprintf(stdout, "# Reading prop. 
from file %s\n", file1); fflush(stdout); //if( read_lime_spinor(g_spinor_field[0], file1, 0) != 0 ) { ofs = fopen(file1, "rb"); if( fread(g_spinor_field[2], sizeof(double), 24*L5*VOLUME, ofs) != 24*L5*VOLUME) { fprintf(stderr, "Error, could not read proper amount of data from file %s\n", file1); exit(100); } fclose(ofs); /* if( (ofs = fopen("v_out", "w")) == NULL ) exit(23); for(ix=0;ix<L5;ix++) { fprintf(ofs, "# [] s = %d\n", ix); printf_spinor_field(g_spinor_field[2]+_GSI(ix*VOLUME), ofs); } fclose(ofs); */ spinor_scalar_product_re(&norm2, g_spinor_field[2], g_spinor_field[2], VOLUME*L5); for(ix=0;ix<VOLUME*L5;ix++) { _fv_mi_eq_fv(g_spinor_field[1]+_GSI(ix), g_spinor_field[2]+_GSI(ix)); } spinor_scalar_product_re(&norm, g_spinor_field[1], g_spinor_field[1], VOLUME*L5); fprintf(stdout, "\n# [] absolut residuum squared: %e; relative residuum %e\n", norm, sqrt(norm/norm2) ); } // of loop on spin color indices #endif /*********************************************** * free the allocated memory, finalize ***********************************************/ free(g_gauge_field); free_geometry(); if(gauge_field_smeared != NULL) free(gauge_field_smeared); if(g_spinor_field != NULL) { for(i=0; i<no_fields; i++) free(g_spinor_field[i]); free(g_spinor_field); } free(buffer); free_fp_field(&prop); free_fp(&prop2); free_fp(&prop_aux); free_fp(&prop_aux2); free_fp(&seq_prop); free_fp(&seq_prop2); g_the_time = time(NULL); fprintf(stdout, "# [] %s# [] end fo run\n", ctime(&g_the_time)); fflush(stdout); fprintf(stderr, "# [] %s# [] end fo run\n", ctime(&g_the_time)); fflush(stderr); #ifdef MPI MPI_Finalize(); #endif return(0); }
int main(int argc, char *argv[]) { FILE *parameterfile = NULL; int c, j; char * filename = NULL; char datafilename[50]; char parameterfilename[50]; char conf_filename[50]; char * input_filename = NULL; char * xlfmessage = NULL; char * gaugelfn = NULL; char * gaugecksum = NULL; double plaquette_energy; #ifdef _KOJAK_INST #pragma pomp inst init #pragma pomp inst begin(main) #endif #ifdef HAVE_LIBLEMON MPI_File fh; LemonWriter *lemonWriter; paramsXlfInfo *xlfInfo; paramsPropagatorFormat *propagatorFormat; #endif #if (defined SSE || defined SSE2 || SSE3) signal(SIGILL, &catch_ill_inst); #endif DUM_DERI = 6; /* DUM_DERI + 2 is enough (not 7) */ DUM_SOLVER = DUM_DERI + 3; DUM_MATRIX = DUM_SOLVER + 8; /* DUM_MATRIX + 2 is enough (not 6) */ NO_OF_SPINORFIELDS = DUM_MATRIX + 2; verbose = 0; g_use_clover_flag = 0; #ifdef MPI MPI_Init(&argc, &argv); #endif while ((c = getopt(argc, argv, "h?f:o:")) != -1) { switch (c) { case 'f': input_filename = calloc(200, sizeof(char)); strcpy(input_filename, optarg); break; case 'o': filename = calloc(200, sizeof(char)); strcpy(filename, optarg); break; case 'h': case '?': default: usage(); break; } } if (input_filename == NULL) { input_filename = "hmc.input"; } if (filename == NULL) { filename = "output"; } /* Read the input file */ read_input(input_filename); if (solver_flag == 12 && even_odd_flag == 1) { even_odd_flag = 0; if (g_proc_id == 0) { fprintf(stderr, "CGMMS works only without even/odd! Forcing!\n"); } } /* this DBW2 stuff is not needed for the inversion ! 
*/ if (g_dflgcr_flag == 1) { even_odd_flag = 0; } g_rgi_C1 = 0; if (Nsave == 0) { Nsave = 1; } if(g_running_phmc) { NO_OF_SPINORFIELDS = DUM_MATRIX + 8; } mpi_init(argc, argv); g_dbw2rand = 0; /* starts the single and double precision random number */ /* generator */ start_ranlux(rlxd_level, random_seed); #ifndef MPI g_dbw2rand = 0; #endif #ifdef _GAUGE_COPY j = init_gauge_field(VOLUMEPLUSRAND, 1); #else j = init_gauge_field(VOLUMEPLUSRAND, 0); #endif if(j != 0) { fprintf(stderr, "Not enough memory for gauge_fields! Aborting...\n"); exit(-1); } j = init_geometry_indices(VOLUMEPLUSRAND); if(j != 0) { fprintf(stderr, "Not enough memory for geometry indices! Aborting...\n"); exit(-1); } if(no_monomials > 0) { if(even_odd_flag) { j = init_monomials(VOLUMEPLUSRAND / 2, even_odd_flag); } else { j = init_monomials(VOLUMEPLUSRAND, even_odd_flag); } if(j != 0) { fprintf(stderr, "Not enough memory for monomial pseudo fermion fields! Aborting...\n"); exit(0); } } if(even_odd_flag) { j = init_spinor_field(VOLUMEPLUSRAND / 2, NO_OF_SPINORFIELDS); } else { j = init_spinor_field(VOLUMEPLUSRAND, NO_OF_SPINORFIELDS); } if(j != 0) { fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n"); exit(-1); } if(g_running_phmc) { j = init_chi_up_spinor_field(VOLUMEPLUSRAND / 2, 20); if(j != 0) { fprintf(stderr, "Not enough memory for PHMC Chi_up fields! Aborting...\n"); exit(0); } j = init_chi_dn_spinor_field(VOLUMEPLUSRAND / 2, 20); if(j != 0) { fprintf(stderr, "Not enough memory for PHMC Chi_dn fields! 
Aborting...\n"); exit(0); } } g_mu = g_mu1; if(g_proc_id == 0) { /*construct the filenames for the observables and the parameters*/ strcpy(datafilename, filename); strcat(datafilename, ".data"); strcpy(parameterfilename, filename); strcat(parameterfilename, ".para"); parameterfile = fopen(parameterfilename, "w"); write_first_messages(parameterfile, 1); fclose(parameterfile); } /* this is for the extra masses of the CGMMS */ if (solver_flag == 12 && g_no_extra_masses > 0) { if ((parameterfile = fopen("extra_masses.input", "r")) != NULL) { for (j = 0; j < g_no_extra_masses; j++) { fscanf(parameterfile, "%lf", &g_extra_masses[j]); if (g_proc_id == 0 && g_debug_level > 0) { printf("# g_extra_masses[%d] = %lf\n", j, g_extra_masses[j]); } } fclose(parameterfile); } else { fprintf(stderr, "Could not open file extra_masses.input!\n"); g_no_extra_masses = 0; } } /* define the geometry */ geometry(); /* define the boundary conditions for the fermion fields */ boundary(g_kappa); phmc_invmaxev = 1.; #ifdef _USE_HALFSPINOR j = init_dirac_halfspinor(); if (j != 0) { fprintf(stderr, "Not enough memory for halffield! Aborting...\n"); exit(-1); } if (g_sloppy_precision_flag == 1) { j = init_dirac_halfspinor32(); if (j != 0) { fprintf(stderr, "Not enough memory for 32-Bit halffield! 
Aborting...\n"); exit(-1); } } # if (defined _PERSISTENT) if (even_odd_flag) { init_xchange_halffield(); } # endif #endif for (j = 0; j < Nmeas; j++) { sprintf(conf_filename, "%s.%.4d", gauge_input_filename, nstore); if (g_proc_id == 0) { printf("Reading Gauge field from file %s\n", conf_filename); fflush(stdout); } #ifdef HAVE_LIBLEMON read_lemon_gauge_field_parallel(conf_filename, &gaugecksum, &xlfmessage, &gaugelfn); #else /* HAVE_LIBLEMON */ if (xlfmessage != (char*)NULL) free(xlfmessage); if (gaugelfn != (char*)NULL) free(gaugelfn); if (gaugecksum != (char*)NULL) free(gaugecksum); read_lime_gauge_field(conf_filename); xlfmessage = read_message(conf_filename, "xlf-info"); gaugelfn = read_message(conf_filename, "ildg-data-lfn"); gaugecksum = read_message(conf_filename, "scidac-checksum"); printf("%s \n", gaugecksum); #endif /* HAVE_LIBLEMON */ if (g_proc_id == 0) { printf("done!\n"); fflush(stdout); } /* unit_g_gauge_field(); */ #ifdef MPI xchange_gauge(g_gauge_field); #endif /*compute the energy of the gauge field*/ plaquette_energy = measure_gauge_action(); if (g_proc_id == 0) { printf("The plaquette value is %e\n", plaquette_energy / (6.*VOLUME*g_nproc)); fflush(stdout); } if (use_stout_flag == 1) { if (stout_smear_gauge_field(stout_rho , stout_no_iter) != 0) { exit(1) ; } plaquette_energy = measure_gauge_action(); if (g_proc_id == 0) { printf("The plaquette value after stouting is %e\n", plaquette_energy / (6.*VOLUME*g_nproc)); fflush(stdout); } } /* Compute minimal eigenvalues, necessary for overlap! 
*/ if (compute_evs != 0) eigenvalues(&no_eigenvalues, max_solver_iterations, eigenvalue_precision, 0, compute_evs, nstore, even_odd_flag); else { compute_evs = 1; no_eigenvalues = 1; eigenvalues(&no_eigenvalues, max_solver_iterations, eigenvalue_precision, 0, compute_evs, nstore, even_odd_flag); no_eigenvalues = 0; compute_evs = 0; } if (phmc_compute_evs != 0) { #ifdef MPI MPI_Finalize(); #endif return (0); } /* here we can do something */ ov_n_cheby = (-log(delta))/(2*sqrt(ev_minev)); printf("// Degree of cheby polynomial: %d\n", ov_n_cheby); // g_mu = 0.; ov_check_locality(); // ov_check_ginsparg_wilson_relation_strong(); // ov_compare_4x4("overlap.mat"); // ov_compare_12x12("overlap.mat"); // ov_save_12x12("overlap.mat"); // ov_check_operator(1,0,0,0); nstore += Nsave; } #ifdef MPI MPI_Finalize(); #endif free_blocks(); free_dfl_subspace(); free_gauge_field(); free_geometry_indices(); free_spinor_field(); free_moment_field(); if (g_running_phmc) { free_chi_up_spinor_field(); free_chi_dn_spinor_field(); } return(0); #ifdef _KOJAK_INST #pragma pomp inst end(main) #endif }
int main(int argc, char **argv) { int c, i, mu, nu; int count = 0; int filename_set = 0; int dims[4] = {0,0,0,0}; int l_LX_at, l_LXstart_at; int x0, x1, x2, x3, ix, iix; int sid, status, gid; double *disc = (double*)NULL; double *work = (double*)NULL; double q[4], fnorm; int verbose = 0; int do_gt = 0; char filename[100], contype[200]; double ratime, retime; double plaq; double spinor1[24], spinor2[24], U_[18]; double *gauge_trafo=(double*)NULL; complex w, w1, *cp1, *cp2, *cp3; FILE *ofs; #ifdef MPI // MPI_Init(&argc, &argv); fprintf(stderr, "[jc_ud_x] Error, only non-mpi version implemented\n"); exit(1); #endif while ((c = getopt(argc, argv, "h?f:")) != -1) { switch (c) { case 'f': strcpy(filename, optarg); filename_set=1; break; case 'h': case '?': default: usage(); break; } } /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# Reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } if(g_kappa == 0.) 
{ if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n"); usage(); } fprintf(stdout, "\n**************************************************\n"); fprintf(stdout, "* jc_ud_x\n"); fprintf(stdout, "**************************************************\n\n"); /********************************* * initialize MPI parameters *********************************/ // mpi_init(argc, argv); /* initialize fftw */ dims[0]=T_global; dims[1]=LX; dims[2]=LY; dims[3]=LZ; T = T_global; Tstart = 0; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; fprintf(stdout, "# [%2d] parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] l_LX_at = %3d\n"\ "# [%2d] l_LXstart_at = %3d\n"\ "# [%2d] FFTW_LOC_VOLUME = %3d\n", g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at, g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME); if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); exit(1); } geometry(); /************************************************* * allocate mem for gauge field and spinor fields *************************************************/ alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND); no_fields = 2; g_spinor_field = (double**)calloc(no_fields, sizeof(double*)); for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND); /**************************************** * allocate memory for the contractions ****************************************/ disc = (double*)calloc( 8*VOLUME, sizeof(double)); if( disc == (double*)NULL ) { fprintf(stderr, "could not allocate memory for disc\n"); exit(3); } /*********************************************** * start loop on gauge id.s ***********************************************/ for(gid=g_gaugeid; gid<=g_gaugeid2; gid++) { for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.; sprintf(filename, "%s.%.4d", gaugefilename_prefix, gid); if(g_cart_id==0) fprintf(stdout, "# reading gauge field from file %s\n", filename); read_lime_gauge_field_doubleprec(filename); xchange_gauge(); 
plaquette(&plaq); if(g_cart_id==0) fprintf(stdout, "# measured plaquette value: %25.16e\n", plaq); /*********************************************** * start loop on source id.s ***********************************************/ for(sid=g_sourceid; sid<=g_sourceid2; sid+=g_sourceid_step) { /* reset disc to zero */ for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.; /* read the new propagator to g_spinor_field[0] */ ratime = (double)clock() / CLOCKS_PER_SEC; if(format==0) { sprintf(filename, "%s.%.4d.%.2d.inverted", filename_prefix, gid, sid); if(read_lime_spinor(g_spinor_field[0], filename, 0) != 0) break; } else if(format==1) { sprintf(filename, "%s.%.4d.%.5d.inverted", filename_prefix, gid, sid); if(read_cmi(g_spinor_field[0], filename) != 0) break; } xchange_field(g_spinor_field[0]); retime = (double)clock() / CLOCKS_PER_SEC; if(g_cart_id==0) fprintf(stdout, "# time to read prop.: %e seconds\n", retime-ratime); ratime = (double)clock() / CLOCKS_PER_SEC; /* apply D_W once, save in g_spinor_field[1] */ Hopping(g_spinor_field[1], g_spinor_field[0]); for(ix=0; ix<VOLUME; ix++) { _fv_pl_eq_fv(g_spinor_field[1]+_GSI(ix), g_spinor_field[0]+_GSI(ix)); _fv_ti_eq_re(g_spinor_field[1]+_GSI(ix), 1./(2.*g_kappa)); } xchange_field(g_spinor_field[1]); retime = (double)clock() / CLOCKS_PER_SEC; if(g_cart_id==0) fprintf(stdout, "# time to apply D_W: %e seconds\n", retime-ratime); ratime = (double)clock() / CLOCKS_PER_SEC; /* calculate real and imaginary part */ for(mu=0; mu<4; mu++) { for(ix=0; ix<VOLUME; ix++) { _cm_eq_cm_ti_co(U_, g_gauge_field+_GGI(ix,mu), &(co_phase_up[mu])); _fv_eq_gamma_ti_fv(spinor1, 5, g_spinor_field[0]+_GSI(g_iup[ix][mu])); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_pl_eq_fv(spinor2, spinor1); _fv_eq_cm_ti_fv(spinor1, U_, spinor2); _co_eq_fv_dag_ti_fv(&w, g_spinor_field[0]+_GSI(ix), spinor1); disc[_GWI(mu,ix,VOLUME) ] = g_mu * w.im; _fv_eq_gamma_ti_fv(spinor1, mu, g_spinor_field[1]+_GSI(g_iup[ix][mu])); _fv_pl_eq_fv(spinor1, 
g_spinor_field[1]+_GSI(g_iup[ix][mu])); _fv_eq_cm_ti_fv(spinor2, U_, spinor1); _co_eq_fv_dag_ti_fv(&w, g_spinor_field[0]+_GSI(ix), spinor2); disc[_GWI(mu,ix,VOLUME)+1] = w.im / 3.; } } retime = (double)clock() / CLOCKS_PER_SEC; if(g_cart_id==0) fprintf(stdout, "# time to calculate contractions: %e seconds\n", retime-ratime); /************************************************ * save results ************************************************/ if(g_cart_id == 0) fprintf(stdout, "# save results for gauge id %d and sid %d\n", gid, sid); /* save the result in position space */ fnorm = 1. / g_prop_normsqr; if(g_cart_id==0) fprintf(stdout, "X-fnorm = %e\n", fnorm); for(mu=0; mu<4; mu++) { for(ix=0; ix<VOLUME; ix++) { disc[_GWI(mu,ix,VOLUME) ] *= fnorm; disc[_GWI(mu,ix,VOLUME)+1] *= fnorm; } } sprintf(filename, "jc_ud_x.%.4d.%.4d", gid, sid); sprintf(contype, "jc-u_and_d-X"); write_lime_contraction(disc, filename, 64, 4, contype, gid, sid); //sprintf(filename, "jc_ud_x.%.4d.%.4d.ascii", gid, sid); //write_contraction (disc, NULL, filename, 4, 2, 0); } /* of loop on sid */ } /* of loop on gid */ /*********************************************** * free the allocated memory, finalize ***********************************************/ free(g_gauge_field); for(i=0; i<no_fields; i++) free(g_spinor_field[i]); free(g_spinor_field); free_geometry(); free(disc); return(0); }
int stout_smear_gauge_field(const double rho , const int no_iters) { const int dim=4 ; int iter , mu , x; su3 *gauge_wk[4] ; su3 wk_staple ; su3 omega , Exp_p ; su3adj p; su3 *gauge_local ; su3 new_gauge_local ; /*printf("Entering stout_smear_gauge_field\n");*/ if(g_proc_id == 0 && g_debug_level > 3) { printf("DUMP OF g_gauge_field in STOUT\n"); print_config_to_screen(g_gauge_field); printf("STOUT smearing the gauge fields\n") ; printf("rho = %g number of iterations = %d\n",rho,no_iters) ; } /* reserve memory */ for(mu = 0 ; mu < dim ; ++mu) { gauge_wk[mu] = calloc(VOLUME, sizeof(su3)); if(errno == ENOMEM) { return(1); } } /* start of the the stout smearing **/ for(iter = 0 ; iter < no_iters ; ++iter) { for(mu = 0 ; mu < dim ; ++mu) { for(x= 0 ; x < VOLUME ; x++) { /* * we need to save all intermediate gauge configurations * because they are needed for the force back iteration in * "stout_smear_force.c" */ /*_su3_assign(g_gauge_field_smear_iterations[iter][x][mu], g_gauge_field[x][mu]);*/ /* get staples */ wk_staple = get_staples(x, mu, g_gauge_field) ; scale_su3(&wk_staple, rho) ; /* omega = staple * u^dagger */ gauge_local = &g_gauge_field[x][mu]; _su3_times_su3d(omega,wk_staple,*gauge_local); /* project out anti-hermitian traceless part */ project_anti_herm(&omega) ; /* exponentiate */ _trace_lambda(p,omega) ; /* -2.0 to get su3 to su3adjoint consistency ****/ p.d1 /= -2.0 ; p.d2 /= -2.0 ; p.d3 /= -2.0 ; p.d4 /= -2.0 ; p.d5 /= -2.0 ; p.d6 /= -2.0 ; p.d7 /= -2.0 ; p.d8 /= -2.0 ; Exp_p = exposu3(p); /* new_gauge_local = Exp_p * gauge_local */ _su3_times_su3(new_gauge_local,Exp_p,*gauge_local); gauge_wk[mu][x] = new_gauge_local ; } /* end the loop over space-time */ } /** update gauge field on this node **/ for(mu = 0 ; mu < dim ; ++mu) { for(x= 0 ; x < VOLUME ; ++x) { g_gauge_field[x][mu] = gauge_wk[mu][x] ; } } if(g_debug_level > 3 && g_proc_id == 0) { printf("DUMP OF g_gauge_field in STOUT\n"); print_config_to_screen(g_gauge_field); } #ifdef MPI /** update 
boundaries for parallel stuff **/ xchange_gauge(); #endif g_update_gauge_copy = 1; g_update_gauge_energy = 1; g_update_rectangle_energy = 1; /* * here we save the intermediate smeares gauge fields a large array */ } /* end loop over stout smearing iterations */ /* free up memory */ for(mu=0 ; mu < dim ; ++mu) { free(gauge_wk[mu]); } if(g_debug_level > 3 && g_proc_id == 0) { printf("Leaving stout_smear_gauge_field\n"); } return(0); }