// Application entry point.
// Registers the "D3D Tutorial" window class, creates the window, runs
// InitD3D() and init_geometry(), and - only when both succeed - shows the
// window and pumps messages until WM_QUIT, calling Render() whenever the
// queue is empty. The window class is unregistered on every path.
INT WINAPI WinMain( HINSTANCE hInst, HINSTANCE, LPSTR, INT )
{
    // Describe the window class; every member not set below stays zero/NULL.
    WNDCLASSEX wc;
    ZeroMemory( &wc, sizeof(wc) );
    wc.cbSize        = sizeof(WNDCLASSEX);
    wc.style         = CS_CLASSDC;
    wc.lpfnWndProc   = MsgProc;
    wc.cbClsExtra    = 0L;
    wc.cbWndExtra    = 0L;
    wc.hInstance     = GetModuleHandle(NULL);
    wc.lpszClassName = "D3D Tutorial";
    RegisterClassEx( &wc );

    // Create the application's window.
    HWND hWnd = CreateWindow( "D3D Tutorial", "D3D Tutorial 04",
                              WS_OVERLAPPEDWINDOW, 100, 100, 300, 300,
                              GetDesktopWindow(), NULL, wc.hInstance, NULL );

    // init_geometry() is only attempted once InitD3D() has succeeded.
    if ( !FAILED( InitD3D(hWnd) ) && !FAILED( init_geometry() ) )
    {
        ShowWindow( hWnd, SW_SHOWDEFAULT );
        UpdateWindow( hWnd );

        MSG msg;
        ZeroMemory( &msg, sizeof(msg) );
        while ( msg.message != WM_QUIT )
        {
            if ( !PeekMessage( &msg, NULL, 0, 0, PM_REMOVE ) )
            {
                // Nothing pending: use the idle time to draw a frame.
                Render();
                continue;
            }
            TranslateMessage( &msg );
            DispatchMessage( &msg );
        }
    }

    UnregisterClass( "D3D Tutorial", wc.hInstance );
    return 0;
}
/*
 * Create one shape with new_taurus(30, 15, 30, 30), flush the output to
 * stdout through flushOBJ(), then release the shape and shut the geometry
 * module down.
 */
int main() {
    shape_t *torus;

    init_geometry();

    torus = new_taurus(30, 15, 30, 30);
    flushOBJ(stdout);
    free_shape(torus);

    finalize_geometry();
    return 0;
}
/*
 * Turn the recorded points pt[0..numpoints-1] into a profile shape, pass it
 * to new_revolution() with DIVISION_NUMBER, and flush the output to file_out.
 *
 * The y coordinate is flipped (WINDOW_HEIGHT - y) before it is stored.
 *
 * Both the profile and the shape returned by new_revolution() are released
 * with free_shape() before finalize_geometry(), mirroring what the
 * stand-alone revolution driver does; previously neither was freed here.
 */
void to_revolution() {
    init_geometry();

    shape_t *profile = new_shape();
    for (unsigned i = 0; i < numpoints; ++i) {
        add_vertex(profile, new_vertex(pt[i].x, WINDOW_HEIGHT - pt[i].y, 0));
    }

    shape_t *solid = new_revolution(profile, DIVISION_NUMBER);
    flushOBJ(file_out);

    /* Release only after flushing, i.e. once the output has been written. */
    free_shape(profile);
    if (solid != NULL) {
        free_shape(solid);
    }

    finalize_geometry();
}
/*
 * Build a nine-vertex profile, pass it to new_revolution() with 60 as the
 * second argument, flush the output to stdout, and release both shapes.
 */
int main() {
    /* (x, y) of each profile vertex, in insertion order; z is always 0. */
    static const int outline[][2] = {
        {   0, 300 }, {  20, 300 }, {  20, 290 },
        { 300, 220 }, { 295, 218 }, {  60, 260 },
        {   5, 220 }, {   5,   0 }, {   0,   0 },
    };
    const int outline_len = (int)(sizeof outline / sizeof outline[0]);
    shape_t *profile;
    shape_t *solid;
    int k;

    init_geometry();

    profile = new_shape();
    for (k = 0; k < outline_len; ++k) {
        add_vertex(profile, new_vertex(outline[k][0], outline[k][1], 0));
    }

    solid = new_revolution(profile, 60);
    flushOBJ(stdout);

    free_shape(profile);
    free_shape(solid);
    finalize_geometry();
    return 0;
}
/*
 * Build a small scene - one large sphere, two small translated spheres and
 * two scaled/rotated tori - flush it to stdout with flushOBJ(), then release
 * every shape that was created.
 *
 * The two small spheres (sp1, sp2) used to be left unreleased while the
 * other three shapes were freed; all five are now freed the same way.
 */
int main() {
    init_geometry();

    shape_t *planet = new_sphere(50, 20, 20);
    shape_t *sp1 = new_sphere(10, 16, 16);
    shape_t *sp2 = new_sphere(10, 16, 16);
    shape_t *t1 = new_taurus(30, 4, 50, 10);
    shape_t *t2 = new_taurus(30, 4, 40, 10);

    /* Move the two small spheres away from the origin. */
    shape_translate(sp1, 80, 70, 40);
    shape_translate(sp2, -90, 10, -60);

    /* Stretch the tori, then rotate them in opposite directions. */
    shape_scale(t1, 5, 1.2, 5);
    shape_scale(t2, 3.5, 1.2, 3.5);
    shape_rotate(t1, -20, 0, 0, 1);
    shape_rotate(t2, 20, 0, 0, 1);

    flushOBJ(stdout);

    free_shape(planet);
    free_shape(sp1);
    free_shape(sp2);
    free_shape(t1);
    free_shape(t2);

    finalize_geometry();
    return 0;
}
int main(int argc, char **argv) { int c, i, mu, nu; int filename_set = 0; int dims[4] = {0,0,0,0}; int l_LX_at, l_LXstart_at; int x0, x1, x2, x3, ix, iix; int xx0, xx1, xx2, xx3; int y0min, y0max, y1min, y1max, y2min, y2max, y3min, y3max; int y0, y1, y2, y3, iy; int z0, z1, z2, z3, iz; int gid, status; int model_type = -1; double *disc = (double*)NULL; double *disc2 = (double*)NULL; double *work = (double*)NULL; double q[4], fnorm; char filename[100], contype[200]; double ratime, retime; double rmin2, rmax2, rsqr; complex w, w1; FILE *ofs; fftw_complex *in=(fftw_complex*)NULL; #ifdef MPI fftwnd_mpi_plan plan_p, plan_m; #else fftwnd_plan plan_p, plan_m; #endif #ifdef MPI MPI_Init(&argc, &argv); #endif while ((c = getopt(argc, argv, "h?f:t:")) != -1) { switch (c) { case 't': model_type = atoi(optarg); break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'h': case '?': default: usage(); break; } } /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# Reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } fprintf(stdout, "\n**************************************************\n"); fprintf(stdout, "* vp_disc_ft\n"); fprintf(stdout, "**************************************************\n\n"); #ifdef MPI if(g_cart_id==0) fprintf(stdout, "# Warning: MPI-version not yet available; exit\n"); exit(200); #endif /********************************* * initialize MPI parameters *********************************/ mpi_init(argc, argv); /* initialize fftw */ dims[0]=T_global; dims[1]=LX; dims[2]=LY; dims[3]=LZ; #ifdef MPI plan_p = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_BACKWARD, FFTW_MEASURE); plan_m = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_FORWARD, FFTW_MEASURE); fftwnd_mpi_local_sizes(plan_p, &T, &Tstart, &l_LX_at, 
&l_LXstart_at, &FFTW_LOC_VOLUME); #else plan_p = fftwnd_create_plan(4, dims, FFTW_BACKWARD, FFTW_MEASURE | FFTW_IN_PLACE); plan_m = fftwnd_create_plan(4, dims, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE); T = T_global; Tstart = 0; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; #endif fprintf(stdout, "# [%2d] fftw parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] l_LX_at = %3d\n"\ "# [%2d] l_LXstart_at = %3d\n"\ "# [%2d] FFTW_LOC_VOLUME = %3d\n", g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at, g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME); #ifdef MPI if(T==0) { fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id); MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); exit(2); } #endif if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(1); } geometry(); /**************************************** * allocate memory for the contractions ****************************************/ disc = (double*)calloc( 8*VOLUME, sizeof(double)); if( disc == (double*)NULL ) { fprintf(stderr, "could not allocate memory for disc\n"); # ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); # endif exit(3); } disc2 = (double*)calloc( 32*VOLUME, sizeof(double)); if( disc2 == (double*)NULL ) { fprintf(stderr, "could not allocate memory for disc2\n"); # ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); # endif exit(3); } for(ix=0; ix<32*VOLUME; ix++) disc2[ix] = 0.; work = (double*)calloc(32*VOLUME, sizeof(double)); if( work == (double*)NULL ) { fprintf(stderr, "could not allocate memory for work\n"); # ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); # endif exit(3); } /**************************************** * prepare Fourier transformation arrays ****************************************/ in = (fftw_complex*)malloc(FFTW_LOC_VOLUME*sizeof(fftw_complex)); if(in==(fftw_complex*)NULL) { #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); 
MPI_Finalize(); #endif exit(4); } /*************************************** * set model type function ***************************************/ switch (model_type) { case 0: model_type_function = pidisc_model; fprintf(stdout, "# function pointer set to type pidisc_model\n"); case 1: model_type_function = pidisc_model1; fprintf(stdout, "# function pointer set to type pidisc_model1\n"); break; case 2: model_type_function = pidisc_model2; fprintf(stdout, "# function pointer set to type pidisc_model2\n"); break; case 3: model_type_function = pidisc_model3; fprintf(stdout, "# function pointer set to type pidisc_model3\n"); break; default: model_type_function = NULL; fprintf(stdout, "# no model function selected; will add zero\n"); break; } /**************************************** * prepare the model for pidisc * - same for all gauge configurations ****************************************/ rmin2 = g_rmin * g_rmin; rmax2 = g_rmax * g_rmax; if(model_type > -1) { for(mu=0; mu<16; mu++) { model_type_function(model_mrho, model_dcoeff_re, model_dcoeff_im, work, plan_m, mu); for(x0=-(T-1); x0<T; x0++) { y0 = (x0 + T_global) % T_global; for(x1=-(LX-1); x1<LX; x1++) { y1 = (x1 + LX) % LX; for(x2=-(LY-1); x2<LY; x2++) { y2 = (x2 + LY) % LY; for(x3=-(LZ-1); x3<LZ; x3++) { y3 = (x3 + LZ) % LZ; iy = g_ipt[y0][y1][y2][y3]; rsqr = (double)(x1*x1) + (double)(x2*x2) + (double)(x3*x3); if(rmin2-rsqr<=_Q2EPS && rsqr-rmax2<=_Q2EPS) continue; /* radius in range for data usage, so continue */ disc2[_GWI(mu,iy,VOLUME) ] += work[2*iy ]; disc2[_GWI(mu,iy,VOLUME)+1] += work[2*iy+1]; }}}} memcpy((void*)in, (void*)(disc2+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double)); #ifdef MPI fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER); #else fftwnd_one(plan_p, in, NULL); #endif memcpy((void*)(disc2+_GWI(mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double)); } } else { for(ix=0; ix<32*VOLUME; ix++) disc2[ix] = 0.; } /*********************************************** * start loop on gauge id.s 
***********************************************/ for(gid=g_gaugeid; gid<=g_gaugeid2; gid+=g_gauge_step) { if(g_cart_id==0) fprintf(stdout, "# Start working on gauge id %d\n", gid); /* read the new contractions */ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif sprintf(filename, "%s.%.4d.%.4d", filename_prefix, gid, Nsave); if(g_cart_id==0) fprintf(stdout, "# Reading contraction data from file %s\n", filename); if(read_lime_contraction(disc, filename, 4, 0) == 106) { if(g_cart_id==0) fprintf(stderr, "Error, could not read from file %s, continue\n", filename); continue; } #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "# time to read contraction: %e seconds\n", retime-ratime); /************************************************ * prepare \Pi_\mu\nu (x,y) ************************************************/ # ifdef MPI ratime = MPI_Wtime(); # else ratime = (double)clock() / CLOCKS_PER_SEC; # endif for(x0=-T+1; x0<T; x0++) { y0min = x0<0 ? -x0 : 0; y0max = x0<0 ? T : T-x0; for(x1=-LX+1; x1<LX; x1++) { y1min = x1<0 ? -x1 : 0; y1max = x1<0 ? LX : LX-x1; for(x2=-LY+1; x2<LY; x2++) { y2min = x2<0 ? -x2 : 0; y2max = x2<0 ? LY : LY-x2; for(x3=-LZ+1; x3<LZ; x3++) { y3min = x3<0 ? -x3 : 0; y3max = x3<0 ? 
LZ : LZ-x3; xx0 = (x0+T ) % T; xx1 = (x1+LX) % LX; xx2 = (x2+LX) % LY; xx3 = (x3+LX) % LZ; ix = g_ipt[xx0][xx1][xx2][xx3]; rsqr = (double)(x1*x1) + (double)(x2*x2) + (double)(x3*x3); if(rmin2-rsqr>_Q2EPS || rsqr-rmax2>_Q2EPS) continue; for(y0=y0min; y0<y0max; y0++) { z0 = y0 + x0; for(y1=y1min; y1<y1max; y1++) { z1 = y1 + x1; for(y2=y2min; y2<y2max; y2++) { z2 = y2 + x2; for(y3=y3min; y3<y3max; y3++) { z3 = y3 + x3; iy = g_ipt[y0][y1][y2][y3]; iz = g_ipt[z0][z1][z2][z3]; i=0; for(mu=0; mu<4; mu++) { for(nu=0; nu<4; nu++) { iix = _GWI(i,ix,VOLUME); _co_eq_co_ti_co(&w, (complex*)(disc+_GWI(mu,iz,VOLUME)), (complex*)(disc+_GWI(nu,iy,VOLUME))); work[iix ] += w.re; work[iix+1] += w.im; i++; }} }}}} }}}} #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "# time to calculate \\Pi_\\mu\\nu in position space: %e seconds\n", retime-ratime); /*********************************************** * Fourier transform ***********************************************/ for(mu=0; mu<16; mu++) { memcpy((void*)in, (void*)(work+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double)); #ifdef MPI fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER); #else fftwnd_one(plan_p, in, NULL); #endif memcpy((void*)(work+_GWI(mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double)); } fnorm = 1. 
/ ((double)T_global * (double)(LX*LY*LZ)); if(g_cart_id==0) fprintf(stdout, "# P-fnorm = %16.5e\n", fnorm); for(x0=0; x0<T; x0++) { q[0] = (double)(x0+Tstart) / (double)T_global; for(x1=0; x1<LX; x1++) { q[1] = (double)x1 / (double)LX; for(x2=0; x2<LY; x2++) { q[2] = (double)x2 / (double)LY; for(x3=0; x3<LZ; x3++) { q[3] = (double)x3 / (double)LZ; ix = g_ipt[x0][x1][x2][x3]; i=0; for(mu=0; mu<4; mu++) { for(nu=0; nu<4; nu++) { iix = _GWI(i,ix,VOLUME); w.re = cos(M_PI * (q[mu] - q[nu])); w.im = sin(M_PI * (q[mu] - q[nu])); work[iix ] = work[iix ] * fnorm + disc2[iix ]; work[iix+1] = work[iix+1] * fnorm + disc2[iix+1]; _co_eq_co_ti_co(&w1, (complex*)(work+iix), &w); work[iix ] = w1.re; work[iix+1] = w1.im; i++; }} }}}} /*********************************************** * save results ***********************************************/ sprintf(filename, "%s.%.4d.%.4d", filename_prefix2, gid, Nsave); if(g_cart_id==0) fprintf(stdout, "# Saving results to file %s\n", filename); sprintf(contype, "cvc-disc-P"); write_lime_contraction(work, filename, 64, 16, contype, gid, Nsave); /* sprintf(filename, "%sascii.%.4d.%.4d", filename_prefix2, gid, Nsave); write_contraction(work, NULL, filename, 16, 2, 0); */ if(g_cart_id==0) fprintf(stdout, "# Finished working on gauge id %d\n", gid); } /* of loop on gid */ /*********************************************** * free the allocated memory, finalize ***********************************************/ free_geometry(); fftw_free(in); free(disc); free(disc2); free(work); #ifdef MPI fftwnd_mpi_destroy_plan(plan_p); fftwnd_mpi_destroy_plan(plan_m); MPI_Finalize(); #else fftwnd_destroy_plan(plan_p); fftwnd_destroy_plan(plan_m); #endif return(0); }
int main(int argc, char **argv) { int c, mu, status; int filename_set = 0; int mode = 0; int l_LX_at, l_LXstart_at; int x0, x1, x2, x3, ix, iix, iiy, gid, iclass; int Thp1, nclass; int *picount; double *conn = (double*)NULL; double *conn2 = (double*)NULL; double q[4], qsqr; int verbose = 0; char filename[800]; double ratime, retime; int *qid=NULL, *qcount=NULL, **qrep=NULL, **qmap=NULL; double **qlist=NULL, qmax=0.; int VOL3; FILE *ofs; fftw_complex *corrt=NULL; fftw_complex *pi00=(fftw_complex*)NULL, *pijj=(fftw_complex*)NULL, *piavg=(fftw_complex*)NULL; fftw_plan plan_m; while ((c = getopt(argc, argv, "h?vf:m:q:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'm': mode = atoi(optarg); break; case 'q': qmax = atof(optarg); fprintf(stdout, "\n# [] qmax set to %e\n", qmax); break; case 'h': case '?': default: usage(); break; } } /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# Reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } /* initialize MPI parameters */ mpi_init(argc, argv); /* initialize fftw, create plan with FFTW_FORWARD --- in contrast to * FFTW_BACKWARD in e.g. 
avc_exact */ plan_m = fftw_create_plan(T_global, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE); if(plan_m==NULL) { fprintf(stderr, "Error, could not create fftw plan\n"); return(1); } T = T_global; Thp1 = T/2 + 1; Tstart = 0; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; fprintf(stdout, "# [%2d] fftw parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] l_LX_at = %3d\n"\ "# [%2d] l_LXstart_at = %3d\n"\ "# [%2d] FFTW_LOC_VOLUME = %3d\n", g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at, g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME); if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); exit(1); } geometry(); VOL3 = LX*LY*LZ; status = make_qlatt_orbits_3d_parity_avg(&qid, &qcount, &qlist, &nclass, &qrep, &qmap); if(status != 0) { fprintf(stderr, "\n[] Error while creating h4-lists\n"); exit(4); } fprintf(stdout, "# [] number of classes = %d\n", nclass); // exit(255); /**************************************** * allocate memory for the contractions * ****************************************/ conn = (double*)calloc(32*VOLUME, sizeof(double)); if( (conn==(double*)NULL) ) { fprintf(stderr, "could not allocate memory for contr. fields\n"); exit(3); } /* conn2 = (double*)calloc(32*VOLUME, sizeof(double)); if( (conn2==(double*)NULL) ) { fprintf(stderr, "could not allocate memory for contr. 
fields\n"); exit(4); } pi00 = (fftw_complex*)malloc(VOLUME*sizeof(fftw_complex)); if( (pi00==(fftw_complex*)NULL) ) { fprintf(stderr, "could not allocate memory for pi00\n"); exit(2); } pijj = (fftw_complex*)fftw_malloc(VOLUME*sizeof(fftw_complex)); if( (pijj==(fftw_complex*)NULL) ) { fprintf(stderr, "could not allocate memory for pijj\n"); exit(2); } */ corrt = fftw_malloc(T*sizeof(fftw_complex)); if(corrt == NULL) { fprintf(stderr, "\nError, could not alloc corrt\n"); exit(3); } for(gid=g_gaugeid; gid<=g_gaugeid2; gid+=g_gauge_step) { // for(ix=0; ix<VOLUME; ix++) {pi00[ix].re = 0.; pi00[ix].im = 0.;} // for(ix=0; ix<VOLUME; ix++) {pijj[ix].re = 0.; pijj[ix].im = 0.;} /*********************** * read contractions * ***********************/ ratime = (double)clock() / CLOCKS_PER_SEC; sprintf(filename, "%s.%.4d", filename_prefix, gid); fprintf(stdout, "# Reading data from file %s\n", filename); if(format==2) { status = read_contraction(conn, NULL, filename, 16); } else { status = read_lime_contraction(conn, filename, 16, 0); } if(status != 0) { fprintf(stderr, "Error: could not read from file %s; status was %d\n", filename, status); continue; } /* sprintf(filename, "%s.%.4d.%.4d", filename_prefix2, gid); fprintf(stdout, "# Reading data from file %s\n", filename); status = read_lime_contraction(conn2, filename, 16, 0); if(status == 106) { fprintf(stderr, "Error: could not read from file %s; status was %d\n", filename, status); continue; } */ retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# time to read contractions %e seconds\n", retime-ratime); /*********************** * fill the correlator * ***********************/ ratime = (double)clock() / CLOCKS_PER_SEC; /* for(x1=0; x1<LX; x1++) { for(x2=0; x2<LY; x2++) { for(x3=0; x3<LZ; x3++) { for(x0=0; x0<T; x0++) { iix = g_ipt[0][x1][x2][x3]*T+x0; for(mu=1; mu<4; mu++) { ix = _GWI(5*mu,g_ipt[x0][x1][x2][x3],VOLUME); pijj[iix].re += ( conn[ix ] - conn2[ix ] ) * (double)Nsave / (double)(Nsave-1); pijj[iix].im += 
( conn[ix+1] - conn2[ix+1] ) * (double)Nsave / (double)(Nsave-1); } ix = 2*g_ipt[x0][x1][x2][x3]; pi00[iix].re += ( conn[ix ] - conn2[ix ] ) * (double)Nsave / (double)(Nsave-1); pi00[iix].im += ( conn[ix+1] - conn2[ix+1] ) * (double)Nsave / (double)(Nsave-1); } }}} */ for(iclass=0;iclass<nclass;iclass++) { if(qlist[iclass][0] >= qmax) { // fprintf(stdout, "\n# [] will skip class %d, momentum squared = %f is too large\n", iclass, qlist[iclass][0]); continue; // } else { // fprintf(stdout, "\n# [] processing class %d, momentum squared = %f\n", iclass, qlist[iclass][0]); } for(x0=0; x0<T; x0++) { corrt[x0].re = 0.; corrt[x0].im = 0.; } /* for(x1=0;x1<VOL3;x1++) { if(qid[x1]==iclass) { fprintf(stdout, "# using mom %d ---> (%d, %d, %d)\n", x1, qrep[iclass][1], qrep[iclass][2], qrep[iclass][3]); for(x0=0; x0<T; x0++) { ix = x0*VOL3 + x1; corrt[x0].re += conn[_GWI(5,ix,VOLUME) ] + conn[_GWI(10,ix,VOLUME) ] + conn[_GWI(15,ix,VOLUME) ]; corrt[x0].im += conn[_GWI(5,ix,VOLUME)+1] + conn[_GWI(10,ix,VOLUME)+1] + conn[_GWI(15,ix,VOLUME)+1]; } } } */ for(x0=0; x0<T; x0++) { for(x1=0;x1<qcount[iclass];x1++) { x2 = qmap[iclass][x1]; // if(x0==0) fprintf(stdout, "# using mom %d ---> (%d, %d, %d)\n", x2, qrep[iclass][1], qrep[iclass][2], qrep[iclass][3]); ix = x0*VOL3 + x2; corrt[x0].re += conn[_GWI(5,ix,VOLUME) ] + conn[_GWI(10,ix,VOLUME) ] + conn[_GWI(15,ix,VOLUME) ]; corrt[x0].im += conn[_GWI(5,ix,VOLUME)+1] + conn[_GWI(10,ix,VOLUME)+1] + conn[_GWI(15,ix,VOLUME)+1]; } } // fprintf(stdout, "\n\n# ------------------------------\n"); for(x0=0; x0<T; x0++) { corrt[x0].re /= (double)T * qcount[iclass]; corrt[x0].im /= (double)T * qcount[iclass]; } /* fftw(plan_m, 1, corrt, 1, T, (fftw_complex*)NULL, 0, 0); */ fftw_one(plan_m, corrt, NULL); sprintf(filename, "rho.%.4d.x%.2dy%.2dz%.2d", gid, qrep[iclass][1], qrep[iclass][2], qrep[iclass][3]); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } 
fprintf(stdout, "# writing VKVK data to file %s\n", filename); fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f%21.12f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu, qlist[iclass][0]); fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 0, 0, 0, corrt[0].re, 0., gid); for(x0=1; x0<(T/2); x0++) { fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 0, 0, x0, corrt[x0].re, corrt[T-x0].re, gid); } fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 0, 0, (T/2), corrt[T/2].re, 0., gid); fflush(ofs); fclose(ofs); retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# time to fill correlator %e seconds\n", retime-ratime); } // of loop on classes } // end of loop on gauge id /*************************************** * free the allocated memory, finalize * ***************************************/ if(corrt != NULL) free(corrt); free_geometry(); if(pi00 != NULL) free(pi00); if(pijj != NULL) free(pijj); fftw_destroy_plan(plan_m); finalize_q_orbits(&qid, &qcount, &qlist, &qrep); if(qmap != NULL) { free(qmap[0]); free(qmap); } if(g_cart_id == 0) { g_the_time = time(NULL); fprintf(stdout, "\n# [] %s# [] end of run\n", ctime(&g_the_time)); fprintf(stderr, "\n# [] %s# [] end of run\n", ctime(&g_the_time)); } return(0); }
int main(int argc, char **argv) { int c, mu, nu, status, gid; int filename_set = 0; int l_LX_at, l_LXstart_at; int source_location, have_source_flag = 0; int x0, x1, x2, x3, ix; int sx0, sx1, sx2, sx3; int tsize = 0; double *conn = NULL; double *conn2 = (double*)NULL; int verbose = 0; char filename[800]; double ratime, retime; FILE *ofs; int ivec[4], idx[4], imu; double q[4], wre, wim; fftw_complex *inT=NULL, *outT=NULL, *inL=NULL, *outL=NULL; fftw_plan plan_m_T, plan_m_L; while ((c = getopt(argc, argv, "h?vf:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'h': case '?': default: usage(); break; } } // set the default values set_default_input_values(); if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# [get_corr_v2] reading input parameters from file %s\n", filename); read_input_parser(filename); // some checks on the input data if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { fprintf(stdout, "# [get_corr_v2] T=%d, LX=%d, LY=%d, LZ=%d\n", T_global, LX, LY, LZ); if(g_proc_id==0) fprintf(stderr, "[get_corr_v2] Error, T and L's must be set\n"); usage(); } // initialize MPI parameters mpi_init(argc, argv); /* initialize fftw, create plan with FFTW_FORWARD --- in contrast to * FFTW_BACKWARD in e.g. 
avc_exact */ plan_m_T = fftw_create_plan(T_global, FFTW_FORWARD, FFTW_MEASURE); plan_m_L = fftw_create_plan(LX, FFTW_FORWARD, FFTW_MEASURE); T = T_global; Tstart = 0; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; fprintf(stdout, "# [%2d] fftw parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] l_LX_at = %3d\n"\ "# [%2d] l_LXstart_at = %3d\n"\ "# [%2d] FFTW_LOC_VOLUME = %3d\n", g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at, g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME); if(init_geometry() != 0) { fprintf(stderr, "[get_corr_v2] Error from init_geometry\n"); EXIT(1); } geometry(); /**************************************** * allocate memory for the contractions * ****************************************/ conn = (double*)calloc(32 * VOLUME, sizeof(double)); if( (conn==NULL) ) { fprintf(stderr, "[get_corr_v2] Error, could not allocate memory for contr. fields\n"); EXIT(2); } conn2= (double*)calloc(8 * T, sizeof(double)); if( (conn2==NULL) ) { fprintf(stderr, "[get_corr_v2] Error, could not allocate memory for corr.\n"); EXIT(3); } /***************************************** * prepare Fourier transformation arrays * *****************************************/ inT = (fftw_complex*)malloc(T * sizeof(fftw_complex)); inL = (fftw_complex*)malloc(LX * sizeof(fftw_complex)); outT = (fftw_complex*)malloc(T * sizeof(fftw_complex)); outL = (fftw_complex*)malloc(LX * sizeof(fftw_complex)); if( inT==NULL || inL==NULL || outT==NULL || outL==NULL ) { fprintf(stderr, "[get_corr_v2] Error, could not allocate fftw fields\n"); EXIT(4); } /******************************** * determine source coordinates * ********************************/ /* have_source_flag = (int)(g_source_location/(LX*LY*LZ)>=Tstart && g_source_location/(LX*LY*LZ)<(Tstart+T)); if(have_source_flag==1) fprintf(stdout, "# [get_corr_v2] process %2d has source location\n", g_cart_id); sx0 = g_source_location/(LX*LY*LZ)-Tstart; sx1 = (g_source_location%(LX*LY*LZ)) / 
(LY*LZ); sx2 = (g_source_location%(LY*LZ)) / LZ; sx3 = (g_source_location%LZ); if(have_source_flag==1) { fprintf(stdout, "# [get_corr_v2] local source coordinates: (%3d,%3d,%3d,%3d)\n", sx0, sx1, sx2, sx3); source_location = g_ipt[sx0][sx1][sx2][sx3]; } have_source_flag = 0; */ for(gid=g_gaugeid; gid<=g_gaugeid2; gid+=g_gauge_step) { memset(conn, 0, 32*VOLUME*sizeof(double)); memset(conn2, 0, 8*T*sizeof(double)); /*********************** * read contractions * ***********************/ ratime = (double)clock() / CLOCKS_PER_SEC; sprintf(filename, "%s.%.4d", filename_prefix, gid); if(format==2 || format==3) { status = read_contraction(conn, NULL, filename, 16); } else if( format==0) { status = read_lime_contraction(conn, filename, 16, 0); } if(status != 0) { // fprintf(stderr, "[get_corr_v2] Error from read_contractions, status was %d\n", status); // EXIT(5); fprintf(stderr, "[get_corr_v2] Warning, could not read contractions for gid %d, status was %d\n", gid, status); continue; } retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# [get_corr_v2] time to read contractions %e seconds\n", retime-ratime); // TEST Pi_mm /* fprintf(stdout, "# [get_corr_v2] Pi_mm\n"); for(x0=0; x0<T; x0++) { for(x1=0; x1<LX; x1++) { for(x2=0; x2<LY; x2++) { for(x3=0; x3<LZ; x3++) { ix = g_ipt[x0][x1][x2][x3]; for(nu=0;nu<4;nu++) { wre = conn[_GWI(5*nu,ix,VOLUME)]; wim = conn[_GWI(5*nu,ix,VOLUME)+1]; fprintf(stdout, "\t%3d%3d%3d%3d%3d%16.7e%16.7e\n", nu, x0, x1, x2, x3, wre, wim); } }}}} */ // TEST Ward Identity /* fprintf(stdout, "# [get_corr_v2] Ward identity\n"); for(x0=0; x0<T; x0++) { q[0] = 2. * sin(M_PI * (double)x0 / (double)T); for(x1=0; x1<LX; x1++) { q[1] = 2. * sin(M_PI * (double)x1 / (double)LX); for(x2=0; x2<LY; x2++) { q[2] = 2. * sin(M_PI * (double)x2 / (double)LY); for(x3=0; x3<LZ; x3++) { q[3] = 2. 
* sin(M_PI * (double)x3 / (double)LZ); ix = g_ipt[x0][x1][x2][x3]; for(nu=0;nu<4;nu++) { wre = q[0] * conn[_GWI(4*0+nu,ix,VOLUME)] + q[1] * conn[_GWI(4*1+nu,ix,VOLUME)] \ + q[2] * conn[_GWI(4*2+nu,ix,VOLUME)] + q[3] * conn[_GWI(4*3+nu,ix,VOLUME)]; wim = q[0] * conn[_GWI(4*0+nu,ix,VOLUME)+1] + q[1] * conn[_GWI(4*1+nu,ix,VOLUME)+1] \ + q[2] * conn[_GWI(4*2+nu,ix,VOLUME)+1] + q[3] * conn[_GWI(4*3+nu,ix,VOLUME)+1]; fprintf(stdout, "\t%3d%3d%3d%3d%3d%16.7e%16.7e\n", nu, x0, x1, x2, x3, wre, wim); } }}}} */ /*********************** * fill the correlator * ***********************/ ratime = (double)clock() / CLOCKS_PER_SEC; for(mu=0; mu<4; mu++) { ivec[0] = (0 + mu)%4; ivec[1] = (1 + mu)%4; ivec[2] = (2 + mu)%4; ivec[3] = (3 + mu)%4; idx[ivec[1]] = 0; idx[ivec[2]] = 0; idx[ivec[3]] = 0; tsize = (mu==0) ? T : LX; for(x0=0; x0<tsize; x0++) { idx[ivec[0]] = x0; for(nu=1; nu<4; nu++) { imu = (mu+nu) % 4; // ix = get_indexf(idx[0],idx[1],idx[2],idx[3],imu,imu); ix = _GWI(5*imu, g_ipt[idx[0]][idx[1]][idx[2]][idx[3]], VOLUME); // TEST //fprintf(stdout, "\tPi_%d_%d x0=%3d mu=%3d\tix=%8d\n", mu, mu, x0, imu, ix); conn2[2*(mu*T+x0) ] += conn[ix ]; conn2[2*(mu*T+x0)+1] += conn[ix+1]; } } } retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# [get_corr_v2] time to fill correlator %e seconds\n", retime-ratime); // TEST /* fprintf(stdout, "# [get_corr_v2] correlators\n"); for(mu=0;mu<4;mu++) { for(x0=0; x0<T; x0++) { fprintf(stdout, "\t%3d%3d%25.16e%25.16e\n", mu, x0, conn2[2*(mu*T+x0)], conn2[2*(mu*T+x0)+1]); }} */ /***************************************** * reverse Fourier transformation *****************************************/ ratime = (double)clock() / CLOCKS_PER_SEC; memcpy((void*)inT, (void*)conn2, 2*T*sizeof(double)); fftw_one(plan_m_T, inT, outT); for(ix=0; ix<T; ix++) { conn2[2*ix ] = outT[ix].re / (double)T; conn2[2*ix+1] = outT[ix].im / (double)T; } for(mu=1; mu<4; mu++) { memcpy((void*)inL, (void*)(conn2+2*mu*T), 2*LX*sizeof(double)); fftw_one(plan_m_L, inL, 
outL); for(ix=0; ix<LX; ix++) { conn2[2*(mu*T+ix) ] = outL[ix].re / (double)LX; conn2[2*(mu*T+ix)+1] = outL[ix].im / (double)LX; } } retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# [get_corr_v2] time for Fourier transform %e seconds\n", retime-ratime); ratime = (double)clock() / CLOCKS_PER_SEC; sprintf(filename, "v0v0_corr.%.4d", gid); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "[get_corr_v2] Error, could not open file %s for writing\n", filename); EXIT(6); } x0 = 0; fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn2[2*x0], 0., gid); for(x0=1; x0<T/2; x0++) { fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn2[2*x0], conn2[2*(T-x0)], gid); } x0 = T / 2; fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn2[2*x0], 0., gid); fclose(ofs); for(mu=1; mu<4; mu++) { sprintf(filename, "v%dv%d_corr.%.4d", mu, mu, gid); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "[get_corr_v2] Error, could not open file %s for writing\n", filename); EXIT(7); } x0 = 0; fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn2[2*(mu*T+x0)], 0., gid); for(x0=1; x0<LX/2; x0++) { fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn2[2*(mu*T+x0)], conn2[2*(mu*T+ LX-x0)], gid); } x0 = LX / 2; fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn2[2*(mu*T+x0)], 0., gid); fclose(ofs); } retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# [get_corr_v2] time to write correlator %e seconds\n", retime-ratime); } // of loop on gid /*************************************** * free the allocated memory, finalize * ***************************************/ free_geometry(); fftw_free(inT); fftw_free(outT); fftw_free(inL); fftw_free(outL); free(conn); free(conn2); fftw_destroy_plan(plan_m_T); fftw_destroy_plan(plan_m_L); fprintf(stdout, "# [get_corr_v2] %s# [get_corr_v2] end of run\n", ctime(&g_the_time)); fflush(stdout); fprintf(stderr, "[get_corr_v2] %s[get_corr_v2] end of run\n", ctime(&g_the_time)); 
fflush(stderr); return(0); }
int main(int argc, char **argv) { int c, mu; int filename_set = 0; int l_LX_at, l_LXstart_at; int source_location, have_source_flag = 0; int x0, ix; int sx0, sx1, sx2, sx3; int check_WI=0; double *conn = (double*)NULL; double *conn2 = (double*)NULL; int verbose = 0; char filename[800]; double ratime, retime; FILE *ofs; /************************** * variables for WI check */ int x1, x2, x3, nu; double wre, wim, q[4]; /**************************/ fftw_complex *in=(fftw_complex*)NULL, *out=(fftw_complex*)NULL; fftw_plan plan_m; while ((c = getopt(argc, argv, "wh?vf:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'w': check_WI = 1; fprintf(stdout, "# [get_rho_corr] check WI in momentum space\n"); break; case 'h': case '?': default: usage(); break; } } /* set the default values */ set_default_input_values(); if(filename_set==0) strcpy(filename, "cvc.input"); // set the default values set_default_input_values(); if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# [get_rho_corr] reading input parameters from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } if(g_kappa == 0.) { if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n"); usage(); } /* initialize MPI parameters */ mpi_init(argc, argv); /* initialize fftw, create plan with FFTW_FORWARD --- in contrast to * FFTW_BACKWARD in e.g. 
avc_exact */ plan_m = fftw_create_plan(T_global, FFTW_FORWARD, FFTW_MEASURE); T = T_global; Tstart = 0; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; fprintf(stdout, "# [%2d] fftw parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] l_LX_at = %3d\n"\ "# [%2d] l_LXstart_at = %3d\n"\ "# [%2d] FFTW_LOC_VOLUME = %3d\n", g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at, g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME); if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); exit(1); } geometry(); /**************************************** * allocate memory for the contractions * ****************************************/ conn = (double*)calloc(2 * 16 * VOLUME, sizeof(double)); if( (conn==(double*)NULL) ) { fprintf(stderr, "could not allocate memory for contr. fields\n"); exit(3); } for(ix=0; ix<32*VOLUME; ix++) conn[ix] = 0.; conn2= (double*)calloc(2 * T, sizeof(double)); if( (conn2==(double*)NULL) ) { fprintf(stderr, "could not allocate memory for corr.\n"); exit(2); } for(ix=0; ix<2*T; ix++) conn2[ix] = 0.; /***************************************** * prepare Fourier transformation arrays * *****************************************/ in = (fftw_complex*)malloc(T*sizeof(fftw_complex)); out = (fftw_complex*)malloc(T*sizeof(fftw_complex)); if( (in==(fftw_complex*)NULL) || (out==(fftw_complex*)NULL) ) exit(4); /******************************** * determine source coordinates * ********************************/ have_source_flag = (int)(g_source_location/(LX*LY*LZ)>=Tstart && g_source_location/(LX*LY*LZ)<(Tstart+T)); if(have_source_flag==1) fprintf(stdout, "process %2d has source location\n", g_cart_id); sx0 = g_source_location/(LX*LY*LZ)-Tstart; sx1 = (g_source_location%(LX*LY*LZ)) / (LY*LZ); sx2 = (g_source_location%(LY*LZ)) / LZ; sx3 = (g_source_location%LZ); if(have_source_flag==1) { fprintf(stdout, "local source coordinates: (%3d,%3d,%3d,%3d)\n", sx0, sx1, sx2, sx3); source_location = 
g_ipt[sx0][sx1][sx2][sx3]; } have_source_flag = 0; /*********************** * read contractions * ***********************/ ratime = (double)clock() / CLOCKS_PER_SEC; // read_contraction(conn, (int*)NULL, filename_prefix, 16); read_lime_contraction(conn, filename_prefix, 16, 0); retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "time to read contractions %e seconds\n", retime-ratime); // TEST Ward Identity if(check_WI) { fprintf(stdout, "# [get_corr_v5] Ward identity\n"); sprintf(filename, "WI.%.4d", Nconf); ofs = fopen(filename, "w"); if(ofs == NULL) exit(32); for(x0=0; x0<T; x0++) { q[0] = 2. * sin(M_PI * (double)x0 / (double)T); for(x1=0; x1<LX; x1++) { q[1] = 2. * sin(M_PI * (double)x1 / (double)LX); for(x2=0; x2<LY; x2++) { q[2] = 2. * sin(M_PI * (double)x2 / (double)LY); for(x3=0; x3<LZ; x3++) { q[3] = 2. * sin(M_PI * (double)x3 / (double)LZ); ix = g_ipt[x0][x1][x2][x3]; for(nu=0;nu<4;nu++) { wre = q[0] * conn[_GWI(4*0+nu,ix,VOLUME)] + q[1] * conn[_GWI(4*1+nu,ix,VOLUME)] \ + q[2] * conn[_GWI(4*2+nu,ix,VOLUME)] + q[3] * conn[_GWI(4*3+nu,ix,VOLUME)]; wim = q[0] * conn[_GWI(4*0+nu,ix,VOLUME)+1] + q[1] * conn[_GWI(4*1+nu,ix,VOLUME)+1] \ + q[2] * conn[_GWI(4*2+nu,ix,VOLUME)+1] + q[3] * conn[_GWI(4*3+nu,ix,VOLUME)+1]; fprintf(ofs, "\t%3d%3d%3d%3d%3d%16.7e%16.7e\n", nu, x0, x1, x2, x3, wre, wim); } }}}} fclose(ofs); } /*********************** * fill the correlator * ***********************/ ratime = (double)clock() / CLOCKS_PER_SEC; for(x0=0; x0<T; x0++) { for(mu=1; mu<4; mu++) { ix = get_indexf(x0,0,0,0,mu,mu); fprintf(stdout, "x0=%3d, mu=%3d\tix=%8d\n", x0, mu, ix); conn2[2*x0 ] += conn[ix ]; conn2[2*x0+1] += conn[ix+1]; } } retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "time to fill correlator %e seconds\n", retime-ratime); /******************************** * test: print correl to stdout * ********************************/ for(x0=0; x0<T; x0++) { fprintf(stdout, "%3d%25.16e%25.16e\n", x0, conn2[2*x0], conn[2*x0+1]); } 
/***************************************** * do the reverse Fourier transformation * *****************************************/ ratime = (double)clock() / CLOCKS_PER_SEC; memcpy((void*)in, (void*)conn2, 2*T*sizeof(double)); fftw_one(plan_m, in, out); for(ix=0; ix<T; ix++) { conn2[2*ix ] = out[ix].re / (double)T; conn2[2*ix+1] = out[ix].im / (double)T; } retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "time for Fourier transform %e seconds\n", retime-ratime); ratime = (double)clock() / CLOCKS_PER_SEC; sprintf(filename, "rho_corr.%.4d", Nconf); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "could not open file %s for writing\n", filename); exit(5); } //for(x0=0; x0<T; x0++) { // fprintf(ofs, "%3d%25.16e%25.16e\n", x0, conn2[2*x0], conn2[2*x0+1]); //} x0 = 0; fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn2[2*x0], 0., Nconf); for(x0=1; x0<T/2; x0++) { fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn2[2*x0], conn2[2*(T-x0)], Nconf); } x0 = T/2; fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn2[2*x0], 0., Nconf); fclose(ofs); retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "time to write correlator %e seconds\n", retime-ratime); /*************************************** * free the allocated memory, finalize * ***************************************/ free_geometry(); fftw_free(in); fftw_free(out); free(conn); free(conn2); fftw_destroy_plan(plan_m); return(0); }
int main(int argc, char **argv) { int c, i, mu, nu; int count = 0; int filename_set = 0; //int use_real_part = 1; int ix, iix; int sid, status, gid, it, ir, it2; double *disc = (double*)NULL; double *work = (double*)NULL; double *bias = (double*)NULL; //double fnorm; int verbose = 0; unsigned int VOL3; char filename[100]; double ratime, retime; double *tmp = NULL; complex w; FILE *ofs = NULL; #ifdef MPI MPI_Init(&argc, &argv); #endif while ((c = getopt(argc, argv, "h?vf:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'h': case '?': default: usage(); break; } } g_the_time = time(NULL); fprintf(stdout, "# [pi_ud_tp0] using global time stamp %s", ctime(&g_the_time)); /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# Reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } fprintf(stdout, "# [pi_ud_tp0] **************************************************\n"); fprintf(stdout, "# [pi_ud_tp0] pi_ud_p\n"); fprintf(stdout, "# [pi_ud_tp0] **************************************************\n\n"); /********************************* * initialize MPI parameters *********************************/ mpi_init(argc, argv); #ifdef MPI if(T==0) { fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id); EXIT(2); } #endif if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); EXIT(1); } geometry(); /**************************************** * allocate memory for the contractions ****************************************/ disc = (double*)calloc(16*VOLUME, sizeof(double)); if( disc == (double*)NULL ) { fprintf(stderr, "could not allocate memory for disc\n"); EXIT(3); } work = (double*)calloc(2*T_global, sizeof(double)); if( work == (double*)NULL ) { fprintf(stderr, "[pi_ud_tp0] 
could not allocate memory for work\n"); EXIT(5); } bias = (double*)calloc(2*T_global, sizeof(double)); if( bias == (double*)NULL ) { fprintf(stderr, "[pi_ud_tp0] could not allocate memory for bias\n"); EXIT(6); } tmp = (double*)calloc(2*T_global, sizeof(double)); if( tmp == (double*)NULL ) { fprintf(stderr, "[pi_ud_tp0] could not allocate memory for tmp\n"); EXIT(8); } /*********************************************** * start loop on gauge id.s ***********************************************/ for(gid=g_gaugeid; gid<=g_gaugeid2; gid+=g_gauge_step) { memset(work, 0, 2*T_global*sizeof(double)); memset(bias, 0, 2*T_global*sizeof(double)); count = 0; /*********************************************** * start loop on source id.s ***********************************************/ for(sid=g_sourceid; sid<=g_sourceid2; sid+=g_sourceid_step) { memset(disc, 0, 16*VOLUME*sizeof(double)); ratime = CLOCK; sprintf(filename, "jc_ud_x.%.4d.%.4d", gid, sid); status = read_lime_contraction(disc, filename, 4, 0); if(status!=0) { fprintf(stderr, "Error, could not read contraction data from file %s\n", filename); EXIT(7); } retime = CLOCK; if(g_cart_id==0) fprintf(stdout, "# time to read contractions: %e seconds\n", retime-ratime); count++; ratime = CLOCK; // add current to sum for(it=0; it<T; it++) { tmp[2*it ] = 0.; tmp[2*it+1] = 0.; for(iix=0; iix<VOL3; iix++) { ix = it * VOL3 + iix; tmp[2*it ] += disc[_GWI(1,ix,VOLUME) ] + disc[_GWI(2,ix,VOLUME) ] + disc[_GWI(3,ix,VOLUME) ]; tmp[2*it+1] += disc[_GWI(1,ix,VOLUME)+1] + disc[_GWI(2,ix,VOLUME)+1] + disc[_GWI(3,ix,VOLUME)+1]; } } for(it=0; it<2*T_global; it++) { work[it] += tmp[it]; } // add to bias for(it=0; it<T_global; it++) { for(ir=0; ir<T_global; ir++) { it2 = (it + ir ) % T_global; _co_eq_co_ti_co( &w, (complex*)&(tmp[2*it2]), (complex*)&(tmp[2*it]) ); bias[2*it ] += w.re; bias[2*it+1] += w.im; }} retime = CLOCK; if(g_cart_id==0) fprintf(stdout, "# [pi_ud_tp0] time to calculate contractions: %e seconds\n", retime-ratime); 
if(count==Nsave) { memset(disc, 0, 2*T_global*sizeof(double)); for(it=0; it<T_global; it++) { for(ir=0; ir<T_global; ir++) { it2 = (it + ir ) % T_global; _co_eq_co_ti_co( &w, (complex*)&(work[2*it2]), (complex*)&(work[2*it]) ); disc[2*it ] += w.re; disc[2*it+1] += w.im; }} for(it=0; it<2*T_global; it++) { disc[it] -= bias[it]; } sprintf(filename, "pi_ud_t.%.4d.%.4d", gid, count); ofs = fopen(filename, "w"); if(ofs == NULL) { fprintf(stderr, "[pi_ud_tp0] Error, could not open file %s for writing\n", filename); EXIT(8); } fprintf(ofs, "# [pi_ud_tp0] results for disc. t-dependent correlator at zero spatial momentum\n# %s", ctime(&g_the_time)); fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 11, 1, 0, disc[0], 0., Nconf); for(it=1; it<T_global/2; it++) { fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 11, 1, it, disc[it], disc[2*(T_global-it)], Nconf); } fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 11, 1, T_global/2, disc[T_global/2], 0., Nconf); fclose(ofs); retime = CLOCK; if(g_cart_id==0) fprintf(stdout, "# [pi_ud_tp0] time to save cvc results: %e seconds\n", retime-ratime); } // of count % Nsave == 0 } // of loop on sid } // of loop on gid /*********************************************** * free the allocated memory, finalize ***********************************************/ free_geometry(); if(disc != NULL) free(disc); if(work != NULL) free(work); if(bias != NULL) free(bias); if(tmp != NULL) free(tmp); if(g_cart_id == 0) { fprintf(stdout, "# [pi_ud_tp0] %s# [pi_ud_tp0] end of run\n", ctime(&g_the_time)); fflush(stdout); fprintf(stderr, "# [pi_ud_tp0] %s# [pi_ud_tp0] end of run\n", ctime(&g_the_time)); fflush(stderr); } #ifdef MPI MPI_Finalize(); #endif return(0); }
int main(int argc, char **argv) { int Thm1; int c, i, mu, nthreads; int count = 0; int filename_set = 0; int l_LX_at, l_LXstart_at; int x0, x1, y0; int ix, iy, idx1, idx2; int VOL3; int sid1, sid2, status, gid; size_t nprop=0; double *data=NULL, *data2=NULL, *data3=NULL; double fnorm; double *mom2=NULL, *mom4=NULL; char filename[100]; double ratime, retime; FILE *ofs=NULL; /**************************************** * initialize the distance vectors ****************************************/ while ((c = getopt(argc, argv, "h?f:")) != -1) { switch (c) { case 'f': strcpy(filename, optarg); filename_set=1; break; case 'h': case '?': default: usage(); break; } } /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# Reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } if(g_kappa == 0.) { if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n"); usage(); } fprintf(stdout, "\n**************************************************\n"); fprintf(stdout, "* jc_corr\n"); fprintf(stdout, "**************************************************\n\n"); T = T_global; Thm1 = T / 2 - 1; Tstart = 0; l_LX_at = LX; l_LXstart_at = 0; VOL3 = LX*LY*LZ; fprintf(stdout, "# [%2d] parameters:\n"\ "# T = %3d\n"\ "# Tstart = %3d\n"\ "# l_LX_at = %3d\n"\ "# l_LXstart_at = %3d\n"\ "# FFTW_LOC_VOLUME = %3d\n", g_cart_id, T, Tstart, l_LX_at, l_LXstart_at, FFTW_LOC_VOLUME); if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); exit(1); } geometry(); /**************************************** * allocate memory for the contractions ****************************************/ nprop = (size_t)(g_sourceid2 - g_sourceid) / (size_t)g_sourceid_step + 1; fprintf(stdout, "\n# [jc_corr] number of stoch. 
propagators = %lu\n", nprop); data = (double*)calloc(8*FFTW_LOC_VOLUME, sizeof(double)); if( data==NULL ) { fprintf(stderr, "could not allocate memory for data\n"); exit(3); } /* nprop * T * 3(i=1,2,3) * 2(real and imaginary part) */ data2 = (double*)calloc(nprop*T*6, sizeof(double)); if( data2==NULL ) { fprintf(stderr, "could not allocate memory for data2\n"); exit(3); } data3 = (double*)calloc(2*T, sizeof(double)); if( data3==NULL ) { fprintf(stderr, "could not allocate memory for data3\n"); exit(3); } fnorm = 1. / ( (double)nprop * (double)(nprop-1) * (double)(LX*LY*LZ) ); fprintf(stdout, "\n# [jc_corr] fnorm = %25.16e\n", fnorm); for(ix=0; ix<nprop*T; ix++) data2[ix] = 0.; /*********************************************** * start loop on gauge id.s ***********************************************/ for(gid=g_gaugeid; gid<=g_gaugeid2; gid++) { /* calculate the t-dependent current at zero spatial momentum */ for(sid1=0; sid1<nprop; sid1++) { sprintf(filename, "jc_ud_x.%.4d.%.4d", gid, g_sourceid + sid1*g_sourceid_step); if(read_lime_contraction(data, filename, 4, 0) != 0) { fprintf(stderr, "\n[jc_corr] Error, could not read field no. 
%d\n", sid1); exit(15); } for(mu=0;mu<3;mu++) { for(x0=0;x0<T;x0++) { ix = g_ipt[x0][0][0][0]; ix = _GWI(5*(mu+1), ix, VOLUME); for(iy=0;iy<VOL3;iy++) { data2[2*(sid1*3*T + mu*T + x0) ] += data[ix + 2*iy ]; data2[2*(sid1*3*T + mu*T + x0)+1] += data[ix + 2*iy+1]; } } } } /*********************************************** * calculate the correlator * - remember: x1 is the time difference of the correlator, * x0 and y0 are the time coordinates of the currents ***********************************************/ ratime = (double)clock() / CLOCKS_PER_SEC; for(i=0;i<2*T; i++) data3[i] = 0.; for(sid1=0; sid1<nprop-1; sid1++) { for(sid2=sid1+1; sid2<nprop; sid2++) { for(y0=0;y0<T;y0++) { for(x1=0;x1<T;x1++) { x0 = (y0 + x1) % T; // first component idx1 = 2 * ( sid1*3*T + 0*T + y0 ); idx2 = 2 * ( sid2*3*T + 0*T + x0 ); // real part of the product data3[2*x1 ] += data2[idx1 ] * data2[idx2 ] - data2[idx1+1]*data2[idx2+1]; // imaginary part of the product data3[2*x1+1] += data2[idx1+1] * data2[idx2 ] + data2[idx1 ]*data2[idx2+1]; // second component idx1 = 2 * ( sid1*3*T + 1*T + y0 ); idx2 = 2 * ( sid2*3*T + 1*T + x0 ); // real part of the product data3[2*x1 ] += data2[idx1 ] * data2[idx2 ] - data2[idx1+1]*data2[idx2+1]; // imaginary part of the product data3[2*x1+1] += data2[idx1+1] * data2[idx2 ] + data2[idx1 ]*data2[idx2+1]; // third component idx1 = 2 * ( sid1*3*T + 2*T + y0 ); idx2 = 2 * ( sid2*3*T + 2*T + x0 ); // real part of the product data3[2*x1 ] += data2[idx1 ] * data2[idx2 ] - data2[idx1+1]*data2[idx2+1]; // imaginary part of the product data3[2*x1+1] += data2[idx1+1] * data2[idx2 ] + data2[idx1 ]*data2[idx2+1]; } } }} // of sid2 and sid1 // normalization for(x0=0;x0<2*T;x0++) { data3[x0] *= fnorm; } for(x0=0;x0<T/2-1;x0++) { mom2[x0] = 0.; mom4[x0] = 0.; } for(x0=1;x0<T/2;x0++) { if(x0==1) { mom2[0] = ( data3[2*x0] + data3[2*(T-x0)] ) * (double)(x0*x0); mom4[0] = ( data3[2*x0] + data3[2*(T-x0)] ) * (double)(x0*x0*x0*x0); } else { mom2[x0-1] = mom2[x0-2] + ( data3[2*x0] 
+ data3[2*(T-x0)] ) * (double)(x0*x0); mom4[x0-1] = mom4[x0-2] + ( data3[2*x0] + data3[2*(T-x0)] ) * (double)(x0*x0*x0*x0); } } for(i=0;i<Thm1;i++) mom2[i] /= 6.; for(i=0;i<Thm1;i++) mom4[i] /= 72.; /************************************************ * save results in position space ************************************************/ sprintf(filename, "pi_ud_tp0.%4d.%.4d", gid, nprop); ofs = fopen(filename, "w"); if (ofs==NULL) { fprintf(stderr, "\n[jc_corr] Error, could not open file %s for writing\n", filename); exit(9); } fprintf(ofs, "0 1 0%25.16e%25.16e%d\n", data3[0], 0., gid); for(x0=1;x0<=Thm1;x0++) fprintf(ofs, "0 1 %2d%25.16e%25.16e%d\n", x0, data3[x0], data3[T-x0], gid); fprintf(ofs, "0 1 %2d%25.16e%25.16e%d\n", x0, data3[x0], 0., gid); fclose(ofs); sprintf(filename, "pi_ud_mom.%4d.%.4d", gid, nprop); ofs = fopen(filename, "w"); if (ofs==NULL) { fprintf(stderr, "\n[jc_corr] Error, could not open file %s for writing\n", filename); exit(9); } for(i=0;i<Thm1;i++) fprintf(ofs, "%2d%25.16e%25.16e\n", i, mom2[i], mom4[i]); fclose(ofs); retime = (double)clock() / CLOCKS_PER_SEC; if(g_cart_id == 0) fprintf(stdout, "# time for building correl.: %e seconds\n", retime-ratime); } /* of loop on gid */ /*********************************************** * free the allocated memory, finalize ***********************************************/ free_geometry(); free(data); free(data2); free(data3); free(mom2); free(mom4); return(0); }
int main(int argc, char **argv) { int c, mu, status; int filename_set = 0; int mode = 0; int l_LX_at, l_LXstart_at; int x0, x1, x2, x3, ix, iix, iiy, gid; int Thp1, nclass; int *oh_count=(int*)NULL, *oh_id=(int*)NULL, oh_nc; int *picount; double *conn = (double*)NULL; double *conn2 = (double*)NULL; double **oh_val=(double**)NULL; double q[4], qsqr; int verbose = 0; char filename[800]; double ratime, retime; FILE *ofs; fftw_complex *corrt=NULL; fftw_complex *pi00=(fftw_complex*)NULL, *pijj=(fftw_complex*)NULL, *piavg=(fftw_complex*)NULL; fftw_plan plan_m; while ((c = getopt(argc, argv, "h?vf:m:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'm': mode = atoi(optarg); break; case 'h': case '?': default: usage(); break; } } /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# Reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } /* initialize MPI parameters */ mpi_init(argc, argv); /* initialize fftw, create plan with FFTW_FORWARD --- in contrast to * FFTW_BACKWARD in e.g. 
avc_exact */ plan_m = fftw_create_plan(T_global, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE); if(plan_m==NULL) { fprintf(stderr, "Error, could not create fftw plan\n"); return(1); } T = T_global; Thp1 = T/2 + 1; Tstart = 0; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; fprintf(stdout, "# [%2d] fftw parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] l_LX_at = %3d\n"\ "# [%2d] l_LXstart_at = %3d\n"\ "# [%2d] FFTW_LOC_VOLUME = %3d\n", g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at, g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME); if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); exit(1); } geometry(); /**************************************** * allocate memory for the contractions * ****************************************/ conn = (double*)calloc(32*VOLUME, sizeof(double)); if( (conn==(double*)NULL) ) { fprintf(stderr, "could not allocate memory for contr. fields\n"); exit(3); } /* conn2 = (double*)calloc(32*VOLUME, sizeof(double)); if( (conn2==(double*)NULL) ) { fprintf(stderr, "could not allocate memory for contr. 
fields\n"); exit(4); } pi00 = (fftw_complex*)malloc(VOLUME*sizeof(fftw_complex)); if( (pi00==(fftw_complex*)NULL) ) { fprintf(stderr, "could not allocate memory for pi00\n"); exit(2); } pijj = (fftw_complex*)fftw_malloc(VOLUME*sizeof(fftw_complex)); if( (pijj==(fftw_complex*)NULL) ) { fprintf(stderr, "could not allocate memory for pijj\n"); exit(2); } */ corrt = fftw_malloc(T*sizeof(fftw_complex)); for(gid=g_gaugeid; gid<=g_gaugeid2; gid++) { // for(ix=0; ix<VOLUME; ix++) {pi00[ix].re = 0.; pi00[ix].im = 0.;} // for(ix=0; ix<VOLUME; ix++) {pijj[ix].re = 0.; pijj[ix].im = 0.;} /*********************** * read contractions * ***********************/ ratime = (double)clock() / CLOCKS_PER_SEC; sprintf(filename, "%s", filename_prefix); fprintf(stdout, "# Reading data from file %s\n", filename); status = read_lime_contraction(conn, filename, 16, 0); if(status == 106) { fprintf(stderr, "Error: could not read from file %s; status was %d\n", filename, status); continue; } /* sprintf(filename, "%s.%.4d.%.4d", filename_prefix2, gid); fprintf(stdout, "# Reading data from file %s\n", filename); status = read_lime_contraction(conn2, filename, 16, 0); if(status == 106) { fprintf(stderr, "Error: could not read from file %s; status was %d\n", filename, status); continue; } */ retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# time to read contractions %e seconds\n", retime-ratime); /*********************** * fill the correlator * ***********************/ ratime = (double)clock() / CLOCKS_PER_SEC; /* for(x1=0; x1<LX; x1++) { for(x2=0; x2<LY; x2++) { for(x3=0; x3<LZ; x3++) { for(x0=0; x0<T; x0++) { iix = g_ipt[0][x1][x2][x3]*T+x0; for(mu=1; mu<4; mu++) { ix = _GWI(5*mu,g_ipt[x0][x1][x2][x3],VOLUME); pijj[iix].re += ( conn[ix ] - conn2[ix ] ) * (double)Nsave / (double)(Nsave-1); pijj[iix].im += ( conn[ix+1] - conn2[ix+1] ) * (double)Nsave / (double)(Nsave-1); } ix = 2*g_ipt[x0][x1][x2][x3]; pi00[iix].re += ( conn[ix ] - conn2[ix ] ) * (double)Nsave / (double)(Nsave-1); 
pi00[iix].im += ( conn[ix+1] - conn2[ix+1] ) * (double)Nsave / (double)(Nsave-1); } }}} */ for(x0=0; x0<T; x0++) { ix = g_ipt[x0][0][0][0]; corrt[x0].re = conn[_GWI(5,ix,VOLUME) ] + conn[_GWI(10,ix,VOLUME) ] + conn[_GWI(15,ix,VOLUME) ]; corrt[x0].im = conn[_GWI(5,ix,VOLUME)+1] + conn[_GWI(10,ix,VOLUME)+1] + conn[_GWI(15,ix,VOLUME)+1]; corrt[x0].re /= (double)T; corrt[x0].im /= (double)T; } /* fftw(plan_m, 1, corrt, 1, T, (fftw_complex*)NULL, 0, 0); */ fftw_one(plan_m, corrt, NULL); sprintf(filename, "rho.%.4d", gid); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } fprintf(stdout, "# writing VKVK data to file %s\n", filename); fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu); fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 0, 0, 0, corrt[0].re, 0., gid); for(x0=1; x0<(T/2); x0++) { fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 0, 0, x0, corrt[x0].re, corrt[T-x0].re, gid); } fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 0, 0, (T/2), corrt[T/2].re, 0., gid); retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# time to fill correlator %e seconds\n", retime-ratime); #ifdef _UNDEF free(conn); /* free(conn2); */ /******************************** * test: print correl to stdout * ********************************/ /* fprintf(stdout, "\n\n# ***************** pijj *****************\n"); for(ix=0; ix<LX*LY*LZ; ix++) { iix = ix*T; for(x0=0; x0<T; x0++) { fprintf(stdout, "%6d%3d%25.16e%25.16e\n", ix, x0, pijj[iix+x0].re, pijj[iix+x0].im); } } fprintf(stdout, "\n\n# ***************** pi00 *****************\n"); for(ix=0; ix<LX*LY*LZ; ix++) { iix = ix*T; for(x0=0; x0<T; x0++) { fprintf(stdout, "%6d%3d%25.16e%25.16e\n", ix, x0, pi00[iix+x0].re, pi00[iix+x0].im); } } */ /***************************************** * do the reverse Fourier transformation * *****************************************/ ratime = (double)clock() / CLOCKS_PER_SEC; fftw(plan_m, 
LX*LY*LZ, pi00, 1, T, (fftw_complex*)NULL, 0, 0); fftw(plan_m, LX*LY*LZ, pijj, 1, T, (fftw_complex*)NULL, 0, 0); for(ix=0; ix<VOLUME; ix++) { pi00[ix].re /= (double)T; pi00[ix].im /= (double)T; pijj[ix].re /= 3.*(double)T; pijj[ix].im /= 3.*(double)T; } retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# time for Fourier transform %e seconds\n", retime-ratime); /***************************************** * write to file *****************************************/ ratime = (double)clock() / CLOCKS_PER_SEC; sprintf(filename, "pi00.%.4d", gid); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } fprintf(stdout, "# writing pi00-data to file %s\n", filename); fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu); for(x1=0; x1<LX; x1++) { for(x2=0; x2<LY; x2++) { for(x3=0; x3<LZ; x3++) { ix = g_ipt[0][x1][x2][x3]*T; /* fprintf(ofs, "# px=%3d, py=%3d, pz=%3d\n", x1, x2, x3); */ for(x0=0; x0<T; x0++) { /* fprintf(ofs, "%3d%25.16e%25.16e\n", x0, pi00[ix+x0].re, pi00[ix+x0].im); */ fprintf(ofs, "%3d%3d%3d%3d%25.16e%25.16e\n", x1, x2, x3, x0, pi00[ix+x0].re, pi00[ix+x0].im); } }}} fclose(ofs); sprintf(filename, "pijj.%.4d", gid); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } fprintf(stdout, "# writing pijj-data to file %s\n", filename); fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu); for(x1=0; x1<LX; x1++) { for(x2=0; x2<LY; x2++) { for(x3=0; x3<LZ; x3++) { ix = g_ipt[0][x1][x2][x3]*T; /* fprintf(ofs, "# px=%3d, py=%3d, pz=%3d\n", x1, x2, x3); */ for(x0=0; x0<T; x0++) { /* fprintf(ofs, "%3d%25.16e%25.16e\n", x0, pijj[ix+x0].re, pijj[ix+x0].im); */ fprintf(ofs, "%3d%3d%3d%3d%25.16e%25.16e\n", x1, x2, x3, x0, pijj[ix+x0].re, pijj[ix+x0].im); } }}} fclose(ofs); retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# 
time to write correlator %e seconds\n", retime-ratime); /* if(mode==0) { ratime = (double)clock() / CLOCKS_PER_SEC; if( (picount = (int*)malloc(VOLUME*sizeof(int))) == (int*)NULL) exit(110); sprintf(filename, "corr.00.mom"); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } for(ix=0; ix<VOLUME; ix++) picount[ix] = 0; for(x1=0; x1<LX; x1++) { q[1] = 2. * sin(M_PI * (double)x1 / (double)LX); for(x2=0; x2<LY; x2++) { q[2] = 2. * sin(M_PI * (double)x2 / (double)LY); for(x3=0; x3<LZ; x3++) { q[3] = 2. * sin(M_PI * (double)x3 / (double)LZ); qsqr = q[1]*q[1] + q[2]*q[2] + q[3]*q[3]; if( qsqr>=g_qhatsqr_min-_Q2EPS && qsqr<= g_qhatsqr_max+_Q2EPS ) { ix = g_ipt[0][x1][x2][x3]; picount[ix] = 1; fprintf(ofs, "%3d%3d%3d%6d%25.16e\n", x1, x2, x3, ix, qsqr); } }}} fclose(ofs); sprintf(filename, "corr_00.00.%.4d", gid); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } fprintf(stdout, "# writing corr_00-data to file %s\n", filename); fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu); for(ix=0; ix<VOLUME; ix++) { if(picount[ix]>0) { for(x0=0; x0<T; x0++) { fprintf(ofs, "%3d%3d%25.16e%25.16e\n", ix, x0, pi00[ix*T+x0].re, pi00[ix*T+x0].im); } } } fclose(ofs); sprintf(filename, "corr_jj.00.%.4d", gid); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } fprintf(stdout, "# writing corr_jj-data to file %s\n", filename); fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu); for(ix=0; ix<VOLUME; ix++) { if(picount[ix]>0) { for(x0=0; x0<T; x0++) { fprintf(ofs, "%3d%3d%25.16e%25.16e\n", ix, x0, pijj[ix*T+x0].re, pijj[ix*T+x0].im); } } } fclose(ofs); retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# time for O_h averaging %e seconds\n", retime-ratime); 
free(picount); } else if(mode==1) { ratime = (double)clock() / CLOCKS_PER_SEC; if( (picount = (int*)malloc(VOLUME*sizeof(int))) == (int*)NULL) exit(110); sprintf(filename, "corr.01.mom"); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } if( (picount = (int*)malloc(VOLUME*sizeof(int))) == (int*)NULL) exit(110); for(ix=0; ix<VOLUME; ix++) picount[ix] = 0; for(x1=0; x1<LX; x1++) { q[1] = 2. * M_PI * (double)x1 / (double)LX; for(x2=0; x2<LY; x2++) { q[2] = 2. * M_PI * (double)x2 / (double)LY; for(x3=0; x3<LZ; x3++) { q[3] = 2. * M_PI * (double)x3 / (double)LZ; qsqr = q[1]*q[1] + q[2]*q[2] + q[3]*q[3]; if( qsqr>=g_qhatsqr_min-_Q2EPS && qsqr<= g_qhatsqr_max+_Q2EPS ) { ix = g_ipt[0][x1][x2][x3]; picount[ix] = 1; fprintf(ofs, "%3d%3d%3d%6d%25.16e\n", x1, x2, x3, ix, qsqr); } }}} fclose(ofs); sprintf(filename, "corr_00.01.%.4d", gid); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } fprintf(stdout, "# writing corr_01-data to file %s\n", filename); fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu); for(ix=0; ix<VOLUME; ix++) { if(picount[ix]>0) { for(x0=0; x0<T; x0++) { fprintf(ofs, "%3d%3d%25.16e%25.16e\n", ix, x0, pi00[ix*T+x0].re, pi00[ix*T+x0].im); } } } fclose(ofs); sprintf(filename, "corr_jj.01.%.4d", gid); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } fprintf(stdout, "# writing corr_jj-data to file %s\n", filename); fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu); for(ix=0; ix<VOLUME; ix++) { if(picount[ix]>0) { for(x0=0; x0<T; x0++) { fprintf(ofs, "%3d%3d%25.16e%25.16e\n", ix, x0, pijj[ix*T+x0].re, pijj[ix*T+x0].im); } } } fclose(ofs); retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# time for writing: %e seconds\n", 
retime-ratime); free(picount); } else if(mode==2) { if(make_H3orbits(&oh_id, &oh_count, &oh_val, &oh_nc) != 0) return(123); ratime = (double)clock() / CLOCKS_PER_SEC; nclass = oh_nc / Thp1; if( (piavg = (fftw_complex*)malloc(oh_nc*sizeof(fftw_complex))) == (fftw_complex*)NULL) exit(110); if( (picount = (int*)malloc(oh_nc*sizeof(int))) == (int*)NULL) exit(110); for(ix=0; ix<oh_nc; ix++) { piavg[ix].re = 0.; piavg[ix].im = 0.; picount[ix] = 0; } for(ix=0; ix<LX*LY*LZ; ix++) { for(x0=0; x0<Thp1; x0++) { iix = ix*T+x0; iiy = oh_id[ix]*Thp1+x0; piavg[iiy].re += pi00[iix].re; piavg[iiy].im += pi00[iix].im; if(x0>0 && x0<T/2) { iix = ix*T+(T-x0); piavg[iiy].re += pi00[iix].re; piavg[iiy].im += pi00[iix].im; } } picount[oh_id[ix]]++; } for(ix=0; ix<nclass; ix++) { for(x0=0; x0<Thp1; x0++) { iix = ix*Thp1+x0; if(picount[ix]>0) { piavg[iix].re /= (double)picount[ix]; piavg[iix].im /= (double)picount[ix]; if(x0>0 && x0<T/2) { piavg[iix].re /= 2.; piavg[iix].im /= 2.; } } } } sprintf(filename, "corr02_00.%.4d", gid); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } fprintf(stdout, "# writing corr-00-data to file %s\n", filename); fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu); for(x1=0; x1<nclass; x1++) { if(oh_val[0][x1]>=g_qhatsqr_min-_Q2EPS && oh_val[0][x1]<=g_qhatsqr_max+_Q2EPS) { ix = x1*Thp1; for(x0=0; x0<Thp1; x0++) { fprintf(ofs, "%25.16e%3d%25.16e%25.16e%5d\n", oh_val[0][x1], x0, piavg[ix+x0].re, piavg[ix+x0].im, picount[x1]); } } } fclose(ofs); for(ix=0; ix<oh_nc; ix++) { piavg[ix].re = 0.; piavg[ix].im = 0.; picount[ix] = 0; } for(ix=0; ix<LX*LY*LZ; ix++) { for(x0=0; x0<Thp1; x0++) { iix = ix*T+x0; iiy = oh_id[ix]*Thp1+x0; piavg[iiy].re += pijj[iix].re; piavg[iiy].im += pijj[iix].im; if(x0>0 && x0<T/2) { iix = ix*T+(T-x0); piavg[iiy].re += pijj[iix].re; piavg[iiy].im += pijj[iix].im; } } picount[oh_id[ix]]++; } for(ix=0; ix<nclass; ix++) { 
for(x0=0; x0<Thp1; x0++) { iix = ix*Thp1+x0; if(picount[ix]>0) { piavg[iix].re /= (double)picount[ix]; piavg[iix].im /= (double)picount[ix]; if(x0>0 && x0<T/2) { piavg[iix].re /= 2.; piavg[iix].im /= 2.; } } }} sprintf(filename, "corr02_jj.%.4d", gid); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } fprintf(stdout, "# writing corr-jj-data to file %s\n", filename); fprintf(ofs, "# %6d%3d%3d%3d%3d%12.7f%12.7f\n", gid, T_global, LX, LY, LZ, g_kappa, g_mu); for(x1=0; x1<nclass; x1++) { ix = x1*Thp1; for(x0=0; x0<Thp1; x0++) { fprintf(ofs, "%25.16e%3d%25.16e%25.16e%5d\n", oh_val[0][x1], x0, piavg[ix+x0].re, piavg[ix+x0].im, picount[x1]); } } fclose(ofs); sprintf(filename, "corr.02.mom"); if( (ofs=fopen(filename, "w")) == (FILE*)NULL ) { fprintf(stderr, "Error: could not open file %s for writing\n", filename); exit(5); } for(ix=0; ix<VOLUME; ix++) fprintf(ofs, "%5d%25.16e%5d", ix, oh_val[0][ix], picount[ix]); fclose(ofs); retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# time for O_h averaging %e seconds\n", retime-ratime); free(piavg); free(picount); } */ #endif } /*************************************** * free the allocated memory, finalize * ***************************************/ free(corrt); free_geometry(); /* free(pi00); free(pijj); */ fftw_destroy_plan(plan_m); return(0); }
/*
 * grSstWinOpen - initialise the module's GL-related global state.
 *
 * Steps, in order:
 *   1. reserve three static texture names (default, color, depth) starting
 *      at 32*1024*1024;
 *   2. take the output size from the "mupen64-screensize" variable obtained
 *      through environ_cb(), expected as "<width>x<height>"; anything that
 *      does not parse falls back to 640x480;
 *   3. record which optional extensions isExtensionSupported() reports in
 *      the corresponding *_support globals;
 *   4. reset the viewport bookkeeping and the texbufs[] table, then call
 *      FindBestDepthBias(), init_geometry(), init_textures() and
 *      init_combiner().
 *
 * screen_resolution and refresh_rate are only logged.  color_format is
 * stored in lfb_color_fmt.  origin_location, nColBuffers and nAuxBuffers
 * only produce a warning when they are not GR_ORIGIN_UPPER_LEFT, 2 and 1
 * respectively.
 *
 * Returns 1 unconditionally.
 */
FX_ENTRY GrContext_t FX_CALL
grSstWinOpen(
      GrScreenResolution_t screen_resolution,
      GrScreenRefresh_t    refresh_rate,
      GrColorFormat_t      color_format,
      GrOriginLocation_t   origin_location,
      int                  nColBuffers,
      int                  nAuxBuffers)
{
   struct retro_variable var = { "mupen64-screensize", 0 };

   // ZIGGY
   // allocate static texture names
   // the initial value should be big enough to support the maximal resolution
   free_texture    = 32*1024*1024;
   default_texture = free_texture++;
   color_texture   = free_texture++;
   depth_texture   = free_texture++;

   LOG("grSstWinOpen(%d, %d, %d, %d, %d %d)\r\n", screen_resolution&~0x80000000, refresh_rate, color_format, origin_location, nColBuffers, nAuxBuffers);

   /* Default size; overridden below when the variable parses cleanly. */
   width  = 640;
   height = 480;

   bool ret = environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var);
   if (ret && var.value)
   {
      /* A partial match may already have overwritten width, so restore
       * both values whenever fewer than two fields were converted. */
      if (sscanf(var.value, "%dx%d", &width, &height) != 2)
      {
         width  = 640;
         height = 480;
      }
   }
   glViewport(0, 0, width, height);

   lfb_color_fmt = color_format;

   if (origin_location != GR_ORIGIN_UPPER_LEFT)
      DISPLAY_WARNING("origin must be in upper left corner");
   if (nColBuffers != 2)
      DISPLAY_WARNING("number of color buffer is not 2");
   if (nAuxBuffers != 1)
      DISPLAY_WARNING("number of auxiliary buffer is not 1");

   if (isExtensionSupported("GL_ARB_texture_env_combine") == 0 &&
       isExtensionSupported("GL_EXT_texture_env_combine") == 0)
      DISPLAY_WARNING("Your video card doesn't support GL_ARB_texture_env_combine extension");
   if (isExtensionSupported("GL_ARB_multitexture") == 0)
      DISPLAY_WARNING("Your video card doesn't support GL_ARB_multitexture extension");
   if (isExtensionSupported("GL_ARB_texture_mirrored_repeat") == 0)
      DISPLAY_WARNING("Your video card doesn't support GL_ARB_texture_mirrored_repeat extension");

   /* The GL query is disabled; the count is hard-coded. */
   nbAuxBuffers = 4;
   //glGetIntegerv(GL_AUX_BUFFERS, &nbAuxBuffers);
   if (nbAuxBuffers > 0)
      printf("Congratulations, you have %d auxilliary buffers, we'll use them wisely !\n", nbAuxBuffers);

   blend_func_separate_support = 1;
   packed_pixels_support = 0;

   if (isExtensionSupported("GL_EXT_blend_func_separate") == 0)
   {
      DISPLAY_WARNING("GL_EXT_blend_func_separate not supported.\n");
      blend_func_separate_support = 0;
   }
   else
   {
      printf("GL_EXT_blend_func_separate supported.\n");
      blend_func_separate_support = 1;
   }

   // we can assume that non-GLES has GL_EXT_packed_pixels
   // support -it's included since OpenGL 1.2
#ifdef GLES
   if (isExtensionSupported("GL_EXT_packed_pixels") != 0)
#endif
      packed_pixels_support = 1;

   if (isExtensionSupported("GL_ARB_texture_non_power_of_two") == 0)
   {
      /* Fixed: this branch used to announce the extension as "supported". */
      DISPLAY_WARNING("GL_ARB_texture_non_power_of_two not supported.\n");
      npot_support = 0;
   }
   else
   {
      printf("GL_ARB_texture_non_power_of_two supported.\n");
      npot_support = 1;
   }

   if (isExtensionSupported("GL_EXT_fog_coord") == 0)
   {
      DISPLAY_WARNING("GL_EXT_fog_coord not supported.\n");
      fog_coord_support = 0;
   }
   else
   {
      printf("GL_EXT_fog_coord supported.\n");
      fog_coord_support = 1;
   }

   /* The shader extensions are queried but the result is not acted upon. */
   if (isExtensionSupported("GL_ARB_shading_language_100") &&
       isExtensionSupported("GL_ARB_shader_objects") &&
       isExtensionSupported("GL_ARB_fragment_shader") &&
       isExtensionSupported("GL_ARB_vertex_shader"))
   {
   }

#ifdef GLES
   if (isExtensionSupported("GL_EXT_texture_format_BGRA8888"))
   {
      printf("GL_EXT_texture_format_BGRA8888 supported.\n");
      bgra8888_support = 1;
   }
   else
   {
      DISPLAY_WARNING("GL_EXT_texture_format_BGRA8888 not supported.\n");
      bgra8888_support = 0;
   }
#endif

   glViewport(0, 0, width, height);
   viewport_width  = width;
   viewport_height = height;

   // VP try to resolve z precision issues
   // glMatrixMode(GL_MODELVIEW);
   // glLoadIdentity();
   // glTranslatef(0, 0, 1-zscale);
   // glScalef(1, 1, zscale);

   widtho  = width/2;
   heighto = height/2;

   pBufferWidth = pBufferHeight = -1;

   current_buffer = GL_BACK;
   texture_unit   = GL_TEXTURE0;

   /* Mark every texture buffer slot with the 0xffffffff sentinel. */
   {
      int i;
      for (i=0; i<NB_TEXBUFS; i++)
         texbufs[i].start = texbufs[i].end = 0xffffffff;
   }

   FindBestDepthBias();

   init_geometry();
   init_textures();
   init_combiner();

   return 1;
}
int main(int argc, char **argv) { int c, i, j, mu, nu; int count = 0; int filename_set = 0; int dims[4] = {0,0,0,0}; int l_LX_at, l_LXstart_at; int x0, x1, x2, x3, ix, iix, it; int sid, status, gid; double **corr=NULL, **corr2=NULL; double *tcorr=NULL, *tcorr2=NULL; double *work = (double*)NULL; double q[4], fnorm; int verbose = 0; int do_gt = 0; int nsource=0; char filename[100], contype[200]; double ratime, retime; double plaq; double spinor1[24], spinor2[24], U_[18]; double *gauge_trafo=(double*)NULL; double mom2, mom4; complex w, w1, *cp1, *cp2, *cp3; FILE *ofs; #ifdef MPI // MPI_Init(&argc, &argv); fprintf(stderr, "[jc_ud_x] Error, only non-mpi version implemented\n"); exit(1); #endif while ((c = getopt(argc, argv, "h?f:")) != -1) { switch (c) { case 'f': strcpy(filename, optarg); filename_set=1; break; case 'h': case '?': default: usage(); break; } } /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# Reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } if(g_kappa == 0.) 
{ if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n"); usage(); } fprintf(stdout, "\n**************************************************\n"); fprintf(stdout, "* jc_ud_x\n"); fprintf(stdout, "**************************************************\n\n"); /********************************* * initialize MPI parameters *********************************/ // mpi_init(argc, argv); /* initialize */ dims[0]=T_global; dims[1]=LX; dims[2]=LY; dims[3]=LZ; T = T_global; Tstart = 0; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; fprintf(stdout, "# [%2d] parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] l_LX_at = %3d\n"\ "# [%2d] l_LXstart_at = %3d\n"\ "# [%2d] FFTW_LOC_VOLUME = %3d\n", g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at, g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME); if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); exit(1); } geometry(); /************************************************* * allocate mem for gauge field and spinor fields *************************************************/ alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND); no_fields = 2; g_spinor_field = (double**)calloc(no_fields, sizeof(double*)); for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND); /**************************************** * allocate memory for the contractions ****************************************/ nsource = (g_sourceid2 - g_sourceid + 1) / g_sourceid_step; if(g_cart_id==0) fprintf(stdout, "# nsource = %d\n", nsource); corr = (double**)calloc( nsource, sizeof(double*)); corr[0] = (double*)calloc( nsource*T*8, sizeof(double)); for(i=1;i<nsource;i++) corr[i] = corr[i-1] + 8*T; corr2 = (double**)calloc( nsource, sizeof(double*)); corr2[0] = (double*)calloc( nsource*8*T, sizeof(double)); for(i=1;i<nsource;i++) corr2[i] = corr2[i-1] + 8*T; tcorr = (double*)calloc(T*8, sizeof(double)); tcorr2 = (double*)calloc(T*8, sizeof(double)); 
/*********************************************** * start loop on gauge id.s ***********************************************/ for(gid=g_gaugeid; gid<=g_gaugeid2; gid++) { sprintf(filename, "%s.%.4d", gaugefilename_prefix, gid); if(g_cart_id==0) fprintf(stdout, "# reading gauge field from file %s\n", filename); read_lime_gauge_field_doubleprec(filename); xchange_gauge(); plaquette(&plaq); if(g_cart_id==0) fprintf(stdout, "# measured plaquette value: %25.16e\n", plaq); /* reset disc to zero */ for(ix=0; ix<nsource*8*T; ix++) corr[0][ix] = 0.; for(ix=0; ix<nsource*8*T; ix++) corr2[0][ix] = 0.; count=0; /*********************************************** * start loop on source id.s ***********************************************/ for(sid=g_sourceid; sid<=g_sourceid2; sid+=g_sourceid_step) { /* read the new propagator to g_spinor_field[0] */ ratime = (double)clock() / CLOCKS_PER_SEC; if(format==0) { sprintf(filename, "%s.%.4d.%.2d.inverted", filename_prefix, gid, sid); if(read_lime_spinor(g_spinor_field[0], filename, 0) != 0) break; } else if(format==1) { sprintf(filename, "%s.%.4d.%.5d.inverted", filename_prefix, gid, sid); if(read_cmi(g_spinor_field[0], filename) != 0) { fprintf(stderr, "\nError from read_cmi\n"); break; } } xchange_field(g_spinor_field[0]); retime = (double)clock() / CLOCKS_PER_SEC; if(g_cart_id==0) fprintf(stdout, "# time to read prop.: %e seconds\n", retime-ratime); ratime = (double)clock() / CLOCKS_PER_SEC; /* apply [1] = D_tm [0] */ Q_phi_tbc(g_spinor_field[1], g_spinor_field[0]); xchange_field(g_spinor_field[1]); retime = (double)clock() / CLOCKS_PER_SEC; if(g_cart_id==0) fprintf(stdout, "# time to apply D_W: %e seconds\n", retime-ratime); ratime = (double)clock() / CLOCKS_PER_SEC; /* calculate real and imaginary part */ for(mu=0; mu<4; mu++) { for(x0=0; x0<T; x0++) { for(x1=0; x1<LX; x1++) { for(x2=0; x2<LY; x2++) { for(x3=0; x3<LZ; x3++) { ix = g_ipt[x0][x1][x2][x3]; _cm_eq_cm_ti_co(U_, g_gauge_field+_GGI(ix,mu), &(co_phase_up[mu])); 
_fv_eq_cm_ti_fv(spinor1, U_, &g_spinor_field[0][_GSI(g_iup[ix][mu])]); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_mi_eq_fv(spinor2, spinor1); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[1][_GSI(ix)], spinor2); corr[count][2*(mu*T+x0) ] -= 0.5*w.re; corr[count][2*(mu*T+x0)+1] -= 0.5*w.im; _fv_eq_cm_dag_ti_fv(spinor1, U_, &g_spinor_field[0][_GSI(ix)]); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_pl_eq_fv(spinor2, spinor1); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[1][_GSI(g_iup[ix][mu])], spinor2); corr[count][2*(mu*T+x0) ] -= 0.5*w.re; corr[count][2*(mu*T+x0)+1] -= 0.5*w.im; _fv_eq_gamma_ti_fv(spinor1, mu, &g_spinor_field[0][_GSI(ix)]); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[1][_GSI(ix)], spinor1); corr2[count][2*(mu*T+x0) ] -= w.re; corr2[count][2*(mu*T+x0)+1] -= w.im; }}} } } // of mu count++; } // of sid retime = (double)clock() / CLOCKS_PER_SEC; if(g_cart_id==0) fprintf(stdout, "# time to calculate contractions: %e seconds\n", retime-ratime); for(ix=0;ix<8*T;ix++) tcorr[ix] = 0.; for(ix=0;ix<8*T;ix++) tcorr2[ix] = 0.; for(i=0;i<nsource-1;i++) { for(j=i+1;j<nsource;j++) { for(mu=0;mu<4;mu++) { for(x0=0;x0<T;x0++) { // times at source for(x1=0;x1<T;x1++) { // times at sink it = (x1 - x0 + T) % T; // conserved current tcorr[2*(mu*T+it) ] += corr[i][2*(mu*T+x1)] * corr[j][2*(mu*T+x0) ] - corr[i][2*(mu*T+x1)+1] * corr[j][2*(mu*T+x0)+1]; tcorr[2*(mu*T+it)+1] += corr[i][2*(mu*T+x1)] * corr[j][2*(mu*T+x0)+1] + corr[i][2*(mu*T+x1)+1] * corr[j][2*(mu*T+x0) ]; tcorr[2*(mu*T+it) ] += corr[j][2*(mu*T+x1)] * corr[i][2*(mu*T+x0) ] - corr[j][2*(mu*T+x1)+1] * corr[i][2*(mu*T+x0)+1]; tcorr[2*(mu*T+it)+1] += corr[j][2*(mu*T+x1)] * corr[i][2*(mu*T+x0)+1] + corr[j][2*(mu*T+x1)+1] * corr[i][2*(mu*T+x0) ]; // local current tcorr2[2*(mu*T+it) ] += corr2[i][2*(mu*T+x1)] * corr2[j][2*(mu*T+x0) ] - corr2[i][2*(mu*T+x1)+1] * corr2[j][2*(mu*T+x0)+1]; tcorr2[2*(mu*T+it)+1] += corr2[i][2*(mu*T+x1)] * corr2[j][2*(mu*T+x0)+1] + corr2[i][2*(mu*T+x1)+1] * corr2[j][2*(mu*T+x0) ]; 
tcorr2[2*(mu*T+it) ] += corr2[j][2*(mu*T+x1)] * corr2[i][2*(mu*T+x0) ] - corr2[j][2*(mu*T+x1)+1] * corr2[i][2*(mu*T+x0)+1]; tcorr2[2*(mu*T+it)+1] += corr2[j][2*(mu*T+x1)] * corr2[i][2*(mu*T+x0)+1] + corr2[j][2*(mu*T+x1)+1] * corr2[i][2*(mu*T+x0) ]; }} } }} fnorm = 1. / ( g_prop_normsqr * g_prop_normsqr * (double)(LX*LY*LZ) * (double)(LX*LY*LZ) * nsource * (nsource-1)); if(g_cart_id==0) fprintf(stdout, "X-fnorm = %e\n", fnorm); for(ix=0;ix<8*T;ix++) tcorr[ix] *= fnorm; for(ix=0;ix<8*T;ix++) tcorr2[ix] *= fnorm; /************************************************ * save results ************************************************/ if(g_cart_id == 0) fprintf(stdout, "# save results for gauge id %d and sid %d\n", gid, sid); /* save the result in position space */ sprintf(filename, "jc_u_tp0.%.4d.%.4d", gid, sid); ofs = fopen(filename, "w"); for(x0=0;x0<T;x0++) fprintf(ofs, "%d%25.16e%25.16e%25.16e%25.16e%25.16e%25.16e%25.16e%25.16e\n", x0, tcorr[2*(0*T+x0)], tcorr[2*(0*T+x0)+1], tcorr[2*(1*T+x0)], tcorr[2*(1*T+x0)+1], tcorr[2*(2*T+x0)], tcorr[2*(2*T+x0)+1], tcorr[2*(3*T+x0)], tcorr[2*(3*T+x0)+1]); fclose(ofs); } /* of loop on gid */ /*********************************************** * free the allocated memory, finalize ***********************************************/ free(g_gauge_field); for(i=0; i<no_fields; i++) free(g_spinor_field[i]); free(g_spinor_field); free_geometry(); free(corr); free(corr2); free(tcorr); free(tcorr2); return(0); }
int main(int argc, char **argv) { int c, mu, nu, status; int i, j, ncon=-1, ir, is, ic, id; int filename_set = 0; int x0, x1, x2, x3, ix, iix; int y0, y1, y2, y3, iy, iiy; int start_valuet=0, start_valuex=0, start_valuey=0; int num_threads=1, threadid, nthreads; int seed, seed_set=0; double diff1, diff2; /* double *chi=NULL, *psi=NULL; */ double plaq=0., pl_ts, pl_xs, pl_global; double *gauge_field_smeared = NULL; double s[18], t[18], u[18], pl_loc; double spinor1[24], spinor2[24]; double *pl_gather=NULL; double dtmp; complex prod, w, w2; int verbose = 0; char filename[200]; char file1[200]; char file2[200]; FILE *ofs=NULL; double norm, norm2; fermion_propagator_type *prop=NULL, prop2=NULL, seq_prop=NULL, seq_prop2=NULL, prop_aux=NULL, prop_aux2=NULL; int idx, eoflag, shift; float *buffer = NULL; unsigned int VOL3; size_t items, bytes; #ifdef MPI MPI_Init(&argc, &argv); #endif while ((c = getopt(argc, argv, "h?vf:N:c:C:t:s:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'N': ncon = atoi(optarg); break; case 'c': strcpy(file1, optarg); break; case 'C': strcpy(file2, optarg); break; case 't': num_threads = atoi(optarg); break; case 's': seed = atoi(optarg); fprintf(stdout, "# [] use seed value %d\n", seed); seed_set = 1; break; case 'h': case '?': default: usage(); break; } } /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); if(g_cart_id==0) fprintf(stdout, "# Reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } if(g_kappa == 0.) { if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n"); usage(); } /* initialize MPI parameters */ mpi_init(argc, argv); /* initialize T etc. 
*/ fprintf(stdout, "# [%2d] parameters:\n"\ "# [%2d] T_global = %3d\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] LX_global = %3d\n"\ "# [%2d] LX = %3d\n"\ "# [%2d] LXstart = %3d\n"\ "# [%2d] LY_global = %3d\n"\ "# [%2d] LY = %3d\n"\ "# [%2d] LYstart = %3d\n",\ g_cart_id, g_cart_id, T_global, g_cart_id, T, g_cart_id, Tstart, g_cart_id, LX_global, g_cart_id, LX, g_cart_id, LXstart, g_cart_id, LY_global, g_cart_id, LY, g_cart_id, LYstart); if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); exit(101); } geometry(); if(init_geometry_5d() != 0) { fprintf(stderr, "ERROR from init_geometry_5d\n"); exit(102); } geometry_5d(); VOL3 = LX*LY*LZ; /* read the gauge field */ alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND); sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf); if(g_cart_id==0) fprintf(stdout, "# reading gauge field from file %s\n", filename); if(strcmp(gaugefilename_prefix, "identity")==0) { status = unit_gauge_field(g_gauge_field, VOLUME); } else { // status = read_nersc_gauge_field_3x3(g_gauge_field, filename, &plaq); // status = read_ildg_nersc_gauge_field(g_gauge_field, filename); status = read_lime_gauge_field_doubleprec(filename); // status = read_nersc_gauge_field(g_gauge_field, filename, &plaq); // status = 0; } if(status != 0) { fprintf(stderr, "[apply_Dtm] Error, could not read gauge field\n"); exit(11); } xchange_gauge(); // measure the plaquette if(g_cart_id==0) fprintf(stdout, "# read plaquette value 1st field: %25.16e\n", plaq); plaquette(&plaq); if(g_cart_id==0) fprintf(stdout, "# measured plaquette value 1st field: %25.16e\n", plaq); g_kappa5d = 0.5 / (5. 
+ g_m0); fprintf(stdout, "# [] g_kappa5d = %e\n", g_kappa5d); no_fields=4; g_spinor_field = (double**)calloc(no_fields, sizeof(double*)); for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], L5*VOLUMEPLUSRAND); /* items = VOL3 * 288; bytes = items * sizeof(float); if( (buffer = (float*)malloc( bytes ) ) == NULL ) { fprintf(stderr, "[] Error, could not allocate buffer\n"); exit(20); } */ /**************************************** * read read the spinor fields ****************************************/ /* prop = create_fp_field(VOL3); create_fp(&prop2); create_fp(&prop_aux); create_fp(&prop_aux2); create_fp(&seq_prop); create_fp(&seq_prop2); */ #ifdef MPI if(!seed_set) { seed = g_seed; } srand(seed+g_cart_id); for(ix=0;ix<VOLUME*L5;ix++) { for(i=0;i<24;i++) { spinor1[i] = 2* (double)rand() / (double)RAND_MAX - 1.; } _fv_eq_fv(g_spinor_field[0]+_GSI(ix), spinor1 ); } for(i=0;i<g_nproc;i++) { if(g_cart_id==i) { if(i==0) ofs = fopen("source", "w"); else ofs = fopen("source", "a"); for(is=0;is<L5;is++) { for(x0=0;x0<T; x0++) { for(x1=0;x1<LX; x1++) { for(x2=0;x2<LX; x2++) { for(x3=0;x3<LX; x3++) { iix = is*VOLUME*g_nproc + (((x0+g_proc_coords[0]*T)*LX*g_nproc_x+ x1+g_proc_coords[1]*LX )*LY*g_nproc_y + x2+g_proc_coords[2]*LY )*LZ*g_nproc_z + x3+g_proc_coords[3]*LZ; ix = g_ipt_5d[is][x0][x1][x2][x3]; for(c=0;c<24;c++) { fprintf(ofs, "%8d%8d%3d%25.16e\n", iix, ix, c, g_spinor_field[0][_GSI(ix)+c]); } }}}} } fclose(ofs); } #ifdef MPI MPI_Barrier(g_cart_grid); #endif } #else ofs = fopen("source", "r"); for(ix=0;ix<24*VOLUME*L5;ix++) { fscanf(ofs, "%d%d%d%lf", &x1,&x2,&x3, &dtmp); g_spinor_field[0][_GSI(x1)+x3] = dtmp; } fclose(ofs); #endif xchange_field_5d(g_spinor_field[0]); Q_DW_Wilson_dag_phi(g_spinor_field[1], g_spinor_field[0]); xchange_field_5d(g_spinor_field[1]); Q_DW_Wilson_phi(g_spinor_field[2], g_spinor_field[1]); sprintf(filename, "prop_%.2d.%.2d", g_nproc, g_cart_id); ofs = fopen(filename, "w"); printf_spinor_field_5d(g_spinor_field[2], ofs); 
fclose(ofs); // for(ix=0;ix<VOLUME*L5;ix++) { // for(i=0;i<24;i++) { // spinor1[i] = 2* (double)rand() / (double)RAND_MAX - 1.; // } // _fv_eq_fv(g_spinor_field[1]+_GSI(ix), spinor1 ); // } /* xchange_field_5d(g_spinor_field[0]); sprintf(filename, "spinor.%.2d", g_cart_id); ofs = fopen(filename, "w"); printf_spinor_field_5d(g_spinor_field[0], ofs); fclose(ofs); */ /* // 2 = D 0 Q_DW_Wilson_phi(g_spinor_field[2], g_spinor_field[0]); // 3 = D^dagger 1 Q_DW_Wilson_dag_phi(g_spinor_field[3], g_spinor_field[1]); // <1, 2> = <1, D 0 > spinor_scalar_product_co(&w, g_spinor_field[1], g_spinor_field[2], VOLUME*L5); // <3, 0> = < D^dagger 1, 0 > spinor_scalar_product_co(&w2, g_spinor_field[3], g_spinor_field[0], VOLUME*L5); fprintf(stdout, "# [] w = %e + %e*1.i\n", w.re, w.im); fprintf(stdout, "# [] w2 = %e + %e*1.i\n", w2.re, w2.im); fprintf(stdout, "# [] abs difference = %e \n", sqrt(_SQR(w2.re-w.re)+_SQR(w2.im-w.im)) ); */ /* for(i=0;i<12;i++) { fprintf(stdout, "s1[%2d] <- %25.16e + %25.16e*1.i\n", i+1, spinor1[2*i], spinor1[2*i+1]); } for(i=0;i<24;i++) { spinor2[i] = 2* (double)rand() / (double)RAND_MAX - 1.; } for(i=0;i<12;i++) { fprintf(stdout, "s2[%2d] <- %25.16e + %25.16e*1.i\n", i+1, spinor2[2*i], spinor2[2*i+1]); } _fv_mi_eq_PRe_fv(spinor2, spinor1); for(i=0;i<12;i++) { fprintf(stdout, "s3[%2d] <- %25.16e + %25.16e*1.i\n", i+1, spinor2[2*i], spinor2[2*i+1]); } */ /* ofs = fopen("dw_spinor", "w"); Q_DW_Wilson_phi(g_spinor_field[1], g_spinor_field[0]); printf_spinor_field(g_spinor_field[1], ofs); fclose(ofs); g_kappa = g_kappa5d; ofs = fopen("wilson_spinor", "w"); Q_Wilson_phi(g_spinor_field[2], g_spinor_field[0]); printf_spinor_field(g_spinor_field[2], ofs); fclose(ofs); */ #ifdef _UNDEF /******************************************************************* * propagators *******************************************************************/ // for(i=0; i<12;i++) for(i=0; i<1;i++) { //sprintf(file1, "source.%.4d.t00x00y00z00.%.2d.inverted", Nconf, i); sprintf(file1, 
"/home/mpetschlies/quda-0.3.2/tests/prop"); if(g_cart_id==0) fprintf(stdout, "# Reading prop. from file %s\n", file1); fflush(stdout); //if( read_lime_spinor(g_spinor_field[0], file1, 0) != 0 ) { ofs = fopen(file1, "rb"); if( fread(g_spinor_field[0], sizeof(double), 24*L5*VOLUME, ofs) != 24*L5*VOLUME) { fprintf(stderr, "Error, could not read proper amount of data from file %s\n", file1); exit(100); } fclose(ofs); for(ix=0;ix<VOLUME*L5;ix++) { _fv_ti_eq_re(g_spinor_field[0]+_GSI(ix), 2.*g_kappa5d); } /* if( (ofs = fopen("prop_full", "w")) == NULL ) exit(22); for(ix=0;ix<L5;ix++) { fprintf(ofs, "# [] s = %d\n", ix); printf_spinor_field(g_spinor_field[0]+_GSI(ix*VOLUME), ofs); } fclose(ofs); */ // reorder, multiply with g2 for(is=0,iix=0; is<L5; is++) { for(ix=0; ix<VOLUME; ix++) { iiy = lexic2eot_5d (is, ix); _fv_eq_fv(spinor1, g_spinor_field[0]+_GSI(iiy)); _fv_eq_gamma_ti_fv(g_spinor_field[1]+_GSI(iix), 2, spinor1 ); iix++; }} Q_DW_Wilson_phi(g_spinor_field[2], g_spinor_field[1]); // Q_DW_Wilson_dag_phi(g_spinor_field[2], g_spinor_field[1]); fprintf(stdout, "# [] finished application of Dirac operator\n"); fflush(stdout); // reorder, multiply with g2 for(is=0, iix=0;is<L5;is++) { for(ix=0; ix<VOLUME; ix++) { iiy = lexic2eot_5d(is, ix); _fv_eq_fv(spinor1, g_spinor_field[2]+_GSI(iix)); _fv_eq_gamma_ti_fv(g_spinor_field[1]+_GSI(iiy), 2, spinor1 ); iix++; }} if( (ofs = fopen("my_out", "w")) == NULL ) exit(23); for(ix=0;ix<L5;ix++) { fprintf(ofs, "# [] s = %d\n", ix); printf_spinor_field(g_spinor_field[1]+_GSI(ix*VOLUME), ofs); } fclose(ofs); sprintf(file1, "/home/mpetschlies/quda-0.3.2/tests/source"); if(g_cart_id==0) fprintf(stdout, "# Reading prop. 
from file %s\n", file1); fflush(stdout); //if( read_lime_spinor(g_spinor_field[0], file1, 0) != 0 ) { ofs = fopen(file1, "rb"); if( fread(g_spinor_field[2], sizeof(double), 24*L5*VOLUME, ofs) != 24*L5*VOLUME) { fprintf(stderr, "Error, could not read proper amount of data from file %s\n", file1); exit(100); } fclose(ofs); /* if( (ofs = fopen("v_out", "w")) == NULL ) exit(23); for(ix=0;ix<L5;ix++) { fprintf(ofs, "# [] s = %d\n", ix); printf_spinor_field(g_spinor_field[2]+_GSI(ix*VOLUME), ofs); } fclose(ofs); */ spinor_scalar_product_re(&norm2, g_spinor_field[2], g_spinor_field[2], VOLUME*L5); for(ix=0;ix<VOLUME*L5;ix++) { _fv_mi_eq_fv(g_spinor_field[1]+_GSI(ix), g_spinor_field[2]+_GSI(ix)); } spinor_scalar_product_re(&norm, g_spinor_field[1], g_spinor_field[1], VOLUME*L5); fprintf(stdout, "\n# [] absolut residuum squared: %e; relative residuum %e\n", norm, sqrt(norm/norm2) ); } // of loop on spin color indices #endif /*********************************************** * free the allocated memory, finalize ***********************************************/ free(g_gauge_field); free_geometry(); if(gauge_field_smeared != NULL) free(gauge_field_smeared); if(g_spinor_field != NULL) { for(i=0; i<no_fields; i++) free(g_spinor_field[i]); free(g_spinor_field); } free(buffer); free_fp_field(&prop); free_fp(&prop2); free_fp(&prop_aux); free_fp(&prop_aux2); free_fp(&seq_prop); free_fp(&seq_prop2); g_the_time = time(NULL); fprintf(stdout, "# [] %s# [] end fo run\n", ctime(&g_the_time)); fflush(stdout); fprintf(stderr, "# [] %s# [] end fo run\n", ctime(&g_the_time)); fflush(stderr); #ifdef MPI MPI_Finalize(); #endif return(0); }
int main(int argc, char *argv[]) { Parameters *parameters; // user defined parameters Geometry *geometry; // homogenous cube geometry Material *material; // problem material Bank *source_bank; // array for particle source sites Tally *tally; // scalar flux tally double *keff; // effective multiplication factor double t1, t2; // timers #ifdef _OPENMP unsigned long counter = 0; //counter to decide the start pos of master bank copy from sub banks Bank *g_fission_bank; //global fission bank #endif // Get inputs: set parameters to default values, parse parameter file, // override with any command line inputs, and print parameters parameters = init_parameters(); parse_parameters(parameters); read_CLI(argc, argv, parameters); print_parameters(parameters); // Set initial RNG seed set_initial_seed(parameters->seed); set_stream(STREAM_INIT); // Create files for writing results to init_output(parameters); // Set up geometry geometry = init_geometry(parameters); // Set up material material = init_material(parameters); // Set up tallies tally = init_tally(parameters); // Create source bank and initial source distribution source_bank = init_source_bank(parameters, geometry); // Create fission bank #ifdef _OPENMP omp_set_num_threads(parameters->n_threads); // Set number of openmp threads printf("threads num: %d\n", parameters->n_threads); // Allocate one master fission bank g_fission_bank = init_bank(2*parameters->n_particles); #endif // Set up array for k effective keff = calloc(parameters->n_active, sizeof(double)); center_print("SIMULATION", 79); border_print(); printf("%-15s %-15s %-15s\n", "BATCH", "KEFF", "MEAN KEFF"); #ifdef _OPENMP // Start time t1 = omp_get_wtime(); run_eigenvalue(counter, g_fission_bank, parameters, geometry, material, source_bank, fission_bank, tally, keff); // Stop time t2 = omp_get_wtime(); #endif printf("Simulation time: %f secs\n", t2-t1); // Free memory #ifdef _OPENMP free_bank(g_fission_bank); #endif free(keff); free_tally(tally); 
free_bank(source_bank); free_material(material); free(geometry); free(parameters); return 0; }
int main(int argc, char **argv) { int c, i, mu; int count = 0; int filename_set = 0; int l_LX_at, l_LXstart_at; int x0, x1, ix, idx; int VOL3; int sid; double *disc = (double*)NULL; int verbose = 0; char filename[100]; double ratime, retime; double plaq; double spinor1[24], spinor2[24]; double _2kappamu; double *gauge_field_f=NULL, *gauge_field_timeslice=NULL; double v4norm = 0., vvnorm = 0.; complex w; FILE *ofs1, *ofs2; /* double sign_adj5[] = {-1., -1., -1., -1., +1., +1., +1., +1., +1., +1., -1., -1., -1., 1., -1., -1.}; */ double hopexp_coeff[8], addreal, addimag; int gindex[] = { 5 , 1 , 2 , 3 , 6 ,10 ,11 ,12 , 4 , 7 , 8 , 9 , 0 ,15 , 14 ,13 }; int isimag[] = { 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 0 , 1 , 1 , 1 , 0 , 1 , 1 , 1 }; double gsign[] = {-1., 1., 1., 1., -1., 1., 1., 1., 1., 1., 1., 1., 1., 1., -1., 1.}; #ifdef MPI MPI_Status status; #endif #ifdef MPI MPI_Init(&argc, &argv); #endif while ((c = getopt(argc, argv, "h?vgf:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'h': case '?': default: usage(); break; } } /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } if(g_kappa == 0.) 
{ if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n"); usage(); } /* initialize MPI parameters */ mpi_init(argc, argv); #ifdef MPI T = T_global / g_nproc; Tstart = g_cart_id * T; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; VOL3 = LX*LY*LZ; #else T = T_global; Tstart = 0; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; VOL3 = LX*LY*LZ; #endif fprintf(stdout, "# [%2d] parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] l_LX_at = %3d\n"\ "# [%2d] l_LXstart_at = %3d\n"\ "# [%2d] FFTW_LOC_VOLUME = %3d\n", g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at, g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME); if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(1); } geometry(); /* read the gauge field */ alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND); sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf); if(g_cart_id==0) fprintf(stdout, "reading gauge field from file %s\n", filename); read_lime_gauge_field_doubleprec(filename); xchange_gauge(); /* measure the plaquette */ plaquette(&plaq); if(g_cart_id==0) fprintf(stdout, "# measured plaquette value: %25.16e\n", plaq); if(Nlong > -1) { /* N_ape = 5; */ alpha_ape = 0.4; if(g_cart_id==0) fprintf(stdout, "# apply fuzzing of gauge field and propagators with parameters:\n"\ "# Nlong = %d\n# N_ape = %d\n# alpha_ape = %f\n", Nlong, N_ape, alpha_ape); alloc_gauge_field(&gauge_field_f, VOLUMEPLUSRAND); if( (gauge_field_timeslice = (double*)malloc(72*VOL3*sizeof(double))) == (double*)NULL ) { fprintf(stderr, "Error, could not allocate mem for gauge_field_timeslice\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(2); } for(x0=0; x0<T; x0++) { memcpy((void*)gauge_field_timeslice, (void*)(g_gauge_field+_GGI(g_ipt[x0][0][0][0],0)), 72*VOL3*sizeof(double)); for(i=0; i<N_ape; i++) { APE_Smearing_Step_Timeslice(gauge_field_timeslice, alpha_ape); 
} fuzzed_links_Timeslice(gauge_field_f, gauge_field_timeslice, Nlong, x0); } free(gauge_field_timeslice); } /* test: print the fuzzed APE smeared gauge field to stdout */ /* for(ix=0; ix<36*VOLUME; ix++) { fprintf(stdout, "%6d%25.16e%25.16e%25.16e%25.16e\n", ix, gauge_field_f[2*ix], gauge_field_f[2*ix+1], g_gauge_field[2*ix], g_gauge_field[2*ix+1]); } */ /* allocate memory for the spinor fields */ no_fields = 4; g_spinor_field = (double**)calloc(no_fields, sizeof(double*)); for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND); /* allocate memory for the contractions */ disc = (double*)calloc(4*16*T*2, sizeof(double)); if( disc==(double*)NULL ) { fprintf(stderr, "could not allocate memory for disc\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(3); } for(ix=0; ix<4*32*T; ix++) disc[ix] = 0.; if(g_cart_id==0) { sprintf(filename, "cvc_2pt_disc_vv.%.4d", Nconf); ofs1 = fopen(filename, "w"); sprintf(filename, "cvc_2pt_disc_v4.%.4d", Nconf); ofs2 = fopen(filename, "w"); if(ofs1==(FILE*)NULL || ofs2==(FILE*)NULL) { #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(5); } } /* add the HPE coefficients */ if(format==1) { addimag = 2*g_kappa*g_mu/sqrt(1 + 4*g_kappa*g_kappa*g_mu*g_mu)* LX*LY*LZ*3*4*2.*g_kappa*g_kappa*4; addreal = 1./sqrt(1 + 4*g_kappa*g_kappa*g_mu*g_mu)*LX*LY*LZ*3*4*2.*g_kappa*g_kappa*4; v4norm = 1. / ( 8. * g_kappa * g_kappa ); vvnorm = g_mu / ( 4. * g_kappa ); } else { addimag = 2*g_kappa*g_mu/sqrt(1 + 4*g_kappa*g_kappa*g_mu*g_mu)* LX*LY*LZ*3*4*2.*g_kappa*2; addreal = 1./sqrt(1 + 4*g_kappa*g_kappa*g_mu*g_mu)*LX*LY*LZ*3*4*2.*g_kappa*2; v4norm = 1. / ( 4. * g_kappa ); vvnorm = g_mu / ( 4. * g_kappa ); } /* calculate additional contributions for 1 and gamma_5 */ _2kappamu = 2.*g_kappa*g_mu; hopexp_coeff[0] = 24. * g_kappa * LX*LY*LZ / (1. + _2kappamu*_2kappamu); hopexp_coeff[1] = 0.; hopexp_coeff[2] = -768. 
* g_kappa*g_kappa*g_kappa * LX*LY*LZ * _2kappamu*_2kappamu / ( (1.+_2kappamu*_2kappamu)*(1.+_2kappamu*_2kappamu)*(1.+_2kappamu*_2kappamu) ); hopexp_coeff[3] = 0.; hopexp_coeff[4] = 0.; hopexp_coeff[5] = -24.*g_kappa * LX*LY*LZ * _2kappamu / (1. + _2kappamu*_2kappamu); hopexp_coeff[6] = 0.; hopexp_coeff[7] = -384. * g_kappa*g_kappa*g_kappa * LX*LY*LZ * (1.-_2kappamu*_2kappamu)*_2kappamu / ( (1.+_2kappamu*_2kappamu)*(1.+_2kappamu*_2kappamu)*(1.+_2kappamu*_2kappamu) ); /* start loop on source id.s */ for(sid=g_sourceid; sid<=g_sourceid2; sid+=g_sourceid_step) { for(ix=0; ix<4*32*T; ix++) disc[ix] = 0.; /* read the new propagator */ sprintf(filename, "%s.%.4d.%.5d.inverted", filename_prefix, Nconf, sid); /* sprintf(filename, "%s.%.4d.%.2d.inverted", filename_prefix, Nconf, sid); */ if(read_lime_spinor(g_spinor_field[1], filename, 0) != 0) { fprintf(stderr, "[%2d] Error, could not read from file %s\n", g_cart_id, filename); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(4); } count++; xchange_field(g_spinor_field[1]); /* calculate the source: apply Q_phi_tbc */ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif Q_phi_tbc(g_spinor_field[0], g_spinor_field[1]); xchange_field(g_spinor_field[0]); #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "# time to apply Q_tm %e seconds\n", retime-ratime); /* apply gamma5_BdagH4_gamma5 */ gamma5_BdagH4_gamma5(g_spinor_field[2], g_spinor_field[0], g_spinor_field[3]); /* attention: additional factor 2kappa because of CMI format */ /* if(format==1) { for(ix=0; ix<VOLUME; ix++) { _fv_ti_eq_re(&g_spinor_field[2][_GSI(ix)], 2.*g_kappa); } } */ if(Nlong>-1) { if(g_cart_id==0) fprintf(stdout, "# fuzzing propagator with Nlong = %d\n", Nlong); memcpy((void*)g_spinor_field[3], (void*)g_spinor_field[1], 24*VOLUMEPLUSRAND*sizeof(double)); Fuzz_prop(gauge_field_f, g_spinor_field[3], Nlong); } /* add new contractions 
to disc */ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif for(x0=0; x0<T; x0++) { /* loop on time */ for(x1=0; x1<VOL3; x1++) { /* loop on sites in timeslice */ ix = x0*VOL3 + x1; for(mu=0; mu<16; mu++) { /* loop on index of gamma matrix */ _fv_eq_gamma_ti_fv(spinor1, mu, &g_spinor_field[1][_GSI(ix)]); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[2][_GSI(ix)], spinor1); disc[2*( x0*16+mu) ] += w.re; disc[2*( x0*16+mu)+1] += w.im; _fv_eq_gamma_ti_fv(spinor1, 5, &g_spinor_field[1][_GSI(ix)]); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[1][_GSI(ix)], spinor2); disc[2*(16*T + x0*16+mu) ] += w.re; disc[2*(16*T + x0*16+mu)+1] += w.im; if(Nlong>-1) { _fv_eq_gamma_ti_fv(spinor1, mu, &g_spinor_field[3][_GSI(ix)]); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[2][_GSI(ix)], spinor1); disc[2*(32*T + x0*16+mu) ] += w.re; disc[2*(32*T + x0*16+mu)+1] += w.im; _fv_eq_gamma_ti_fv(spinor1, 5, &g_spinor_field[3][_GSI(ix)]); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[1][_GSI(ix)], spinor2); disc[2*(48*T + x0*16+mu) ] += w.re; disc[2*(48*T + x0*16+mu)+1] += w.im; } } } } if(g_cart_id==0) fprintf(stdout, "# addimag = %25.16e\n", addimag); if(g_cart_id==0) fprintf(stdout, "# addreal = %25.16e\n", addreal); for(x0=0; x0<T; x0++) { disc[2*( x0*16+4) ] += addreal; disc[2*( x0*16+5)+1] -= addimag; /* if(Nlong>-1) { disc[2*(32*T + x0*16+4) ] += addreal; disc[2*(32*T + x0*16+5)+1] -= addimag; } */ } #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "# contractions in %e seconds\n", retime-ratime); /* write current disc to file */ if(g_cart_id==0) { if(sid==g_sourceid) fprintf(ofs1, "#%6d%3d%3d%3d%3d\t%f\t%f\n", Nconf, T, LX, LY, LZ, g_kappa, g_mu); if(sid==g_sourceid) fprintf(ofs2, "#%6d%3d%3d%3d%3d\t%f\t%f\n", Nconf, T, LX, LY, LZ, g_kappa, g_mu); for(x0=0; x0<T; x0++) { for(mu=0; mu<16; mu++) { idx = gindex[mu]; 
ix = 16*x0 + idx; if(isimag[mu]==0) { fprintf(ofs2, "%6d%3d%4d%4d%25.16e%25.16e%25.16e%25.16e\n", Nconf, mu, x0, sid, gsign[mu]*disc[2* ix ]*v4norm, gsign[mu]*disc[2* ix +1]*v4norm, gsign[mu]*disc[2*(32*T+ix)]*v4norm, gsign[mu]*disc[2*(32*T+ix)+1]*v4norm); } else { fprintf(ofs2, "%6d%3d%4d%4d%25.16e%25.16e%25.16e%25.16e\n", Nconf, mu, x0, sid, gsign[mu]*disc[2*( ix)+1]*v4norm, -gsign[mu]*disc[2* ix ]*v4norm, gsign[mu]*disc[2*(32*T+ix)+1]*v4norm, -gsign[mu]*disc[2*(32*T+ix)]*v4norm); } } } for(x0=0; x0<T; x0++) { for(mu=0; mu<16; mu++) { idx = gindex[mu]; ix = 16*x0 + idx; if(isimag[mu]==0) { fprintf(ofs1, "%6d%3d%4d%4d%25.16e%25.16e%25.16e%25.16e\n", Nconf, mu, x0, sid, gsign[mu]*disc[2*(16*T+ix)+1]*vvnorm, -gsign[mu]*disc[2*(16*T+ix)]*vvnorm, gsign[mu]*disc[2*(48*T+ix)+1]*vvnorm, -gsign[mu]*disc[2*(48*T+ix)]*vvnorm); } else { fprintf(ofs1, "%6d%3d%4d%4d%25.16e%25.16e%25.16e%25.16e\n", Nconf, mu, x0, sid, -gsign[mu]*disc[2*(16*T+ix)]*vvnorm, -gsign[mu]*disc[2*(16*T+ix)+1]*vvnorm, -gsign[mu]*disc[2*(48*T+ix)]*vvnorm, -gsign[mu]*disc[2*(48*T+ix)+1]*vvnorm); } } } #ifdef MPI for(c=1; c<g_nproc; c++) { MPI_Recv(disc, 128*T, MPI_DOUBLE, c, 100+c, g_cart_grid, &status); for(x0=0; x0<T; x0++) { for(mu=0; mu<16; mu++) { idx=gindex[mu]; ix = 16*x0 + idx; if(isimag[mu]==0) { fprintf(ofs2, "%6d%3d%4d%4d%25.16e%25.16e%25.16e%25.16e\n", Nconf, mu, c*T+x0, sid, gsign[mu]*disc[2* ix ]*v4norm, gsign[mu]*disc[2* ix +1]*v4norm, gsign[mu]*disc[2*(32*T+ix)]*v4norm, gsign[mu]*disc[2*(32*T+ix)+1]*v4norm); } else { fprintf(ofs2, "%6d%3d%4d%4d%25.16e%25.16e%25.16e%25.16e\n", Nconf, mu, c*T+x0, sid, gsign[mu]*disc[2*( ix)+1]*v4norm, -gsign[mu]*disc[2* ix ]*v4norm, gsign[mu]*disc[2*(32*T+ix)+1]*v4norm, -gsign[mu]*disc[2*(32*T+ix)]*v4norm); } } } for(x0=0; x0<T; x0++) { for(mu=0; mu<16; mu++) { idx = gindex[mu]; ix = 16*x0 + idx; if(isimag[mu]==0) { fprintf(ofs1, "%6d%3d%4d%4d%25.16e%25.16e%25.16e%25.16e\n", Nconf, mu, c*T+x0, sid, gsign[mu]*disc[2*(16*T+ix)+1]*vvnorm, 
-gsign[mu]*disc[2*(16*T+ix)]*vvnorm, gsign[mu]*disc[2*(48*T+ix)+1]*vvnorm, -gsign[mu]*disc[2*(48*T+ix)]*vvnorm); } else { fprintf(ofs1, "%6d%3d%4d%4d%25.16e%25.16e%25.16e%25.16e\n", Nconf, mu, c*T+x0, sid, -gsign[mu]*disc[2*(16*T+ix)]*vvnorm, -gsign[mu]*disc[2*(16*T+ix)+1]*vvnorm, -gsign[mu]*disc[2*(48*T+ix)]*vvnorm, -gsign[mu]*disc[2*(48*T+ix)+1]*vvnorm); } } } } #endif } #ifdef MPI else { for(c=1; c<g_nproc; c++) { if(g_cart_id==c) { MPI_Send(disc, 128*T, MPI_DOUBLE, 0, 100+c, g_cart_grid); } } } #endif } /* of loop on sid */ if(g_cart_id==0) { fclose(ofs1); fclose(ofs2); } if(g_cart_id==0) { fprintf(stdout, "# contributions from HPE:\n"); fprintf(stdout, "(1) X = id\t%25.16e%25.16e\n"\ " \t%25.16e%25.16e\n"\ "(2) X = 5\t%25.16e%25.16e\n"\ " \t%25.16e%25.16e\n", hopexp_coeff[0], hopexp_coeff[1], hopexp_coeff[2], hopexp_coeff[3], hopexp_coeff[4], hopexp_coeff[5], hopexp_coeff[6], hopexp_coeff[7]); } /* free the allocated memory, finalize */ free(g_gauge_field); g_gauge_field=(double*)NULL; for(i=0; i<no_fields; i++) free(g_spinor_field[i]); free(g_spinor_field); g_spinor_field=(double**)NULL; free_geometry(); free(disc); if(Nlong>-1) free(gauge_field_f); #ifdef MPI MPI_Finalize(); #endif return(0); }
int main(int argc, char **argv) { int c, i, mu, nu; int count = 0; int filename_set = 0; int dims[4] = {0,0,0,0}; int l_LX_at, l_LXstart_at; int x0, x1, x2, x3, ix, iix; int sid, status; double *disc = (double*)NULL; double *data = (double*)NULL; double *bias = (double*)NULL; double *work = (double*)NULL; double q[4], fnorm; int verbose = 0; char filename[100], contype[200]; double ratime, retime; double plaq; double spinor1[24], spinor2[24], U_[18]; complex w, w1, *cp1, *cp2, *cp3, *cp4; fftw_complex *in=(fftw_complex*)NULL; #ifdef MPI fftwnd_mpi_plan plan_p, plan_m; #else fftwnd_plan plan_p, plan_m; #endif #ifdef MPI MPI_Init(&argc, &argv); #endif while ((c = getopt(argc, argv, "h?vf:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'h': case '?': default: usage(); break; } } /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# Reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } if(g_kappa <= 0.) 
{ if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.\n"); usage(); } if(hpe_order%2==0 && hpe_order>0) { if(g_proc_id==0) fprintf(stdout, "HPE order should be odd\n"); usage(); } fprintf(stdout, "\n**************************************************\n"\ "* vp_disc_hpe_stoch_subtract with HPE of order %d\n"\ "**************************************************\n\n", hpe_order); /********************************* * initialize MPI parameters *********************************/ mpi_init(argc, argv); /* initialize fftw */ dims[0]=T_global; dims[1]=LX; dims[2]=LY; dims[3]=LZ; #ifdef MPI plan_p = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_BACKWARD, FFTW_MEASURE); plan_m = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_FORWARD, FFTW_MEASURE); fftwnd_mpi_local_sizes(plan_p, &T, &Tstart, &l_LX_at, &l_LXstart_at, &FFTW_LOC_VOLUME); #else plan_p = fftwnd_create_plan(4, dims, FFTW_BACKWARD, FFTW_MEASURE | FFTW_IN_PLACE); plan_m = fftwnd_create_plan(4, dims, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE); T = T_global; Tstart = 0; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; #endif fprintf(stdout, "# [%2d] fftw parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] l_LX_at = %3d\n"\ "# [%2d] l_LXstart_at = %3d\n"\ "# [%2d] FFTW_LOC_VOLUME = %3d\n", g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at, g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME); #ifdef MPI if(T==0) { fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id); MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); exit(101); } #endif if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(102); } geometry(); /************************************************ * read the gauge field, measure the plaquette ************************************************/ alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND); sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf); if(g_cart_id==0) 
fprintf(stdout, "# reading gauge field from file %s\n", filename); read_lime_gauge_field_doubleprec(filename); xchange_gauge(); plaquette(&plaq); if(g_cart_id==0) fprintf(stdout, "# measured plaquette value: %25.16e\n", plaq); /**************************************** * allocate memory for the spinor fields ****************************************/ no_fields = 3; g_spinor_field = (double**)calloc(no_fields, sizeof(double*)); for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND); /**************************************** * allocate memory for the contractions ****************************************/ disc = (double*)calloc(16*VOLUME, sizeof(double)); if( disc == (double*)NULL ) { fprintf(stderr, "could not allocate memory for disc\n"); # ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); # endif exit(103); } data = (double*)calloc(16*VOLUME, sizeof(double)); if( data== (double*)NULL ) { fprintf(stderr, "could not allocate memory for data\n"); # ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); # endif exit(104); } for(ix=0; ix<16*VOLUME; ix++) data[ix] = 0.; work = (double*)calloc(32*VOLUME, sizeof(double)); if( work == (double*)NULL ) { fprintf(stderr, "could not allocate memory for work\n"); # ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); # endif exit(105); } bias = (double*)calloc(32*VOLUME, sizeof(double)); if( bias == (double*)NULL ) { fprintf(stderr, "could not allocate memory for bias\n"); # ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); # endif exit(106); } for(ix=0; ix<32*VOLUME; ix++) bias[ix] = 0.; /**************************************** * prepare Fourier transformation arrays ****************************************/ in = (fftw_complex*)malloc(FFTW_LOC_VOLUME*sizeof(fftw_complex)); if(in==(fftw_complex*)NULL) { #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(107); } /*********************************************** * start loop on source id.s 
***********************************************/ for(sid=g_sourceid; sid<=g_sourceid2; sid+=g_sourceid_step) { for(ix=0; ix<16*VOLUME; ix++) disc[ix] = 0.; /* read the new propagator */ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif if(format==0) { sprintf(filename, "%s.%.4d.%.2d.inverted", filename_prefix, Nconf, sid); if(read_lime_spinor(g_spinor_field[2], filename, 0) != 0) break; } else if(format==1) { sprintf(filename, "%s.%.4d.%.5d.inverted", filename_prefix, Nconf, sid); if(read_cmi(g_spinor_field[2], filename) != 0) break; } xchange_field(g_spinor_field[2]); #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "# time to read prop.: %e seconds\n", retime-ratime); count++; /************************************************ * calculate the source: apply Q_phi_tbc ************************************************/ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif Q_phi_tbc(g_spinor_field[0], g_spinor_field[2]); xchange_field(g_spinor_field[0]); #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "# time to calculate source: %e seconds\n", retime-ratime); #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif /************************************************ * HPE: apply BH to order hpe_order+2 ************************************************/ if(hpe_order>0) { BHn(g_spinor_field[1], g_spinor_field[2], hpe_order+2); } else { memcpy((void*)g_spinor_field[1], (void*)g_spinor_field[2], 24*VOLUMEPLUSRAND*sizeof(double)); } /************************************************ * add new contractions to (existing) disc ************************************************/ for(mu=0; mu<4; mu++) { iix = _GWI(mu,0,VOLUME); for(ix=0; ix<VOLUME; ix++) { _cm_eq_cm_ti_co(U_, &g_gauge_field[_GGI(ix, mu)], &co_phase_up[mu]); 
_fv_eq_cm_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(g_iup[ix][mu])]); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_mi_eq_fv(spinor2, spinor1); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(ix)], spinor2); disc[iix ] = -0.5 * w.re; disc[iix+1] = -0.5 * w.im; data[iix ] -= 0.5 * w.re; data[iix+1] -= 0.5 * w.im; _fv_eq_cm_dag_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(ix)]); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_pl_eq_fv(spinor2, spinor1); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(g_iup[ix][mu])], spinor2); disc[iix ] -= 0.5 * w.re; disc[iix+1] -= 0.5 * w.im; data[iix ] -= 0.5 * w.re; data[iix+1] -= 0.5 * w.im; iix += 2; } } #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "# time to contract cvc: %e seconds\n", retime-ratime); #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif for(mu=0; mu<4; mu++) { memcpy((void*)in, (void*)(disc+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double)); #ifdef MPI fftwnd_mpi(plan_m, 1, in, NULL, FFTW_NORMAL_ORDER); #else fftwnd_one(plan_m, in, NULL); #endif memcpy((void*)(disc+_GWI(4+mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double)); memcpy((void*)in, (void*)(disc+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double)); #ifdef MPI fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER); #else fftwnd_one(plan_p, in, NULL); #endif memcpy((void*)(disc+_GWI(mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double)); } /* of mu =0 ,..., 3*/ for(mu=0; mu<4; mu++) { for(nu=0; nu<4; nu++) { cp1 = (complex*)(disc+_GWI(mu, 0,VOLUME)); cp2 = (complex*)(disc+_GWI(4+nu, 0,VOLUME)); cp3 = (complex*)(bias+_GWI(4*mu+nu,0,VOLUME)); for(ix=0; ix<VOLUME; ix++) { _co_eq_co_ti_co(&w1, cp1, cp2); cp3->re += w1.re; cp3->im += w1.im; cp1++; cp2++; cp3++; } }} #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "# time for Fourier trafo and adding to bias: %e seconds\n", retime-ratime); } /* of 
loop on sid */ /************************************************ * save results for count == Nsave ************************************************/ if(count==Nsave) { if(g_cart_id == 0) fprintf(stdout, "# save results for count = %d\n", count); for(ix=0; ix<16*VOLUME; ix++) disc[ix] = 0.; if(hpe_order>0) { sprintf(filename, "vp_disc_hpe%.2d_loops_X.%.4d", hpe_order, Nconf); if(g_cart_id==0) fprintf(stdout, "# reading loop part from file %s\n", filename); if( (status = read_lime_contraction(disc, filename, 4, 0)) != 0 ) { #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(108); } } /* save the result in position space */ fnorm = 1. / ( (double)count * g_prop_normsqr ); if(g_cart_id==0) fprintf(stdout, "# X-fnorm = %e\n", fnorm); for(mu=0; mu<4; mu++) { for(ix=0; ix<VOLUME; ix++) { work[_GWI(mu,ix,VOLUME) ] = data[_GWI(mu,ix,VOLUME) ] * fnorm + disc[_GWI(mu,ix,VOLUME) ]; work[_GWI(mu,ix,VOLUME)+1] = data[_GWI(mu,ix,VOLUME)+1] * fnorm + disc[_GWI(mu,ix,VOLUME)+1]; } } sprintf(filename, "vp_disc_hpe%.2d_subtracted_X.%.4d.%.4d", hpe_order, Nconf, count); sprintf(contype, "cvc-disc-hpe-loops-%2d-to-%2d-stoch-subtracted-X", hpe_order, hpe_order+2); write_lime_contraction(work, filename, 64, 4, contype, Nconf, count); /* sprintf(filename, "vp_disc_hpe%.2d_subtracted_X.%.4d.%.4d.ascii", hpe_order, Nconf, count); write_contraction(work, NULL, filename, 4, 2, 0); */ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif for(mu=0; mu<4; mu++) { memcpy((void*)in, (void*)(data+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double)); #ifdef MPI fftwnd_mpi(plan_m, 1, in, NULL, FFTW_NORMAL_ORDER); #else fftwnd_one(plan_m, in, NULL); #endif memcpy((void*)(data+_GWI(4+mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double)); memcpy((void*)in, (void*)(data+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double)); #ifdef MPI fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER); #else fftwnd_one(plan_p, in, NULL); #endif memcpy((void*)(data+_GWI(mu,0,VOLUME)), 
(void*)in, 2*VOLUME*sizeof(double)); } fnorm = 1. / ( g_prop_normsqr*g_prop_normsqr * (double)count * (double)(count-1) ); if(g_cart_id==0) fprintf(stdout, "# P-fnorm for purely stochastic part = %e\n", fnorm); for(mu=0; mu<4; mu++) { for(nu=0; nu<4; nu++) { cp1 = (complex*)(data+_GWI(mu, 0,VOLUME)); cp2 = (complex*)(data+_GWI(4+nu, 0,VOLUME)); cp3 = (complex*)(work+_GWI(4*mu+nu,0,VOLUME)); cp4 = (complex*)(bias+_GWI(4*mu+nu,0,VOLUME)); for(ix=0; ix<VOLUME; ix++) { _co_eq_co_ti_co(&w1, cp1, cp2); cp3->re = ( w1.re - cp4->re ) * fnorm; cp3->im = ( w1.im - cp4->im ) * fnorm; cp1++; cp2++; cp3++; cp4++; } }} for(mu=0; mu<4; mu++) { memcpy((void*)in, (void*)(disc+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double)); #ifdef MPI fftwnd_mpi(plan_m, 1, in, NULL, FFTW_NORMAL_ORDER); #else fftwnd_one(plan_m, in, NULL); #endif memcpy((void*)(disc+_GWI(4+mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double)); memcpy((void*)in, (void*)(disc+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double)); #ifdef MPI fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER); #else fftwnd_one(plan_p, in, NULL); #endif memcpy((void*)(disc+_GWI(mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double)); } fnorm = 1. / ( g_prop_normsqr * (double)count ); if(g_cart_id==0) fprintf(stdout, "# P-fnorm for mixed stochastic-loop part = %e\n", fnorm); for(mu=0; mu<4; mu++) { for(nu=0; nu<4; nu++) { cp1 = (complex*)(data + _GWI(mu, 0,VOLUME)); cp2 = (complex*)(disc + _GWI(4+nu, 0,VOLUME)); cp3 = (complex*)(work + _GWI(4*mu+nu,0,VOLUME)); for(ix=0; ix<VOLUME; ix++) { _co_eq_co_ti_co(&w1, cp1, cp2); cp3->re += w1.re * fnorm; cp3->im += w1.im * fnorm; cp1++; cp2++; cp3++; } cp1 = (complex*)(disc + _GWI(mu, 0,VOLUME)); cp2 = (complex*)(data + _GWI(4+nu, 0,VOLUME)); cp3 = (complex*)(work + _GWI(4*mu+nu,0,VOLUME)); for(ix=0; ix<VOLUME; ix++) { _co_eq_co_ti_co(&w1, cp1, cp2); cp3->re += w1.re * fnorm; cp3->im += w1.im * fnorm; cp1++; cp2++; cp3++; } }} fnorm = 1. 
/ ( (double)T_global * (double)(LX*LY*LZ) ); if(g_cart_id==0) fprintf(stdout, "# P-fnorm for final estimator (1/T/V) = %e\n", fnorm); for(mu=0; mu<4; mu++) { for(nu=0; nu<4; nu++) { cp1 = (complex*)(disc + _GWI(mu, 0,VOLUME)); cp2 = (complex*)(disc + _GWI(4+nu, 0,VOLUME)); cp3 = (complex*)(work + _GWI(4*mu+nu,0,VOLUME)); for(x0=0; x0<T; x0++) { q[0] = (double)(x0+Tstart) / (double)T_global; for(x1=0; x1<LX; x1++) { q[1] = (double)x1 / (double)LX; for(x2=0; x2<LY; x2++) { q[2] = (double)x2 / (double)LY; for(x3=0; x3<LZ; x3++) { q[3] = (double)x3 / (double)LZ; ix = g_ipt[x0][x1][x2][x3]; w.re = cos(M_PI * ( q[mu] - q[nu] ) ); w.im = sin(M_PI * ( q[mu] - q[nu] ) ); _co_eq_co_ti_co(&w1, cp1, cp2); cp3->re += w1.re; cp3->im += w1.im; _co_eq_co_ti_co(&w1, cp3, &w); cp3->re = w1.re * fnorm; cp3->im = w1.im * fnorm; cp1++; cp2++; cp3++; }}}} }} sprintf(filename, "vp_disc_hpe%.2d_subtracted_P.%.4d.%.4d", hpe_order, Nconf, count); sprintf(contype, "cvc-disc-hpe-loops-%2d-to-%2d-stoch-subtracted-P", hpe_order, hpe_order+2); write_lime_contraction(work, filename, 64, 16, contype, Nconf, count); /* sprintf(filename, "vp_disc_hpe%.2d_subtracted_P.%.4d.%.4d.ascii", hpe_order, Nconf, count); write_contraction(work, NULL, filename, 16, 2, 0); */ #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "# time to save cvc results: %e seconds\n", retime-ratime); } /* of if count == Nsave */ /*********************************************** * free the allocated memory, finalize ***********************************************/ free(g_gauge_field); for(i=0; i<no_fields; i++) free(g_spinor_field[i]); free(g_spinor_field); free_geometry(); fftw_free(in); free(disc); free(bias); free(data); free(work); #ifdef MPI fftwnd_mpi_destroy_plan(plan_p); fftwnd_mpi_destroy_plan(plan_m); MPI_Finalize(); #else fftwnd_destroy_plan(plan_p); fftwnd_destroy_plan(plan_m); #endif return(0); }
int main(int argc, char **argv) { int c; int count, ncon=-1; int filename_set = 0; int ix; double *disc = (double*)NULL; double *disc2 = (double*)NULL; double adiffre, adiffim, mdiffre, mdiffim, Mdiffre, Mdiffim, hre, him; int verbose = 0; char filename[200]; char file1[200]; char file2[200]; while ((c = getopt(argc, argv, "h?vf:N:c:C:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'N': ncon = atoi(optarg); break; case 'c': strcpy(file1, optarg); break; case 'C': strcpy(file2, optarg); break; case 'h': case '?': default: usage(); break; } } /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); if(g_cart_id==0) fprintf(stdout, "# Reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } if(g_kappa == 0.) { if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n"); usage(); } /* initialize MPI parameters */ mpi_init(argc, argv); /* initialize */ T = T_global; Tstart = 0; fprintf(stdout, "# [%2d] parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n",\ g_cart_id, g_cart_id, T, g_cart_id, Tstart); if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); exit(101); } geometry(); /**************************************** * allocate memory for the contractions ****************************************/ if(ncon<=0) { fprintf(stderr, "Error, incompatible contraction type specified; exit\n"); exit(102); } else { fprintf(stdout, "# Using contraction type %d\n", ncon); } disc = (double*)calloc(2*ncon*VOLUME, sizeof(double)); if( disc == (double*)NULL ) { fprintf(stderr, "could not allocate memory for disc\n"); exit(103); } disc2 = (double*)calloc(2*ncon*VOLUME, sizeof(double)); if( disc2 == (double*)NULL ) { fprintf(stderr, "could not allocate memory for disc2\n"); exit(104); } 
/**************************************** * read contractions ****************************************/ if( read_lime_contraction(disc, file1, 64, ncon, 0) != 0 ) { fprintf(stderr, "Error, could not read from file %s; exit\n", file1); exit(105); } if( read_lime_contraction(disc2, file2, 64, ncon, 0) != 0 ) { fprintf(stderr, "Error, could not read from file %s; exit\n", file2); exit(106); } /**************************************** * calculate difference ****************************************/ mdiffre = fabs(disc[0] - disc2[0]); mdiffim = fabs(disc[1] - disc2[1]); Mdiffre = 0.; Mdiffim = 0.; adiffre = 0.; adiffim = 0.; for(ix=0; ix<ncon*VOLUME; ix++) { adiffre += disc[2*ix ] - disc2[2*ix ]; adiffim += disc[2*ix+1] - disc2[2*ix+1]; hre = fabs(disc[2*ix ] - disc2[2*ix ]); him = fabs(disc[2*ix+1] - disc2[2*ix+1]); if(hre<mdiffre) mdiffre = hre; if(hre>Mdiffre) Mdiffre = hre; if(him<mdiffim) mdiffim = him; if(him>Mdiffim) Mdiffim = him; } adiffre /= (double)VOLUME * (double)ncon; adiffim /= (double)VOLUME * (double)ncon; fprintf(stdout, "# Results for files %s and %s:\n", file1, file2); fprintf(stdout, "average difference\t%25.16e\t%25.16e\n", adiffre, adiffim); fprintf(stdout, "minimal abs. difference\t%25.16e\t%25.16e\n", mdiffre, mdiffim); fprintf(stdout, "maximal abs. difference\t%25.16e\t%25.16e\n", Mdiffre, Mdiffim); /*********************************************** * free the allocated memory, finalize ***********************************************/ free_geometry(); free(disc); free(disc2); return(0); }
int main(int argc, char **argv) { const int n_c=3; const int n_s=4; const char outfile_prefix[] = "delta_pp_2pt_v3"; int c, i, icomp; int filename_set = 0; int append, status; int l_LX_at, l_LXstart_at; int ix, it, iix, x1,x2,x3; int ir, ir2, is; int VOL3; int do_gt=0; int dims[3]; double *connt=NULL; spinor_propagator_type *connq=NULL; int verbose = 0; int sx0, sx1, sx2, sx3; int write_ascii=0; int fermion_type = 1; // Wilson fermion type int num_threads=1; int pos; char filename[200], contype[200], gauge_field_filename[200]; double ratime, retime; //double plaq_m, plaq_r; double *work=NULL; fermion_propagator_type fp1=NULL, fp2=NULL, fp3=NULL, fp4=NULL, fpaux=NULL, uprop=NULL, dprop=NULL, *stochastic_fp=NULL; spinor_propagator_type sp1, sp2; double q[3], phase, *gauge_trafo=NULL; double *stochastic_source=NULL, *stochastic_prop=NULL; complex w, w1; size_t items, bytes; FILE *ofs; int timeslice; DML_Checksum ildg_gauge_field_checksum, *spinor_field_checksum=NULL, connq_checksum; uint32_t nersc_gauge_field_checksum; /***********************************************************/ int *qlatt_id=NULL, *qlatt_count=NULL, **qlatt_rep=NULL, **qlatt_map=NULL, qlatt_nclass=0; int use_lattice_momenta = 0; double **qlatt_list=NULL; /***********************************************************/ /***********************************************************/ int rel_momentum_filename_set = 0, rel_momentum_no=0; int **rel_momentum_list=NULL; char rel_momentum_filename[200]; /***********************************************************/ /***********************************************************/ int snk_momentum_no = 1; int **snk_momentum_list = NULL; int snk_momentum_filename_set = 0; char snk_momentum_filename[200]; /***********************************************************/ /******************************************************************* * Gamma components for the Delta: */ //const int num_component = 16; //int gamma_component[2][16] = { {0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3}, \ 
// {0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3}}; //double gamma_component_sign[16] = {1., 1.,-1., 1., 1., 1.,-1., 1.,-1.,-1., 1.,-1., 1., 1.,-1., 1.}; const int num_component = 4; int gamma_component[2][4] = { {0, 1, 2, 3}, {0, 1, 2, 3} }; double gamma_component_sign[4] = {+1.,+1.,+1.,+1.}; /* *******************************************************************/ fftw_complex *in=NULL; #ifdef MPI fftwnd_mpi_plan plan_p; #else fftwnd_plan plan_p; #endif #ifdef MPI MPI_Status status; #endif #ifdef MPI MPI_Init(&argc, &argv); #endif while ((c = getopt(argc, argv, "ah?vgf:t:F:p:P:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'a': write_ascii = 1; fprintf(stdout, "# [] will write in ascii format\n"); break; case 'F': if(strcmp(optarg, "Wilson") == 0) { fermion_type = _WILSON_FERMION; } else if(strcmp(optarg, "tm") == 0) { fermion_type = _TM_FERMION; } else { fprintf(stderr, "[] Error, unrecognized fermion type\n"); exit(145); } fprintf(stdout, "# [] will use fermion type %s ---> no. 
%d\n", optarg, fermion_type); break; case 't': num_threads = atoi(optarg); fprintf(stdout, "# [] number of threads set to %d\n", num_threads); break; case 's': use_lattice_momenta = 1; fprintf(stdout, "# [] will use lattice momenta\n"); break; case 'p': rel_momentum_filename_set = 1; strcpy(rel_momentum_filename, optarg); fprintf(stdout, "# [] will use current momentum file %s\n", rel_momentum_filename); break; case 'P': snk_momentum_filename_set = 1; strcpy(snk_momentum_filename, optarg); fprintf(stdout, "# [] will use nucleon momentum file %s\n", snk_momentum_filename); break; case 'g': do_gt = 1; fprintf(stdout, "# [] will perform gauge transform\n"); break; case 'h': case '?': default: usage(); break; } } #ifdef OPENMP omp_set_num_threads(num_threads); #endif /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } if(g_kappa == 0.) 
{ if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n"); usage(); } /* initialize MPI parameters */ mpi_init(argc, argv); #ifdef OPENMP status = fftw_threads_init(); if(status != 0) { fprintf(stderr, "\n[] Error from fftw_init_threads; status was %d\n", status); exit(120); } #endif /****************************************************** * ******************************************************/ VOL3 = LX*LY*LZ; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; fprintf(stdout, "# [%2d] parameters:\n"\ "# [%2d] l_LX_at = %3d\n"\ "# [%2d] l_LXstart_at = %3d\n"\ "# [%2d] FFTW_LOC_VOLUME = %3d\n", g_cart_id, g_cart_id, l_LX_at, g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME); if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); exit(1); } geometry(); if(N_Jacobi>0) { // alloc the gauge field alloc_gauge_field(&g_gauge_field, VOL3); switch(g_gauge_file_format) { case 0: sprintf(gauge_field_filename, "%s.%.4d", gaugefilename_prefix, Nconf); break; case 1: sprintf(gauge_field_filename, "%s.%.5d", gaugefilename_prefix, Nconf); break; } } else { g_gauge_field = NULL; } /********************************************************************* * gauge transformation *********************************************************************/ if(do_gt) { init_gauge_trafo(&gauge_trafo, 1.); } // determine the source location sx0 = g_source_location/(LX*LY*LZ)-Tstart; sx1 = (g_source_location%(LX*LY*LZ)) / (LY*LZ); sx2 = (g_source_location%(LY*LZ)) / LZ; sx3 = (g_source_location%LZ); // g_source_time_slice = sx0; fprintf(stdout, "# [] source location %d = (%d,%d,%d,%d)\n", g_source_location, sx0, sx1, sx2, sx3); source_timeslice = sx0; if(!use_lattice_momenta) { status = make_qcont_orbits_3d_parity_avg(&qlatt_id, &qlatt_count, &qlatt_list, &qlatt_nclass, &qlatt_rep, &qlatt_map); } else { status = make_qlatt_orbits_3d_parity_avg(&qlatt_id, &qlatt_count, &qlatt_list, &qlatt_nclass, &qlatt_rep, &qlatt_map); } if(status != 0) { fprintf(stderr, "\n[] 
Error while creating h4-lists\n"); exit(4); } fprintf(stdout, "# [] number of classes = %d\n", qlatt_nclass); /*************************************************************************** * read the relative momenta q to be used ***************************************************************************/ /* ofs = fopen(rel_momentum_filename, "r"); if(ofs == NULL) { fprintf(stderr, "[] Error, could not open file %s for reading\n", rel_momentum_filename); exit(6); } rel_momentum_no = 0; while( fgets(line, 199, ofs) != NULL) { if(line[0] != '#') { rel_momentum_no++; } } if(rel_momentum_no == 0) { fprintf(stderr, "[] Error, number of momenta is zero\n"); exit(7); } else { fprintf(stdout, "# [] number of current momenta = %d\n", rel_momentum_no); } rewind(ofs); rel_momentum_list = (int**)malloc(rel_momentum_no * sizeof(int*)); rel_momentum_list[0] = (int*)malloc(3*rel_momentum_no * sizeof(int)); for(i=1;i<rel_momentum_no;i++) { rel_momentum_list[i] = rel_momentum_list[i-1] + 3; } count=0; while( fgets(line, 199, ofs) != NULL) { if(line[0] != '#') { sscanf(line, "%d%d%d", rel_momentum_list[count], rel_momentum_list[count]+1, rel_momentum_list[count]+2); count++; } } fclose(ofs); fprintf(stdout, "# [] current momentum list:\n"); for(i=0;i<rel_momentum_no;i++) { fprintf(stdout, "\t%3d%3d%3d%3d\n", i, rel_momentum_list[i][0], rel_momentum_list[i][1], rel_momentum_list[i][2]); } */ /*************************************************************************** * read the nucleon final momenta to be used ***************************************************************************/ ofs = fopen(snk_momentum_filename, "r"); if(ofs == NULL) { fprintf(stderr, "[] Error, could not open file %s for reading\n", snk_momentum_filename); exit(6); } snk_momentum_no = 0; while( fgets(line, 199, ofs) != NULL) { if(line[0] != '#') { snk_momentum_no++; } } if(snk_momentum_no == 0) { fprintf(stderr, "[] Error, number of momenta is zero\n"); exit(7); } else { fprintf(stdout, "# [] number of nucleon 
final momenta = %d\n", snk_momentum_no); } rewind(ofs); snk_momentum_list = (int**)malloc(snk_momentum_no * sizeof(int*)); snk_momentum_list[0] = (int*)malloc(3*snk_momentum_no * sizeof(int)); for(i=1;i<snk_momentum_no;i++) { snk_momentum_list[i] = snk_momentum_list[i-1] + 3; } count=0; while( fgets(line, 199, ofs) != NULL) { if(line[0] != '#') { sscanf(line, "%d%d%d", snk_momentum_list[count], snk_momentum_list[count]+1, snk_momentum_list[count]+2); count++; } } fclose(ofs); fprintf(stdout, "# [] the nucleon final momentum list:\n"); for(i=0;i<snk_momentum_no;i++) { fprintf(stdout, "\t%3d%3d%3d%3d\n", i, snk_momentum_list[i][0], snk_momentum_list[i][1], snk_momentum_list[i][1], snk_momentum_list[i][2]); } /*********************************************************** * allocate memory for the spinor fields ***********************************************************/ g_spinor_field = NULL; if(fermion_type == _TM_FERMION) { no_fields = 2*n_s*n_c+3; } else { no_fields = n_s*n_c+3; } if(N_Jacobi>0) no_fields++; g_spinor_field = (double**)calloc(no_fields, sizeof(double*)); for(i=0; i<no_fields-2; i++) alloc_spinor_field(&g_spinor_field[i], VOL3); // work if(N_Jacobi>0) work = g_spinor_field[no_fields-4]; // stochastic_fv stochastic_fv = g_spinor_field[no_fields-3]; // stochastic source and propagator alloc_spinor_field(&g_spinor_field[no_fields-2], VOLUME); stochastic_source = g_spinor_field[no_fields-2]; alloc_spinor_field(&g_spinor_field[no_fields-1], VOLUME); stochastic_prop = g_spinor_field[no_fields-1]; spinor_field_checksum = (DML_Checksum*)malloc(no_fields * sizeof(DML_Checksum) ); if(spinor_field_checksum == NULL ) { fprintf(stderr, "[] Error, could not alloc checksums for spinor fields\n"); exit(73); } /************************************************* * allocate memory for the contractions *************************************************/ items = 4* num_component*T; bytes = sizeof(double); connt = (double*)malloc(items*bytes); if(connt == NULL) { 
fprintf(stderr, "\n[] Error, could not alloc connt\n"); exit(2); } for(ix=0; ix<items; ix++) connt[ix] = 0.; items = num_component * (size_t)VOL3; connq = create_sp_field( items ); if(connq == NULL) { fprintf(stderr, "\n[] Error, could not alloc connq\n"); exit(2); } items = (size_t)VOL3; stochastic_fp = create_sp_field( items ); if(stochastic_fp== NULL) { fprintf(stderr, "\n[] Error, could not alloc stochastic_fp\n"); exit(22); } /****************************************************** * initialize FFTW ******************************************************/ items = g_fv_dim * (size_t)VOL3; bytes = sizeof(fftw_complex); in = (fftw_complex*)malloc( items * bytes ); if(in == NULL) { fprintf(stderr, "[] Error, could not malloc in for FFTW\n"); exit(155); } dims[0]=LX; dims[1]=LY; dims[2]=LZ; //plan_p = fftwnd_create_plan(3, dims, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE); plan_p = fftwnd_create_plan_specific(3, dims, FFTW_FORWARD, FFTW_MEASURE, in, g_fv_dim, (fftw_complex*)( stochastic_fv ), g_fv_dim); // create the fermion propagator points create_fp(&uprop); create_fp(&dprop); create_fp(&fp1); create_fp(&fp2); create_fp(&fp3); create_fp(&stochastic_fp); create_sp(&sp1); create_sp(&sp2); // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! // !! implement twisting for _TM_FERMION // !! // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #ifdef OPENMP #pragma omp parallel for private(ix) shared(stochastic_prop) #endif for(ix=0;ix<VOLUME;ix++) { _fv_eq_zero(stochastic_prop+_GSI(ix)); } for(sid=g_sourceid; sid<=g_sourceid2;sid+=g_sourceid_step) { switch(g_soruce_type) { case 2: // timeslice source sprintf(filename, "%s.%.4d.%.2d.%.5d.inverted", filename_prefix, Nconf, source_timeslice, sid); break; default: fprintf(stderr, "# [] source type %d not implented; exit\n", g_source_type); exit(100); } fprintf(stdout, "# [] trying to read sample up-prop. 
from file %s\n", filename); read_lime_spinor(stochastic_source, filename, 0); #ifdef OPENMP #pragma omp parallel for private(ix) shared(stochastic_prop, stochastic_source) #endif for(ix=0;ix<VOLUME;ix++) { _fv_pl_eq_fv(stochastic_prop+_GSI(ix), stochastic_source+_GSI(ix)); } } #ifdef OPENMP #pragma omp parallel for private(ix) shared(stochastic_prop, stochastic_source) #endif fnorm = 1. / ( (double)(g_sourceid2 - g_sourceid + 1) * g_prop_normsqr ); for(ix=0;ix<VOLUME;ix++) { _fv_ti_eq_re(stochastic_prop+_GSI(ix), fnorm); } // calculate the source if(fermion_type && g_propagator_bc_type == 1) { Q_Wilson_phi(stochastic_source, stochastic_prop); } else { Q_phi_tbc(stochastic_source, stochastic_prop); } /****************************************************** * prepare the stochastic fermion field ******************************************************/ // read timeslice of the gauge field if( N_Jacobi>0) { switch(g_gauge_file_format) { case 0: status = read_lime_gauge_field_doubleprec_timeslice(g_gauge_field, gauge_field_filename, source_timeslice, &ildg_gauge_field_checksum); break; case 1: status = read_nersc_gauge_field_timeslice(g_gauge_field, gauge_field_filename, source_timeslice, &nersc_gauge_field_checksum); break; } if(status != 0) { fprintf(stderr, "[] Error, could not read gauge field\n"); exit(21); } for(i=0; i<N_ape; i++) { #ifdef OPENMP status = APE_Smearing_Step_Timeslice_threads(g_gauge_field, alpha_ape); #else status = APE_Smearing_Step_Timeslice(g_gauge_field, alpha_ape); #endif } } // read timeslice of the 12 up-type propagators and smear them // // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! // !! implement twisting for _TM_FERMION // !! // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
for(is=0;is<n_s*n_c;is++) { if(fermion_type != _TM_FERMION) { sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.inverted", filename_prefix, Nconf, sx0, sx1, sx2, sx3, is); } else { sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.inverted", filename_prefix2, Nconf, sx0, sx1, sx2, sx3, is); } status = read_lime_spinor_timeslice(g_spinor_field[is], source_timeslice, filename, 0, spinor_field_checksum+is); if(status != 0) { fprintf(stderr, "[] Error, could not read propagator from file %s\n", filename); exit(102); } if(N_Jacobi > 0) { fprintf(stdout, "# [] Jacobi smearing propagator no. %d with paramters N_Jacobi=%d, kappa_Jacobi=%f\n", is, N_Jacobi, kappa_Jacobi); for(c=0; c<N_Jacobi; c++) { #ifdef OPENMP Jacobi_Smearing_Step_one_Timeslice_threads(g_gauge_field, g_spinor_field[is], work, kappa_Jacobi); #else Jacobi_Smearing_Step_one_Timeslice(g_gauge_field, g_spinor_field[is], work, kappa_Jacobi); #endif } } } for(is=0;is<g_fv_dim;is++) { for(ix=0;ix<VOL3;ix++) { iix = source_timeslice * VOL3 + ix; _fv_eq_gamma_ti_fv(spinor1, 5, g_spinor_field[is]+_GSI(iix)); _co_eq_fv_dagger_ti_fv(&w, stochastic_source+_GSI(ix), spinor1); stochastic_fv[_GSI(ix)+2*is ] = w.re; stochastic_fv[_GSI(ix)+2*is+1] = w.im; } } // Fourier transform items = g_fv_dim * (size_t)VOL3; bytes = sizeof(double); memcpy(in, stochastic_fv, items*bytes ); #ifdef OPENMP fftwnd_threads(num_threads, plan_p, g_fv_dim, in, g_fv_dim, 1, (fftw_complex*)(stochastic_fv), g_fv_dim, 1); #else fftwnd(plan_p, g_fv_dim, in, g_fv_dim, 1, (fftw_complex*)(stochastic_fv), g_fv_dim, 1); #endif /****************************************************** * loop on sink momenta (most likely only one: Q=(0,0,0)) ******************************************************/ for(imom_snk=0;imom_snk<snk_momentum_no; imom_snk++) { // create Phi_tilde _fv_eq_zero( spinor2 ); for(ix=0;ix<LX;ix++) { for(iy=0;iy<LY;iy++) { for(iz=0;iz<LZ;iz++) { iix = timeslice * VOL3 + ix; phase = -2.*M_PI*( (ix-sx1) * snk_momentum_list[imom_snk][0] / 
(double)LX + (iy-sx2) * snk_momentum_list[imom_snk][1] / (double)LY + (iz-sx3) * snk_momentum_list[imom_snk][2] / (double)LZ); w.re = cos(phase); w.im = sin(phase); _fv_eq_fv_ti_co(spinor1, stochastic_prop + _GSI(iix), &w); _fv_pl_eq_fv(spinor2, spinor); }}} // create Theta for(ir=0;ir<g_fv_dim;ir++) { for(is=0;is<g_fv_dim;is++) { _co_eq_co_ti_co( &(stochastic_fp[ix][ir][2*is]), &(spinor2[2*ir]), &(stochastic_fv[_GSI(ix)+2*is]) ); }} /****************************************************** * loop on timeslices ******************************************************/ for(timeslice=0; timeslice<T; timeslice++) { append = (int)( timeslice != 0 ); // read timeslice of the gauge field if( N_Jacobi>0) { switch(g_gauge_file_format) { case 0: status = read_lime_gauge_field_doubleprec_timeslice(g_gauge_field, gauge_field_filename, timeslice, &ildg_gauge_field_checksum); break; case 1: status = read_nersc_gauge_field_timeslice(g_gauge_field, gauge_field_filename, timeslice, &nersc_gauge_field_checksum); break; } if(status != 0) { fprintf(stderr, "[] Error, could not read gauge field\n"); exit(21); } for(i=0; i<N_ape; i++) { #ifdef OPENMP status = APE_Smearing_Step_Timeslice_threads(g_gauge_field, alpha_ape); #else status = APE_Smearing_Step_Timeslice(g_gauge_field, alpha_ape); #endif } } // read timeslice of the 12 up-type propagators and smear them for(is=0;is<n_s*n_c;is++) { sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.inverted", filename_prefix, Nconf, sx0, sx1, sx2, sx3, is); status = read_lime_spinor_timeslice(g_spinor_field[is], timeslice, filename, 0, spinor_field_checksum+is); if(status != 0) { fprintf(stderr, "[] Error, could not read propagator from file %s\n", filename); exit(102); } if(N_Jacobi > 0) { fprintf(stdout, "# [] Jacobi smearing propagator no. 
%d with paramters N_Jacobi=%d, kappa_Jacobi=%f\n", is, N_Jacobi, kappa_Jacobi); for(c=0; c<N_Jacobi; c++) { #ifdef OPENMP Jacobi_Smearing_Step_one_Timeslice_threads(g_gauge_field, g_spinor_field[is], work, kappa_Jacobi); #else Jacobi_Smearing_Step_one_Timeslice(g_gauge_field, g_spinor_field[is], work, kappa_Jacobi); #endif } } } if(fermion_type == _TM_FERMION) { // read timeslice of the 12 down-type propagators, smear them for(is=0;is<n_s*n_c;is++) { if(do_gt == 0) { sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.inverted", filename_prefix2, Nconf, sx0, sx1, sx2, sx3, is); status = read_lime_spinor_timeslice(g_spinor_field[n_s*n_c+is], timeslice, filename, 0, spinor_field_checksum+n_s*n_c+is); if(status != 0) { fprintf(stderr, "[] Error, could not read propagator from file %s\n", filename); exit(102); } if(N_Jacobi > 0) { fprintf(stdout, "# [] Jacobi smearing propagator no. %d with paramters N_Jacobi=%d, kappa_Jacobi=%f\n", is, N_Jacobi, kappa_Jacobi); for(c=0; c<N_Jacobi; c++) { #ifdef OPENMP Jacobi_Smearing_Step_one_Timeslice_threads(g_gauge_field, g_spinor_field[n_s*n_c+is], work, kappa_Jacobi); #else Jacobi_Smearing_Step_one_Timeslice(g_gauge_field, g_spinor_field[n_s*n_c+is], work, kappa_Jacobi); #endif } } } } /****************************************************** * contractions ******************************************************/ for(ix=0;ix<VOL3;ix++) //for(ix=0;ix<1;ix++) { // assign the propagators _assign_fp_point_from_field(uprop, g_spinor_field, ix); if(fermion_type==_TM_FERMION) { _assign_fp_point_from_field(dprop, g_spinor_field+n_s*n_c, ix); } else { _fp_eq_fp(dprop, uprop); } flavor rotation for twisted mass fermions if(fermion_type == _TM_FERMION) { _fp_eq_rot_ti_fp(fp1, uprop, +1, fermion_type, fp2); _fp_eq_fp_ti_rot(uprop, fp1, +1, fermion_type, fp2); // _fp_eq_rot_ti_fp(fp1, dprop, -1, fermion_type, fp2); // _fp_eq_fp_ti_rot(dprop, fp1, -1, fermion_type, fp2); } // test: print fermion propagator point //printf_fp(uprop, stdout); 
for(icomp=0; icomp<num_component; icomp++) { _sp_eq_zero( connq[ix*num_component+icomp]); /****************************************************** * first contribution ******************************************************/ _fp_eq_zero(fp1); _fp_eq_zero(fp2); _fp_eq_zero(fp3); // C Gamma_1 x S_u = g0 g2 Gamma_1 S_u _fp_eq_gamma_ti_fp(fp1, gamma_component[0][icomp], uprop); _fp_eq_gamma_ti_fp(fp3, 2, fp1); _fp_eq_gamma_ti_fp(fp1, 0, fp3); // S_u x C Gamma_2 = S_u x g0 g2 Gamma_2 _fp_eq_fp_ti_gamma(fp2, 0, uprop); _fp_eq_fp_ti_gamma(fp3, 2, fp2); _fp_eq_fp_ti_gamma(fp2, gamma_component[1][icomp], fp3); // first part // reduce _fp_eq_zero(fp3); _fp_eq_fp_eps_contract13_fp(fp3, fp1, uprop); // reduce to spin propagator _sp_eq_zero( sp1 ); _sp_eq_fp_del_contract23_fp(sp1, fp2, fp3); // second part // reduce to spin propagator _sp_eq_zero( sp2 ); _sp_eq_fp_del_contract24_fp(sp2, fp2, fp3); // add and assign _sp_pl_eq_sp(sp1, sp2); _sp_eq_sp_ti_re(sp2, sp1, -gamma_component_sign[icomp]); _sp_eq_sp( connq[ix*num_component+icomp], sp2); /****************************************************** * second contribution ******************************************************/ _fp_eq_zero(fp1); _fp_eq_zero(fp2); _fp_eq_zero(fp3); // first part // C Gamma_1 x S_u = g0 g2 Gamma_1 S_u _fp_eq_gamma_ti_fp(fp1, gamma_component[0][icomp], uprop); _fp_eq_gamma_ti_fp(fp3, 2, fp1); _fp_eq_gamma_ti_fp(fp1, 0, fp3); // S_u x C Gamma_2 = S_u g0 g2 Gamma_2 (same S_u as above) _fp_eq_fp_ti_gamma(fp2, 0, fp1); _fp_eq_fp_ti_gamma(fp3, 2, fp2); _fp_eq_fp_ti_gamma(fp1, gamma_component[1][icomp], fp3); // reduce _fp_eq_zero(fp3); _fp_eq_fp_eps_contract13_fp(fp3, fp1, uprop); // reduce to spin propagator _sp_eq_zero( sp1 ); _sp_eq_fp_del_contract23_fp(sp1, uprop, fp3); // second part // C Gamma_1 x S_u = g0 g2 Gamma_1 S_u _fp_eq_gamma_ti_fp(fp1, gamma_component[0][icomp], uprop); _fp_eq_gamma_ti_fp(fp3, 2, fp1); _fp_eq_gamma_ti_fp(fp1, 0, fp3); // S_u x C Gamma_2 = S_u g0 g2 Gamma_2 
_fp_eq_fp_ti_gamma(fp2, 0, uprop); _fp_eq_fp_ti_gamma(fp3, 2, fp2); _fp_eq_fp_ti_gamma(fp2, gamma_component[1][icomp], fp3); // reduce _fp_eq_zero(fp3); _fp_eq_fp_eps_contract13_fp(fp3, fp1, fp2); // reduce to spin propagator _sp_eq_zero( sp2 ); _sp_eq_fp_del_contract24_fp(sp2, uprop, fp3); // add and assign _sp_pl_eq_sp(sp1, sp2); _sp_eq_sp_ti_re(sp2, sp1, -gamma_component_sign[icomp]); _sp_pl_eq_sp( connq[ix*num_component+icomp], sp2); /****************************************************** * third contribution ******************************************************/ _fp_eq_zero(fp1); _fp_eq_zero(fp2); _fp_eq_zero(fp3); // first part // C Gamma_1 x S_u = g0 g2 Gamma_1 S_u _fp_eq_gamma_ti_fp(fp1, gamma_component[0][icomp], uprop); _fp_eq_gamma_ti_fp(fp3, 2, fp1); _fp_eq_gamma_ti_fp(fp1, 0, fp3); // S_u x C Gamma_2 = S_u g0 g2 Gamma_2 _fp_eq_fp_ti_gamma(fp2, 0, fp1); _fp_eq_fp_ti_gamma(fp3, 2, fp2); _fp_eq_fp_ti_gamma(fp1, gamma_component[1][icomp], fp3); // reduce _fp_eq_zero(fp3); _fp_eq_fp_eps_contract13_fp(fp3, fp1, uprop); // reduce to spin propagator _sp_eq_zero( sp1 ); _sp_eq_fp_del_contract34_fp(sp1, uprop, fp3); // second part // C Gamma_1 x S_u = g0 g2 Gamma_1 S_u _fp_eq_gamma_ti_fp(fp1, gamma_component[0][icomp], uprop); _fp_eq_gamma_ti_fp(fp3, 2, fp1); _fp_eq_gamma_ti_fp(fp1, 0, fp3); // S_u x C Gamma_2 = S_u g0 g2 Gamma_2 _fp_eq_fp_ti_gamma(fp2, 0, uprop); _fp_eq_fp_ti_gamma(fp3, 2, fp2); _fp_eq_fp_ti_gamma(fp2, gamma_component[1][icomp], fp3); // reduce _fp_eq_zero(fp3); _fp_eq_fp_eps_contract13_fp(fp3, fp1, fp2); // reduce to spin propagator _sp_eq_zero( sp2 ); _sp_eq_fp_del_contract34_fp(sp2, uprop, fp3); // add and assign _sp_pl_eq_sp(sp1, sp2); _sp_eq_sp_ti_re(sp2, sp1, -gamma_component_sign[icomp]); _sp_pl_eq_sp( connq[ix*num_component+icomp], sp2); } // of icomp } // of ix /*********************************************** * finish calculation of connq ***********************************************/ if(g_propagator_bc_type == 0) { // multiply with 
phase factor fprintf(stdout, "# [] multiplying timeslice %d with boundary phase factor\n", timeslice); ir = (timeslice - sx0 + T_global) % T_global; w1.re = cos( 3. * M_PI*(double)ir / (double)T_global ); w1.im = sin( 3. * M_PI*(double)ir / (double)T_global ); for(ix=0;ix<num_component*VOL3;ix++) { _sp_eq_sp(sp1, connq[ix] ); _sp_eq_sp_ti_co( connq[ix], sp1, w1); } } else if (g_propagator_bc_type == 1) { // multiply with step function if(timeslice < sx0) { fprintf(stdout, "# [] multiplying timeslice %d with boundary step function\n", timeslice); for(ix=0;ix<num_component*VOL3;ix++) { _sp_eq_sp(sp1, connq[ix] ); _sp_eq_sp_ti_re( connq[ix], sp1, -1.); } } } if(write_ascii) { sprintf(filename, "%s_x.%.4d.t%.2dx%.2dy%.2dz%.2d.ascii", outfile_prefix, Nconf, sx0, sx1, sx2, sx3); write_contraction2( connq[0][0], filename, num_component*g_sv_dim*g_sv_dim, VOL3, 1, append); } /****************************************************************** * Fourier transform ******************************************************************/ items = 2 * num_component * g_sv_dim * g_sv_dim * VOL3; bytes = sizeof(double); memcpy(in, connq[0][0], items * bytes); ir = num_component * g_sv_dim * g_sv_dim; #ifdef OPENMP fftwnd_threads(num_threads, plan_p, ir, in, ir, 1, (fftw_complex*)(connq[0][0]), ir, 1); #else fftwnd(plan_p, ir, in, ir, 1, (fftw_complex*)(connq[0][0]), ir, 1); #endif // add phase factor from the source location iix = 0; for(x1=0;x1<LX;x1++) { q[0] = (double)x1 / (double)LX; for(x2=0;x2<LY;x2++) { q[1] = (double)x2 / (double)LY; for(x3=0;x3<LZ;x3++) { q[2] = (double)x3 / (double)LZ; phase = 2. 
* M_PI * ( q[0]*sx1 + q[1]*sx2 + q[2]*sx3 ); w1.re = cos(phase); w1.im = sin(phase); for(icomp=0; icomp<num_component; icomp++) { _sp_eq_sp(sp1, connq[iix] ); _sp_eq_sp_ti_co( connq[iix], sp1, w1) ; iix++; } }}} // of x3, x2, x1 // write to file sprintf(filename, "%s_q.%.4d.t%.2dx%.2dy%.2dz%.2d.Qx%.2dQy%.2dQz%.2d.%.5d", outfile_prefix, Nconf, sx0, sx1, sx2, sx3, qlatt_rep[snk_momentum_list[imom_snk]][1],qlatt_rep[snk_momentum_list[imom_snk]][2],qlatt_rep[snk_momentum_list[imom_snk]][3], g_sourceid2-g_sourceid+1); sprintf(contype, "2-pt. function, (t,q_1,q_2,q_3)-dependent, source_timeslice = %d", sx0); write_lime_contraction_timeslice(connq[0][0], filename, 64, num_component*g_sv_dim*g_sv_dim, contype, Nconf, 0, &connq_checksum, timeslice); if(write_ascii) { strcat(filename, ".ascii"); write_contraction2(connq[0][0],filename, num_component*g_sv_dim*g_sv_dim, VOL3, 1, append); } /*********************************************** * calculate connt ***********************************************/ for(icomp=0;icomp<num_component; icomp++) { // fwd _sp_eq_sp(sp1, connq[icomp]); _sp_eq_gamma_ti_sp(sp2, 0, sp1); _sp_pl_eq_sp(sp1, sp2); _co_eq_tr_sp(&w, sp1); connt[2*(icomp*T + timeslice) ] = w.re * 0.25; connt[2*(icomp*T + timeslice)+1] = w.im * 0.25; // bwd _sp_eq_sp(sp1, connq[icomp]); _sp_eq_gamma_ti_sp(sp2, 0, sp1); _sp_mi_eq_sp(sp1, sp2); _co_eq_tr_sp(&w, sp1); connt[2*(icomp*T+timeslice + num_component*T) ] = w.re * 0.25; connt[2*(icomp*T+timeslice + num_component*T)+1] = w.im * 0.25; } } // of loop on timeslice // write connt sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.fw", outfile_prefix, Nconf, sx0, sx1, sx2, sx3); ofs = fopen(filename, "w"); if(ofs == NULL) { fprintf(stderr, "[] Error, could not open file %s for writing\n", filename); exit(3); } fprintf(ofs, "#%12.8f%3d%3d%3d%3d%8.4f%6d\n", g_kappa, T_global, LX, LY, LZ, g_mu, Nconf); for(icomp=0; icomp<num_component; icomp++) { ir = sx0; fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], 
gamma_component[1][icomp], 0, connt[2*(icomp*T+ir)], 0., Nconf); for(it=1;it<T/2;it++) { ir = ( it + sx0 ) % T_global; ir2 = ( (T_global - it) + sx0 ) % T_global; fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(icomp*T+ir)], connt[2*(icomp*T+ir2)], Nconf); } ir = ( it + sx0 ) % T_global; fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(icomp*T+ir)], 0., Nconf); } fclose(ofs); sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.bw", outfile_prefix, Nconf, sx0, sx1, sx2, sx3); ofs = fopen(filename, "w"); if(ofs == NULL) { fprintf(stderr, "[] Error, could not open file %s for writing\n", filename); exit(3); } fprintf(ofs, "#%12.8f%3d%3d%3d%3d%8.4f%6d\n", g_kappa, T_global, LX, LY, LZ, g_mu, Nconf); for(icomp=0; icomp<num_component; icomp++) { ir = sx0; fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], 0, connt[2*(num_component*T+icomp*T+ir)], 0., Nconf); for(it=1;it<T/2;it++) { ir = ( it + sx0 ) % T_global; ir2 = ( (T_global - it) + sx0 ) % T_global; fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(num_component*T+icomp*T+ir)], connt[2*(num_component*T+icomp*T+ir2)], Nconf); } ir = ( it + sx0 ) % T_global; fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(num_component*T+icomp*T+ir)], 0., Nconf); } fclose(ofs); } // of loop on sink momentum ( = Delta^++ momentum, Qvec) /*********************************************** * free the allocated memory, finalize ***********************************************/ free_geometry(); if(connt!= NULL) free(connt); if(connq!= NULL) free(connq); if(gauge_trafo != NULL) free(gauge_trafo); if(g_spinor_field!=NULL) { for(i=0; i<no_fields; i++) free(g_spinor_field[i]); free(g_spinor_field); g_spinor_field=(double**)NULL; } if(spinor_field_checksum !=NULL) 
free(spinor_field_checksum); if(g_gauge_field != NULL) free(g_gauge_field); if(snk_momemtum_list != NULL) { if(snk_momentum_list[0] != NULL) free(snk_momentum_list[0]); free(snk_momentum_list); } if(rel_momemtum_list != NULL) { if(rel_momentum_list[0] != NULL) free(rel_momentum_list[0]); free(rel_momentum_list); } // free the fermion propagator points free_fp( &uprop ); free_fp( &dprop ); free_fp( &fp1 ); free_fp( &fp2 ); free_fp( &fp3 ); free_sp( &sp1 ); free_sp( &sp2 ); free(in); fftwnd_destroy_plan(plan_p); g_the_time = time(NULL); fprintf(stdout, "# [] %s# [] end fo run\n", ctime(&g_the_time)); fflush(stdout); fprintf(stderr, "# [] %s# [] end fo run\n", ctime(&g_the_time)); fflush(stderr); #ifdef MPI MPI_Finalize(); #endif return(0); }
int main(int argc, char **argv) { int c, i, mu, status; int ispin, icol, isc; int n_c = 3; int n_s = 4; int count = 0; int filename_set = 0; int dims[4] = {0,0,0,0}; int grid_size[4]; int l_LX_at, l_LXstart_at; int x0, x1, x2, x3, ix, iix, iy, is, it, i3; int sl0, sl1, sl2, sl3, have_source_flag=0; int source_proc_coords[4], lsl0, lsl1, lsl2, lsl3; int check_residuum = 0; unsigned int VOL3, V5; int do_gt = 0; int full_orbit = 0; int smear_source = 0; char filename[200], source_filename[200], source_filename_write[200]; double ratime, retime; double plaq_r=0., plaq_m=0., norm, norm2; double spinor1[24]; double *gauge_qdp[4], *gauge_field_timeslice=NULL, *gauge_field_smeared=NULL; double _1_2_kappa, _2_kappa, phase; FILE *ofs; int mu_trans[4] = {3, 0, 1, 2}; int threadid, nthreads; int timeslice, source_timeslice; char rng_file_in[100], rng_file_out[100]; int *source_momentum=NULL; int source_momentum_class = -1; int source_momentum_no = 0; int source_momentum_runs = 1; int imom; int num_gpu_on_node=0, rank; int source_location_5d_iseven; int convert_sign=0; #ifdef HAVE_QUDA int rotate_gamma_basis = 1; #else int rotate_gamma_basis = 0; #endif omp_lock_t *lck = NULL, gen_lck[1]; int key = 0; /****************************************************************************/ /* for smearing parallel to inversion */ double *smearing_spinor_field[] = {NULL,NULL}; int dummy_flag = 0; /****************************************************************************/ /****************************************************************************/ #if (defined HAVE_QUDA) && (defined MULTI_GPU) int x_face_size, y_face_size, z_face_size, t_face_size, pad_size; #endif /****************************************************************************/ /************************************************/ int qlatt_nclass; int *qlatt_id=NULL, *qlatt_count=NULL, **qlatt_rep=NULL, **qlatt_map=NULL; double **qlatt_list=NULL; /************************************************/ 
/************************************************/ double boundary_condition_factor; int boundary_condition_factor_set = 0; /************************************************/ //#ifdef MPI // kernelPackT = true; //#endif /*********************************************** * QUDA parameters ***********************************************/ #ifdef HAVE_QUDA QudaPrecision cpu_prec = QUDA_DOUBLE_PRECISION; QudaPrecision cuda_prec = QUDA_DOUBLE_PRECISION; QudaPrecision cuda_prec_sloppy = QUDA_SINGLE_PRECISION; QudaGaugeParam gauge_param = newQudaGaugeParam(); QudaInvertParam inv_param = newQudaInvertParam(); #endif while ((c = getopt(argc, argv, "soch?vgf:p:b:S:R:")) != -1) { switch (c) { case 'v': g_verbose = 1; break; case 'g': do_gt = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'c': check_residuum = 1; fprintf(stdout, "# [invert_dw_quda] will check residuum again\n"); break; case 'p': n_c = atoi(optarg); fprintf(stdout, "# [invert_dw_quda] will use number of colors = %d\n", n_c); break; case 'o': full_orbit = 1; fprintf(stdout, "# [invert_dw_quda] will invert for full orbit, if source momentum set\n"); case 's': smear_source = 1; fprintf(stdout, "# [invert_dw_quda] will smear the sources if they are read from file\n"); break; case 'b': boundary_condition_factor = atof(optarg); boundary_condition_factor_set = 1; fprintf(stdout, "# [invert_dw_quda] const. 
boundary condition factor set to %e\n", boundary_condition_factor); break; case 'S': convert_sign = atoi(optarg); fprintf(stdout, "# [invert_dw_quda] using convert sign %d\n", convert_sign); break; case 'R': rotate_gamma_basis = atoi(optarg); fprintf(stdout, "# [invert_dw_quda] rotate gamma basis %d\n", rotate_gamma_basis); break; case 'h': case '?': default: usage(); break; } } // get the time stamp g_the_time = time(NULL); /************************************** * set the default values, read input **************************************/ if(filename_set==0) strcpy(filename, "cvc.input"); if(g_proc_id==0) fprintf(stdout, "# Reading input from file %s\n", filename); read_input_parser(filename); #ifdef MPI #ifdef HAVE_QUDA grid_size[0] = g_nproc_x; grid_size[1] = g_nproc_y; grid_size[2] = g_nproc_z; grid_size[3] = g_nproc_t; fprintf(stdout, "# [] g_nproc = (%d,%d,%d,%d)\n", g_nproc_x, g_nproc_y, g_nproc_z, g_nproc_t); initCommsQuda(argc, argv, grid_size, 4); #else MPI_Init(&argc, &argv); #endif #endif #if (defined PARALLELTX) || (defined PARALLELTXY) EXIT_WITH_MSG(1, "[] Error, 2-dim./3-dim. MPI-Version not yet implemented"); #endif // some checks on the input data if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stderr, "[invert_dw_quda] Error, T and L's must be set\n"); usage(); } // set number of openmp threads // initialize MPI parameters mpi_init(argc, argv); // the volume of a timeslice VOL3 = LX*LY*LZ; V5 = T*LX*LY*LZ*L5; g_kappa5d = 0.5 / (5. 
+ g_m5); if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] kappa5d = %e\n", g_kappa5d); fprintf(stdout, "# [%2d] parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] L5 = %3d\n",\ g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, L5); #ifdef MPI if(T==0) { fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id); MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); exit(2); } #endif if(init_geometry() != 0) { fprintf(stderr, "[invert_dw_quda] Error from init_geometry\n"); EXIT(1); } geometry(); if( init_geometry_5d() != 0 ) { fprintf(stderr, "[invert_dw_quda] Error from init_geometry_5d\n"); EXIT(2); } geometry_5d(); /************************************** * initialize the QUDA library **************************************/ if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] initializing quda\n"); #ifdef HAVE_QUDA // cudaGetDeviceCount(&num_gpu_on_node); if(g_gpu_per_node<0) { if(g_cart_id==0) fprintf(stderr, "[] Error, number of GPUs per node not set\n"); EXIT(106); } else { num_gpu_on_node = g_gpu_per_node; } #ifdef MPI rank = comm_rank(); #else rank = 0; #endif g_gpu_device_number = rank % num_gpu_on_node; fprintf(stdout, "# [] process %d/%d uses device %d\n", rank, g_cart_id, g_gpu_device_number); initQuda(g_gpu_device_number); #endif /************************************** * prepare the gauge field **************************************/ // read the gauge field from file alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND); if(strcmp( gaugefilename_prefix, "identity")==0 ) { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] Setting up unit gauge field\n"); for(ix=0;ix<VOLUME; ix++) { for(mu=0;mu<4;mu++) { _cm_eq_id(g_gauge_field+_GGI(ix,mu)); } } } else if(strcmp( gaugefilename_prefix, "random")==0 ) { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] Setting up random gauge field with seed = %d\n", g_seed); init_rng_state(g_seed, &g_rng_state); random_gauge_field(g_gauge_field, 1.); plaquette(&plaq_m); sprintf(filename, "%s.%.4d", 
gaugefilename_prefix, Nconf); check_error(write_lime_gauge_field(filename, plaq_m, Nconf, 64), "write_lime_gauge_field", NULL, 12); } else { if(g_gauge_file_format == 0) { // ILDG sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf); if(g_cart_id==0) fprintf(stdout, "# Reading gauge field from file %s\n", filename); status = read_lime_gauge_field_doubleprec(filename); } else if(g_gauge_file_format == 1) { // NERSC sprintf(filename, "%s.%.5d", gaugefilename_prefix, Nconf); if(g_cart_id==0) fprintf(stdout, "# Reading gauge field from file %s\n", filename); status = read_nersc_gauge_field(g_gauge_field, filename, &plaq_r); //status = read_nersc_gauge_field_3x3(g_gauge_field, filename, &plaq_r); } if(status != 0) { fprintf(stderr, "[invert_dw_quda] Error, could not read gauge field"); EXIT(12); } } #ifdef MPI xchange_gauge(); #endif // measure the plaquette plaquette(&plaq_m); if(g_cart_id==0) fprintf(stdout, "# Measured plaquette value: %25.16e\n", plaq_m); if(g_cart_id==0) fprintf(stdout, "# Read plaquette value : %25.16e\n", plaq_r); #ifndef HAVE_QUDA if(N_Jacobi>0) { #endif // allocate the smeared / qdp ordered gauge field alloc_gauge_field(&gauge_field_smeared, VOLUMEPLUSRAND); for(i=0;i<4;i++) { gauge_qdp[i] = gauge_field_smeared + i*18*VOLUME; } #ifndef HAVE_QUDA } #endif #ifdef HAVE_QUDA // transcribe the gauge field omp_set_num_threads(g_num_threads); #pragma omp parallel for private(ix,iy,mu) for(ix=0;ix<VOLUME;ix++) { iy = g_lexic2eot[ix]; for(mu=0;mu<4;mu++) { _cm_eq_cm(gauge_qdp[mu_trans[mu]]+18*iy, g_gauge_field+_GGI(ix,mu)); } } // multiply timeslice T-1 with factor of -1 (antiperiodic boundary condition) if(g_proc_coords[0]==g_nproc_t-1) { if(!boundary_condition_factor_set) boundary_condition_factor = -1.; fprintf(stdout, "# [] process %d multiplies gauge-field timeslice T_global-1 with boundary condition factor %e\n", g_cart_id, boundary_condition_factor); omp_set_num_threads(g_num_threads); #pragma omp parallel for private(ix,iy) 
for(ix=0;ix<VOL3;ix++) { iix = (T-1)*VOL3 + ix; iy = g_lexic2eot[iix]; _cm_ti_eq_re(gauge_qdp[mu_trans[0]]+18*iy, -1.); } } // QUDA precision parameters switch(g_cpu_prec) { case 0: cpu_prec = QUDA_HALF_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] CPU prec = half\n"); break; case 1: cpu_prec = QUDA_SINGLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] CPU prec = single\n"); break; case 2: cpu_prec = QUDA_DOUBLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] CPU prec = double\n"); break; default: cpu_prec = QUDA_DOUBLE_PRECISION; break; } switch(g_gpu_prec) { case 0: cuda_prec = QUDA_HALF_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU prec = half\n"); break; case 1: cuda_prec = QUDA_SINGLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU prec = single\n"); break; case 2: cuda_prec = QUDA_DOUBLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU prec = double\n"); break; default: cuda_prec = QUDA_DOUBLE_PRECISION; break; } switch(g_gpu_prec_sloppy) { case 0: cuda_prec_sloppy = QUDA_HALF_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU sloppy prec = half\n"); break; case 1: cuda_prec_sloppy = QUDA_SINGLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU sloppy prec = single\n"); break; case 2: cuda_prec_sloppy = QUDA_DOUBLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU sloppy prec = double\n"); break; default: cuda_prec_sloppy = QUDA_SINGLE_PRECISION; break; } // QUDA gauge parameters gauge_param.X[0] = LX; gauge_param.X[1] = LY; gauge_param.X[2] = LZ; gauge_param.X[3] = T; inv_param.Ls = L5; gauge_param.anisotropy = 1.0; gauge_param.type = QUDA_WILSON_LINKS; gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER; gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T; gauge_param.cpu_prec = cpu_prec; gauge_param.cuda_prec = cuda_prec; gauge_param.reconstruct = QUDA_RECONSTRUCT_12; gauge_param.cuda_prec_sloppy = cuda_prec_sloppy; gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12; gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO; 
gauge_param.ga_pad = 0; inv_param.sp_pad = 0; inv_param.cl_pad = 0; // For multi-GPU, ga_pad must be large enough to store a time-slice #ifdef MULTI_GPU x_face_size = inv_param.Ls * gauge_param.X[1]*gauge_param.X[2]*gauge_param.X[3]/2; y_face_size = inv_param.Ls * gauge_param.X[0]*gauge_param.X[2]*gauge_param.X[3]/2; z_face_size = inv_param.Ls * gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[3]/2; t_face_size = inv_param.Ls * gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[2]/2; pad_size = _MAX(x_face_size, y_face_size); pad_size = _MAX(pad_size, z_face_size); pad_size = _MAX(pad_size, t_face_size); gauge_param.ga_pad = pad_size; if(g_cart_id==0) printf("# [invert_dw_quda] pad_size = %d\n", pad_size); #endif // load the gauge field if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] loading gauge field\n"); loadGaugeQuda((void*)gauge_qdp, &gauge_param); gauge_qdp[0] = NULL; gauge_qdp[1] = NULL; gauge_qdp[2] = NULL; gauge_qdp[3] = NULL; #endif /********************************************* * APE smear the gauge field *********************************************/ if(N_Jacobi>0) { memcpy(gauge_field_smeared, g_gauge_field, 72*VOLUMEPLUSRAND*sizeof(double)); fprintf(stdout, "# [invert_dw_quda] APE smearing gauge field with paramters N_APE=%d, alpha_APE=%e\n", N_ape, alpha_ape); APE_Smearing_Step_threads(gauge_field_smeared, N_ape, alpha_ape); xchange_gauge_field(gauge_field_smeared); } // allocate memory for the spinor fields #ifdef HAVE_QUDA no_fields = 3+2; #else no_fields = 6+2; #endif g_spinor_field = (double**)calloc(no_fields, sizeof(double*)); for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND*L5); smearing_spinor_field[0] = g_spinor_field[no_fields-2]; smearing_spinor_field[1] = g_spinor_field[no_fields-1]; switch(g_source_type) { case 0: case 5: // the source locaton sl0 = g_source_location / (LX_global*LY_global*LZ); sl1 = ( g_source_location % (LX_global*LY_global*LZ) ) / ( LY_global*LZ); sl2 = ( g_source_location % ( 
LY_global*LZ) ) / ( LZ); sl3 = g_source_location % LZ; if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] global sl = (%d, %d, %d, %d)\n", sl0, sl1, sl2, sl3); source_proc_coords[0] = sl0 / T; source_proc_coords[1] = sl1 / LX; source_proc_coords[2] = sl2 / LY; source_proc_coords[3] = sl3 / LZ; #ifdef MPI MPI_Cart_rank(g_cart_grid, source_proc_coords, &g_source_proc_id); #else g_source_proc_id = 0; #endif have_source_flag = g_source_proc_id == g_cart_id; lsl0 = sl0 % T; lsl1 = sl1 % LX; lsl2 = sl2 % LY; lsl3 = sl3 % LZ; if(have_source_flag) { fprintf(stdout, "# [invert_dw_quda] process %d has the source at (%d, %d, %d, %d)\n", g_cart_id, lsl0, lsl1, lsl2, lsl3); } break; case 2: case 3: case 4: // the source timeslice #ifdef MPI source_proc_coords[0] = g_source_timeslice / T; source_proc_coords[1] = 0; source_proc_coords[2] = 0; source_proc_coords[3] = 0; MPI_Cart_rank(g_cart_grid, source_proc_coords, &g_source_proc_id); have_source_flag = ( g_source_proc_id == g_cart_id ); source_timeslice = have_source_flag ? 
g_source_timeslice % T : -1; #else g_source_proc_id = 0; have_source_flag = 1; source_timeslice = g_source_timeslice; #endif break; } #ifdef HAVE_QUDA /************************************************************* * QUDA inverter parameters *************************************************************/ inv_param.dslash_type = QUDA_DOMAIN_WALL_DSLASH; if(strcmp(g_inverter_type_name, "cg") == 0) { inv_param.inv_type = QUDA_CG_INVERTER; if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] using cg inverter\n"); } else if(strcmp(g_inverter_type_name, "bicgstab") == 0) { inv_param.inv_type = QUDA_BICGSTAB_INVERTER; if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] using bicgstab inverter\n"); #ifdef MULTI_GPU } else if(strcmp(g_inverter_type_name, "gcr") == 0) { inv_param.inv_type = QUDA_GCR_INVERTER; if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] using gcr inverter\n"); #endif } else { if(g_cart_id==0) fprintf(stderr, "[invert_dw_quda] Error, unrecognized inverter type %s\n", g_inverter_type_name); EXIT(123); } if(inv_param.inv_type == QUDA_CG_INVERTER) { inv_param.solution_type = QUDA_MAT_SOLUTION; inv_param.solve_type = QUDA_NORMEQ_PC_SOLVE; } else if(inv_param.inv_type == QUDA_BICGSTAB_INVERTER) { inv_param.solution_type = QUDA_MAT_SOLUTION; inv_param.solve_type = QUDA_DIRECT_PC_SOLVE; } else { inv_param.solution_type = QUDA_MATPC_SOLUTION; inv_param.solve_type = QUDA_DIRECT_PC_SOLVE; } inv_param.m5 = g_m5; inv_param.kappa = 0.5 / (5. 
+ inv_param.m5); inv_param.mass = g_m0; inv_param.tol = solver_precision; inv_param.maxiter = niter_max; inv_param.reliable_delta = reliable_delta; #ifdef MPI // domain decomposition preconditioner parameters if(inv_param.inv_type == QUDA_GCR_INVERTER) { if(g_cart_id == 0) printf("# [] settup DD parameters\n"); inv_param.gcrNkrylov = 15; inv_param.inv_type_precondition = QUDA_MR_INVERTER; inv_param.tol_precondition = 1e-6; inv_param.maxiter_precondition = 200; inv_param.verbosity_precondition = QUDA_VERBOSE; inv_param.prec_precondition = cuda_prec_sloppy; inv_param.omega = 0.7; } #endif inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN; inv_param.dagger = QUDA_DAG_NO; inv_param.mass_normalization = QUDA_KAPPA_NORMALIZATION; //;QUDA_MASS_NORMALIZATION; inv_param.cpu_prec = cpu_prec; inv_param.cuda_prec = cuda_prec; inv_param.cuda_prec_sloppy = cuda_prec_sloppy; inv_param.verbosity = QUDA_VERBOSE; inv_param.preserve_source = QUDA_PRESERVE_SOURCE_NO; inv_param.dirac_order = QUDA_DIRAC_ORDER; #ifdef MPI inv_param.preserve_dirac = QUDA_PRESERVE_DIRAC_YES; inv_param.prec_precondition = cuda_prec_sloppy; inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS; inv_param.dirac_tune = QUDA_TUNE_NO; #endif #endif /******************************************* * write initial rng state to file *******************************************/ if( g_source_type==2 && g_coherent_source==2 ) { sprintf(rng_file_out, "%s.0", g_rng_filename); status = init_rng_stat_file (g_seed, rng_file_out); if( status != 0 ) { fprintf(stderr, "[invert_dw_quda] Error, could not write rng status\n"); EXIT(210); } } else if( (g_source_type==2 /*&& g_coherent_source==1*/) || g_source_type==3 || g_source_type==4) { if( init_rng_state(g_seed, &g_rng_state) != 0 ) { fprintf(stderr, "[invert_dw_quda] Error, could initialize rng state\n"); EXIT(211); } } /******************************************* * prepare locks for openmp *******************************************/ nthreads = g_num_threads - 1; lck = 
(omp_lock_t*)malloc(nthreads * sizeof(omp_lock_t)); if(lck == NULL) { EXIT_WITH_MSG(97, "[invert_dw_quda] Error, could not allocate lck\n"); } // init locks for(i=0;i<nthreads;i++) { omp_init_lock(lck+i); } omp_init_lock(gen_lck); // check the source momenta if(g_source_momentum_set) { source_momentum = (int*)malloc(3*sizeof(int)); if(g_source_momentum[0]<0) g_source_momentum[0] += LX_global; if(g_source_momentum[1]<0) g_source_momentum[1] += LY_global; if(g_source_momentum[2]<0) g_source_momentum[2] += LZ_global; fprintf(stdout, "# [invert_dw_quda] using final source momentum ( %d, %d, %d )\n", g_source_momentum[0], g_source_momentum[1], g_source_momentum[2]); if(full_orbit) { status = make_qcont_orbits_3d_parity_avg( &qlatt_id, &qlatt_count, &qlatt_list, &qlatt_nclass, &qlatt_rep, &qlatt_map); if(status != 0) { if(g_cart_id==0) fprintf(stderr, "\n[invert_dw_quda] Error while creating O_3-lists\n"); EXIT(4); } source_momentum_class = qlatt_id[g_ipt[0][g_source_momentum[0]][g_source_momentum[1]][g_source_momentum[2]]]; source_momentum_no = qlatt_count[source_momentum_class]; source_momentum_runs = source_momentum_class==0 ? 
1 : source_momentum_no + 1; if(g_cart_id==0) fprintf(stdout, "# [] source momentum belongs to class %d with %d members, which means %d runs\n", source_momentum_class, source_momentum_no, source_momentum_runs); } } if(g_source_type == 5) { if(g_seq_source_momentum_set) { if(g_seq_source_momentum[0]<0) g_seq_source_momentum[0] += LX_global; if(g_seq_source_momentum[1]<0) g_seq_source_momentum[1] += LY_global; if(g_seq_source_momentum[2]<0) g_seq_source_momentum[2] += LZ_global; } else if(g_source_momentum_set) { g_seq_source_momentum[0] = g_source_momentum[0]; g_seq_source_momentum[1] = g_source_momentum[1]; g_seq_source_momentum[2] = g_source_momentum[2]; } fprintf(stdout, "# [invert_dw_quda] using final sequential source momentum ( %d, %d, %d )\n", g_seq_source_momentum[0], g_seq_source_momentum[1], g_seq_source_momentum[2]); } /*********************************************** * loop on spin-color-index ***********************************************/ for(isc=g_source_index[0]; isc<=g_source_index[1]; isc++) // for(isc=g_source_index[0]; isc<=g_source_index[0]; isc++) { ispin = isc / n_c; icol = isc % n_c; for(imom=0; imom<source_momentum_runs; imom++) { /*********************************************** * set source momentum ***********************************************/ if(g_source_momentum_set) { if(imom == 0) { if(full_orbit) { source_momentum[0] = 0; source_momentum[1] = 0; source_momentum[2] = 0; } else { source_momentum[0] = g_source_momentum[0]; source_momentum[1] = g_source_momentum[1]; source_momentum[2] = g_source_momentum[2]; } } else { source_momentum[0] = qlatt_map[source_momentum_class][imom-1] / (LY_global*LZ_global); source_momentum[1] = ( qlatt_map[source_momentum_class][imom-1] % (LY_global*LZ_global) ) / LZ_global; source_momentum[2] = qlatt_map[source_momentum_class][imom-1] % LZ_global; } if(g_cart_id==0) fprintf(stdout, "# [] run no. 
%d, source momentum (%d, %d, %d)\n", imom, source_momentum[0], source_momentum[1], source_momentum[2]); } /*********************************************** * prepare the souce ***********************************************/ if(g_read_source == 0) { // create source switch(g_source_type) { case 0: // point source if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] Creating point source\n"); for(ix=0;ix<L5*VOLUME;ix++) { _fv_eq_zero(g_spinor_field[0]+ix); } if(have_source_flag) { if(g_source_momentum_set) { phase = 2*M_PI*( source_momentum[0]*sl1/(double)LX_global + source_momentum[1]*sl2/(double)LY_global + source_momentum[2]*sl3/(double)LZ_global ); g_spinor_field[0][_GSI(g_ipt[lsl0][lsl1][lsl2][lsl3]) + 2*(n_c*ispin+icol) ] = cos(phase); g_spinor_field[0][_GSI(g_ipt[lsl0][lsl1][lsl2][lsl3]) + 2*(n_c*ispin+icol)+1] = sin(phase); } else { g_spinor_field[0][_GSI(g_ipt[lsl0][lsl1][lsl2][lsl3]) + 2*(n_c*ispin+icol) ] = 1.; } } if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.qx%.2dqy%.2dqz%.2d", filename_prefix, Nconf, sl0, sl1, sl2, sl3, n_c*ispin+icol, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d", filename_prefix, Nconf, sl0, sl1, sl2, sl3, n_c*ispin+icol); } #ifdef HAVE_QUDA // set matpc_tpye source_location_5d_iseven = ( (g_iseven[g_ipt[lsl0][lsl1][lsl2][lsl3]] && ispin<n_s/2) || (!g_iseven[g_ipt[lsl0][lsl1][lsl2][lsl3]] && ispin>=n_s/2) ) ? 
1 : 0; if(source_location_5d_iseven) { inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN; if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] matpc type is MATPC_EVEN_EVEN\n"); } else { inv_param.matpc_type = QUDA_MATPC_ODD_ODD; if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] matpc type is MATPC_ODD_ODD\n"); } #endif break; case 2: // timeslice source if(g_coherent_source==1) { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] Creating coherent timeslice source\n"); status = prepare_coherent_timeslice_source(g_spinor_field[0], gauge_field_smeared, g_coherent_source_base, g_coherent_source_delta, VOLUME, g_rng_state, 1); if(status != 0) { fprintf(stderr, "[invert_dw_quda] Error from prepare source, status was %d\n", status); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 123); MPI_Finalize(); #endif exit(123); } check_error(prepare_coherent_timeslice_source(g_spinor_field[0], gauge_field_smeared, g_coherent_source_base, g_coherent_source_delta, VOLUME, g_rng_state, 1), "prepare_coherent_timeslice_source", NULL, 123); timeslice = g_coherent_source_base; } else { if(g_coherent_source==2) { timeslice = (g_coherent_source_base+isc*g_coherent_source_delta)%T_global; fprintf(stdout, "# [invert_dw_quda] Creating timeslice source\n"); check_error(prepare_timeslice_source(g_spinor_field[0], gauge_field_smeared, timeslice, VOLUME, g_rng_state, 1), "prepare_timeslice_source", NULL, 123); } else { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] Creating timeslice source\n"); check_error(prepare_timeslice_source(g_spinor_field[0], gauge_field_smeared, g_source_timeslice, VOLUME, g_rng_state, 1), "prepare_timeslice_source", NULL, 124); timeslice = g_source_timeslice; } } if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.%.2d.%.5d.qx%.2dqy%.2dqz%.2d", filename_prefix, Nconf, timeslice, isc, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.%.2d.%.5d", filename_prefix, Nconf, timeslice, isc); } break; case 3: // 
timeslice sources for one-end trick (spin dilution) fprintf(stdout, "# [invert_dw_quda] Creating timeslice source for one-end-trick\n"); check_error( prepare_timeslice_source_one_end(g_spinor_field[0], gauge_field_smeared, source_timeslice, source_momentum, isc%n_s, g_rng_state, \ ( isc%n_s==(n_s-1) && imom==source_momentum_runs-1 )), "prepare_timeslice_source_one_end", NULL, 125 ); c = N_Jacobi > 0 ? isc%n_s + n_s : isc%n_s; if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.%.2d.%.2d.qx%.2dqy%.2dqz%.2d", filename_prefix, Nconf, g_source_timeslice, c, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.%.2d.%.2d", filename_prefix, Nconf, g_source_timeslice, c); } break; case 4: // timeslice sources for one-end trick (spin and color dilution ) fprintf(stdout, "# [invert_dw_quda] Creating timeslice source for one-end-trick\n"); check_error(prepare_timeslice_source_one_end_color(g_spinor_field[0], gauge_field_smeared, source_timeslice, source_momentum,\ isc%(n_s*n_c), g_rng_state, ( isc%(n_s*n_c)==(n_s*n_c-1) && imom==source_momentum_runs-1 )), "prepare_timeslice_source_one_end_color", NULL, 126); c = N_Jacobi > 0 ? 
isc%(n_s*n_c) + (n_s*n_c) : isc%(n_s*n_c); if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.%.2d.%.2d.qx%.2dqy%.2dqz%.2d", filename_prefix, Nconf, g_source_timeslice, c, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.%.2d.%.2d", filename_prefix, Nconf, g_source_timeslice, c); } break; case 5: if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] preparing sequential point source\n"); check_error( prepare_sequential_point_source (g_spinor_field[0], isc, sl0, g_seq_source_momentum, smear_source, g_spinor_field[1], gauge_field_smeared), "prepare_sequential_point_source", NULL, 33); sprintf(source_filename, "%s.%.4d.t%.2dx%.2d.y%.2d.z%.2d.%.2d.qx%.2dqy%.2dqz%.2d", filename_prefix2, Nconf, sl0, sl1, sl2, sl3, isc, g_source_momentum[0], g_source_momentum[1], g_source_momentum[2]); break; default: fprintf(stderr, "\nError, unrecognized source type\n"); exit(32); break; } } else { // read source switch(g_source_type) { case 0: // point source if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.qx%.2dqy%.2dqz%.2d", \ filename_prefix2, Nconf, sl0, sl1, sl2, sl3, isc, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d", filename_prefix2, Nconf, sl0, sl1, sl2, sl3, isc); } fprintf(stdout, "# [invert_dw_quda] reading source from file %s\n", source_filename); check_error(read_lime_spinor(g_spinor_field[0], source_filename, 0), "read_lime_spinor", NULL, 115); break; case 2: // timeslice source if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.%.2d.%.5d.qx%.2dqy%.2dqz%.2d", filename_prefix2, Nconf, g_source_timeslice, isc, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.%.2d.%.5d", filename_prefix2, Nconf, g_source_timeslice, isc); } fprintf(stdout, "# [invert_dw_quda] reading source from file %s\n", source_filename); 
check_error(read_lime_spinor(g_spinor_field[0], source_filename, 0), "read_lime_spinor", NULL, 115); break; default: check_error(1, "source type", NULL, 104); break; case -1: // timeslice source sprintf(source_filename, "%s", filename_prefix2); fprintf(stdout, "# [invert_dw_quda] reading source from file %s\n", source_filename); check_error(read_lime_spinor(g_spinor_field[0], source_filename, 0), "read_lime_spinor", NULL, 115); break; } } // of if g_read_source if(g_write_source) { check_error(write_propagator(g_spinor_field[0], source_filename, 0, g_propagator_precision), "write_propagator", NULL, 27); } /*********************************************************************************************** * here threads split: ***********************************************************************************************/ if(dummy_flag==0) strcpy(source_filename_write, source_filename); memcpy((void*)(smearing_spinor_field[0]), (void*)(g_spinor_field[0]), 24*VOLUME*sizeof(double)); if(dummy_flag>0) { // copy only if smearing has been done; otherwise do not copy, do not invert if(g_cart_id==0) fprintf(stdout, "# [] copy smearing field -> g field\n"); memcpy((void*)(g_spinor_field[0]), (void*)(smearing_spinor_field[1]), 24*VOLUME*sizeof(double)); } omp_set_num_threads(g_num_threads); #pragma omp parallel private(threadid, _2_kappa, is, ix, iy, iix, ratime, retime) shared(key,g_read_source, smear_source, N_Jacobi, kappa_Jacobi, smearing_spinor_field, g_spinor_field, nthreads, convert_sign, VOLUME, VOL3, T, L5, isc, rotate_gamma_basis, g_cart_id) firstprivate(inv_param, gauge_param, ofs) { threadid = omp_get_thread_num(); if(threadid < nthreads) { fprintf(stdout, "# [] proc%.2d thread%.2d starting source preparation\n", g_cart_id, threadid); // smearing if( ( !g_read_source || (g_read_source && smear_source ) ) && N_Jacobi > 0 ) { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] smearing source with N_Jacobi=%d, kappa_Jacobi=%e\n", N_Jacobi, kappa_Jacobi); 
Jacobi_Smearing_threaded(gauge_field_smeared, smearing_spinor_field[0], smearing_spinor_field[1], kappa_Jacobi, N_Jacobi, threadid, nthreads); } /*********************************************** * create the 5-dim. source field ***********************************************/ if(convert_sign == 0) { spinor_4d_to_5d_threaded(smearing_spinor_field[0], smearing_spinor_field[0], threadid, nthreads); } else if(convert_sign == 1 || convert_sign == -1) { spinor_4d_to_5d_sign_threaded(smearing_spinor_field[0], smearing_spinor_field[0], convert_sign, threadid, nthreads); } for(is=0; is<L5; is++) { for(it=threadid; it<T; it+=nthreads) { memcpy((void*)(g_spinor_field[0]+_GSI(g_ipt_5d[is][it][0][0][0])), (void*)(smearing_spinor_field[0]+_GSI(g_ipt_5d[is][it][0][0][0])), VOL3*24*sizeof(double)); } } // reorder, multiply with g2 for(is=0; is<L5; is++) { for(it=threadid; it<T; it+=nthreads) { for(i3=0; i3<VOL3; i3++) { ix = (is*T+it)*VOL3 + i3; _fv_eq_zero(smearing_spinor_field[1]+_GSI(ix)); }}} if(rotate_gamma_basis) { for(it=threadid; it<T; it+=nthreads) { for(i3=0; i3<VOL3; i3++) { ix = it * VOL3 + i3; iy = lexic2eot_5d(0, ix); _fv_eq_gamma_ti_fv(smearing_spinor_field[1]+_GSI(iy), 2, smearing_spinor_field[0]+_GSI(ix)); }} for(it=threadid; it<T; it+=nthreads) { for(i3=0; i3<VOL3; i3++) { ix = it * VOL3 + i3; iy = lexic2eot_5d(L5-1, ix); _fv_eq_gamma_ti_fv(smearing_spinor_field[1]+_GSI(iy), 2, smearing_spinor_field[0]+_GSI(ix+(L5-1)*VOLUME)); }} } else { for(it=threadid; it<T; it+=nthreads) { for(i3=0; i3<VOL3; i3++) { ix = it * VOL3 + i3; iy = lexic2eot_5d(0, ix); _fv_eq_fv(smearing_spinor_field[1]+_GSI(iy), smearing_spinor_field[0]+_GSI(ix)); }} for(it=threadid; it<T; it+=nthreads) { for(i3=0; i3<VOL3; i3++) { ix = it * VOL3 + i3; iy = lexic2eot_5d(L5-1, ix); _fv_eq_fv(smearing_spinor_field[1]+_GSI(iy), smearing_spinor_field[0]+_GSI(ix+(L5-1)*VOLUME)); }} } fprintf(stdout, "# [] proc%.2d thread%.2d finished source preparation\n", g_cart_id, threadid); } else if(threadid == 
g_num_threads-1 && dummy_flag > 0) { // else branch on threadid fprintf(stdout, "# [] proc%.2d thread%.2d starting inversion for dummy_flag = %d\n", g_cart_id, threadid, dummy_flag); /*********************************************** * perform the inversion ***********************************************/ if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] starting inversion\n"); xchange_field_5d(g_spinor_field[0]); memset(g_spinor_field[1], 0, (VOLUME+RAND)*L5*24*sizeof(double)); ratime = CLOCK; #ifdef MPI if(inv_param.inv_type == QUDA_BICGSTAB_INVERTER || inv_param.inv_type == QUDA_GCR_INVERTER) { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] calling invertQuda\n"); invertQuda(g_spinor_field[1], g_spinor_field[0], &inv_param); } else if(inv_param.inv_type == QUDA_CG_INVERTER) { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] calling testCG\n"); testCG(g_spinor_field[1], g_spinor_field[0], &inv_param); } else { if(g_cart_id==0) fprintf(stderr, "# [invert_dw_quda] unrecognized inverter\n"); } #else invertQuda(g_spinor_field[1], g_spinor_field[0], &inv_param); #endif retime = CLOCK; if(g_cart_id==0) { fprintf(stdout, "# [invert_dw_quda] QUDA time: %e seconds\n", inv_param.secs); fprintf(stdout, "# [invert_dw_quda] QUDA Gflops: %e\n", inv_param.gflops/inv_param.secs); fprintf(stdout, "# [invert_dw_quda] wall time: %e seconds\n", retime-ratime); fprintf(stdout, "# [invert_dw_quda] Device memory used:\n\tSpinor: %f GiB\n\tGauge: %f GiB\n", inv_param.spinorGiB, gauge_param.gaugeGiB); } } // of if threadid // wait till all threads are here #pragma omp barrier if(inv_param.mass_normalization == QUDA_KAPPA_NORMALIZATION) { _2_kappa = 2. 
* g_kappa5d; for(ix=threadid; ix<VOLUME*L5;ix+=g_num_threads) { _fv_ti_eq_re(g_spinor_field[1]+_GSI(ix), _2_kappa ); } } #pragma omp barrier // reorder, multiply with g2 for(is=0;is<L5;is++) { for(ix=threadid; ix<VOLUME; ix+=g_num_threads) { iy = lexic2eot_5d(is, ix); iix = is*VOLUME + ix; _fv_eq_fv(g_spinor_field[0]+_GSI(iix), g_spinor_field[1]+_GSI(iy)); }} #pragma omp barrier if(rotate_gamma_basis) { for(ix=threadid; ix<VOLUME*L5; ix+=g_num_threads) { _fv_eq_gamma_ti_fv(g_spinor_field[1]+_GSI(ix), 2, g_spinor_field[0]+_GSI(ix)); } } else { for(ix=threadid; ix<VOLUME*L5;ix+=g_num_threads) { _fv_eq_fv(g_spinor_field[1]+_GSI(ix), g_spinor_field[0]+_GSI(ix)); } } if(g_cart_id==0 && threadid==g_num_threads-1) fprintf(stdout, "# [invert_dw_quda] inversion done in %e seconds\n", retime-ratime); #pragma omp single { #ifdef MPI xchange_field_5d(g_spinor_field[1]); #endif /*********************************************** * check residuum ***********************************************/ if(check_residuum && dummy_flag>0) { // apply the Wilson Dirac operator in the gamma-basis defined in cvc_linalg, // which uses the tmLQCD conventions (same as in contractions) // without explicit boundary conditions #ifdef MPI xchange_field_5d(g_spinor_field[2]); xchange_field_5d(g_spinor_field[1]); #endif memset(g_spinor_field[0], 0, 24*(VOLUME+RAND)*L5*sizeof(double)); //sprintf(filename, "%s.inverted.ascii.%.2d", source_filename, g_cart_id); //ofs = fopen(filename, "w"); //printf_spinor_field_5d(g_spinor_field[1], ofs); //fclose(ofs); Q_DW_Wilson_phi(g_spinor_field[0], g_spinor_field[1]); for(ix=0;ix<VOLUME*L5;ix++) { _fv_mi_eq_fv(g_spinor_field[0]+_GSI(ix), g_spinor_field[2]+_GSI(ix)); } spinor_scalar_product_re(&norm2, g_spinor_field[2], g_spinor_field[2], VOLUME*L5); spinor_scalar_product_re(&norm, g_spinor_field[0], g_spinor_field[0], VOLUME*L5); if(g_cart_id==0) fprintf(stdout, "\n# [invert_dw_quda] absolut residuum squared: %e; relative residuum %e\n", norm, sqrt(norm/norm2) ); } 
if(dummy_flag>0) { /*********************************************** * create 4-dim. propagator ***********************************************/ if(convert_sign == 0) { spinor_5d_to_4d(g_spinor_field[1], g_spinor_field[1]); } else if(convert_sign == -1 || convert_sign == +1) { spinor_5d_to_4d_sign(g_spinor_field[1], g_spinor_field[1], convert_sign); } /*********************************************** * write the solution ***********************************************/ sprintf(filename, "%s.inverted", source_filename_write); if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] writing propagator to file %s\n", filename); check_error(write_propagator(g_spinor_field[1], filename, 0, g_propagator_precision), "write_propagator", NULL, 22); //sprintf(filename, "prop.ascii.4d.%.2d.%.2d.%.2d", isc, g_nproc, g_cart_id); //ofs = fopen(filename, "w"); //printf_spinor_field(g_spinor_field[1], ofs); //fclose(ofs); } if(check_residuum) memcpy(g_spinor_field[2], smearing_spinor_field[0], 24*VOLUME*L5*sizeof(double)); } // of omp single } // of omp parallel region if(dummy_flag > 0) strcpy(source_filename_write, source_filename); dummy_flag++; } // of loop on momenta } // of isc #if 0 // last inversion { memcpy(g_spinor_field[0], smearing_spinor_field[1], 24*VOLUME*L5*sizeof(double)); if(g_cart_id==0) fprintf(stdout, "# [] proc%.2d starting last inversion\n", g_cart_id); /*********************************************** * perform the inversion ***********************************************/ if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] starting inversion\n"); xchange_field_5d(g_spinor_field[0]); memset(g_spinor_field[1], 0, (VOLUME+RAND)*L5*24*sizeof(double)); ratime = CLOCK; #ifdef MPI if(inv_param.inv_type == QUDA_BICGSTAB_INVERTER || inv_param.inv_type == QUDA_GCR_INVERTER) { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] calling invertQuda\n"); invertQuda(g_spinor_field[1], g_spinor_field[0], &inv_param); } else if(inv_param.inv_type == QUDA_CG_INVERTER) { 
if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] calling testCG\n"); testCG(g_spinor_field[1], g_spinor_field[0], &inv_param); } else { if(g_cart_id==0) fprintf(stderr, "# [invert_dw_quda] unrecognized inverter\n"); } #else invertQuda(g_spinor_field[1], g_spinor_field[0], &inv_param); #endif retime = CLOCK; if(g_cart_id==0) { fprintf(stdout, "# [invert_dw_quda] QUDA time: %e seconds\n", inv_param.secs); fprintf(stdout, "# [invert_dw_quda] QUDA Gflops: %e\n", inv_param.gflops/inv_param.secs); fprintf(stdout, "# [invert_dw_quda] wall time: %e seconds\n", retime-ratime); fprintf(stdout, "# [invert_dw_quda] Device memory used:\n\tSpinor: %f GiB\n\tGauge: %f GiB\n", inv_param.spinorGiB, gauge_param.gaugeGiB); } omp_set_num_threads(g_num_threads); #pragma omp parallel private(threadid,_2_kappa,is,ix,iy,iix) shared(VOLUME,L5,g_kappa,g_spinor_field,g_num_threads) { threadid = omp_get_thread_num(); if(inv_param.mass_normalization == QUDA_KAPPA_NORMALIZATION) { _2_kappa = 2. * g_kappa5d; for(ix=threadid; ix<VOLUME*L5;ix+=g_num_threads) { _fv_ti_eq_re(g_spinor_field[1]+_GSI(ix), _2_kappa ); } } #pragma omp barrier // reorder, multiply with g2 for(is=0;is<L5;is++) { for(ix=threadid; ix<VOLUME; ix+=g_num_threads) { iy = lexic2eot_5d(is, ix); iix = is*VOLUME + ix; _fv_eq_fv(g_spinor_field[0]+_GSI(iix), g_spinor_field[1]+_GSI(iy)); }} #pragma omp barrier if(rotate_gamma_basis) { for(ix=threadid; ix<VOLUME*L5; ix+=g_num_threads) { _fv_eq_gamma_ti_fv(g_spinor_field[1]+_GSI(ix), 2, g_spinor_field[0]+_GSI(ix)); } } else { for(ix=threadid; ix<VOLUME*L5;ix+=g_num_threads) { _fv_eq_fv(g_spinor_field[1]+_GSI(ix), g_spinor_field[0]+_GSI(ix)); } } } // end of parallel region if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] inversion done in %e seconds\n", retime-ratime); #ifdef MPI xchange_field_5d(g_spinor_field[1]); #endif /*********************************************** * check residuum ***********************************************/ if(check_residuum && dummy_flag>0) { // 
apply the Wilson Dirac operator in the gamma-basis defined in cvc_linalg, // which uses the tmLQCD conventions (same as in contractions) // without explicit boundary conditions #ifdef MPI xchange_field_5d(g_spinor_field[2]); #endif memset(g_spinor_field[0], 0, 24*(VOLUME+RAND)*L5*sizeof(double)); //sprintf(filename, "%s.inverted.ascii.%.2d", source_filename, g_cart_id); //ofs = fopen(filename, "w"); //printf_spinor_field_5d(g_spinor_field[1], ofs); //fclose(ofs); Q_DW_Wilson_phi(g_spinor_field[0], g_spinor_field[1]); for(ix=0;ix<VOLUME*L5;ix++) { _fv_mi_eq_fv(g_spinor_field[0]+_GSI(ix), g_spinor_field[2]+_GSI(ix)); } spinor_scalar_product_re(&norm, g_spinor_field[0], g_spinor_field[0], VOLUME*L5); spinor_scalar_product_re(&norm2, g_spinor_field[2], g_spinor_field[2], VOLUME*L5); if(g_cart_id==0) fprintf(stdout, "\n# [invert_dw_quda] absolut residuum squared: %e; relative residuum %e\n", norm, sqrt(norm/norm2) ); } /*********************************************** * create 4-dim. propagator ***********************************************/ if(convert_sign == 0) { spinor_5d_to_4d(g_spinor_field[1], g_spinor_field[1]); } else if(convert_sign == -1 || convert_sign == +1) { spinor_5d_to_4d_sign(g_spinor_field[1], g_spinor_field[1], convert_sign); } /*********************************************** * write the solution ***********************************************/ sprintf(filename, "%s.inverted", source_filename_write); if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] writing propagator to file %s\n", filename); check_error(write_propagator(g_spinor_field[1], filename, 0, g_propagator_precision), "write_propagator", NULL, 22); //sprintf(filename, "prop.ascii.4d.%.2d.%.2d.%.2d", isc, g_nproc, g_cart_id); //ofs = fopen(filename, "w"); //printf_spinor_field(g_spinor_field[1], ofs); //fclose(ofs); } // of last inversion #endif // of if 0 /*********************************************** * free the allocated memory, finalize ***********************************************/ 
#ifdef HAVE_QUDA // finalize the QUDA library if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] finalizing quda\n"); #ifdef MPI freeGaugeQuda(); #endif endQuda(); #endif if(g_gauge_field != NULL) free(g_gauge_field); if(gauge_field_smeared != NULL) free(gauge_field_smeared); if(no_fields>0) { if(g_spinor_field!=NULL) { for(i=0; i<no_fields; i++) if(g_spinor_field[i]!=NULL) free(g_spinor_field[i]); free(g_spinor_field); } } free_geometry(); if(g_source_momentum_set && full_orbit) { finalize_q_orbits(&qlatt_id, &qlatt_count, &qlatt_list, &qlatt_rep); if(qlatt_map != NULL) { free(qlatt_map[0]); free(qlatt_map); } } if(source_momentum != NULL) free(source_momentum); if(lck != NULL) free(lck); #ifdef MPI #ifdef HAVE_QUDA endCommsQuda(); #else MPI_Finalize(); #endif #endif if(g_cart_id==0) { g_the_time = time(NULL); fprintf(stdout, "\n# [invert_dw_quda] %s# [invert_dw_quda] end of run\n", ctime(&g_the_time)); fprintf(stderr, "\n# [invert_dw_quda] %s# [invert_dw_quda] end of run\n", ctime(&g_the_time)); } return(0); }
int main(int argc, char **argv) { int c, mu, nu, status; int i, j, ncon=-1, ir, is, ic, id; int filename_set = 0; int x0, x1, x2, x3, ix, iix; int y0, y1, y2, y3, iy, iiy; int start_valuet=0, start_valuex=0, start_valuey=0; int num_threads=1, threadid, nthreads; int seed, seed_set=0; double diff1, diff2; /* double *chi=NULL, *psi=NULL; */ double plaq=0., pl_ts, pl_xs, pl_global; double *gauge_field_smeared = NULL; double s[18], t[18], u[18], pl_loc; double spinor1[24], spinor2[24]; double *pl_gather=NULL; double dtmp; complex prod, w, w2; int verbose = 0; char filename[200]; char file1[200]; char file2[200]; FILE *ofs=NULL; double norm, norm2; fermion_propagator_type *prop=NULL, prop2=NULL, seq_prop=NULL, seq_prop2=NULL, prop_aux=NULL, prop_aux2=NULL; int idx, eoflag, shift; float *buffer = NULL; unsigned int VOL3; size_t items, bytes; #ifdef MPI MPI_Init(&argc, &argv); #endif while ((c = getopt(argc, argv, "h?vf:g:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'g': strcpy(file1, optarg); break; case 'h': case '?': default: usage(); break; } } /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); if(g_cart_id==0) fprintf(stdout, "# Reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } if(g_kappa == 0.) { if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n"); usage(); } /* initialize MPI parameters */ mpi_init(argc, argv); /* initialize T etc. 
*/ fprintf(stdout, "# [%2d] parameters:\n"\ "# [%2d] T_global = %3d\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] LX_global = %3d\n"\ "# [%2d] LX = %3d\n"\ "# [%2d] LXstart = %3d\n"\ "# [%2d] LY_global = %3d\n"\ "# [%2d] LY = %3d\n"\ "# [%2d] LYstart = %3d\n",\ g_cart_id, g_cart_id, T_global, g_cart_id, T, g_cart_id, Tstart, g_cart_id, LX_global, g_cart_id, LX, g_cart_id, LXstart, g_cart_id, LY_global, g_cart_id, LY, g_cart_id, LYstart); if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); exit(101); } geometry(); if(init_geometry_5d() != 0) { fprintf(stderr, "ERROR from init_geometry_5d\n"); exit(102); } geometry_5d(); VOL3 = LX*LY*LZ; /* read the gauge field */ alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND); if(g_cart_id==0) fprintf(stdout, "# gauge field file name %s\n", file1); // status = read_nersc_gauge_field_3x3(g_gauge_field, filename, &plaq); // status = read_ildg_nersc_gauge_field(g_gauge_field, filename); status = read_lime_gauge_field_doubleprec(file1); // status = read_nersc_gauge_field(g_gauge_field, filename, &plaq); // status = 0; if(status != 0) { fprintf(stderr, "[apply_Dtm] Error, could not read gauge field\n"); EXIT(11); } #ifdef MPI xchange_gauge(); #endif // measure the plaquette if(g_cart_id==0) fprintf(stdout, "# read plaquette value 1st field: %25.16e\n", plaq); plaquette(&plaq); if(g_cart_id==0) fprintf(stdout, "# measured plaquette value 1st field: %25.16e\n", plaq); sprintf(filename, "%s.dbl", file1); if(g_cart_id==0) fprintf(stdout, "# [] writing gauge field in double precision to file %s\n", filename); status = write_lime_gauge_field(filename, plaq, Nconf, 64); if(status != 0) { fprintf(stderr, "[apply_Dtm] Error, could not write gauge field\n"); EXIT(12); } /*********************************************** * free the allocated memory, finalize ***********************************************/ free(g_gauge_field); free_geometry(); g_the_time = time(NULL); fprintf(stdout, "# [] %s# [] end fo run\n", 
ctime(&g_the_time)); fflush(stdout); fprintf(stderr, "# [] %s# [] end fo run\n", ctime(&g_the_time)); fflush(stderr); #ifdef MPI MPI_Finalize(); #endif return(0); }
int main(int argc, char **argv) { int c, i, mu, nu; int count = 0; int filename_set = 0; int dims[4] = {0,0,0,0}; int l_LX_at, l_LXstart_at; int x0, x1, x2, x3, ix, iix; int sid, status, gid; double *disc = (double*)NULL; double *work = (double*)NULL; double q[4], fnorm; int verbose = 0; int do_gt = 0; char filename[100], contype[200]; double ratime, retime; double plaq; double spinor1[24], spinor2[24], U_[18]; double *gauge_trafo=(double*)NULL; complex w, w1, *cp1, *cp2, *cp3; FILE *ofs; #ifdef MPI // MPI_Init(&argc, &argv); fprintf(stderr, "[jc_ud_x] Error, only non-mpi version implemented\n"); exit(1); #endif while ((c = getopt(argc, argv, "h?f:")) != -1) { switch (c) { case 'f': strcpy(filename, optarg); filename_set=1; break; case 'h': case '?': default: usage(); break; } } /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# Reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } if(g_kappa == 0.) 
{ if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n"); usage(); } fprintf(stdout, "\n**************************************************\n"); fprintf(stdout, "* jc_ud_x\n"); fprintf(stdout, "**************************************************\n\n"); /********************************* * initialize MPI parameters *********************************/ // mpi_init(argc, argv); /* initialize fftw */ dims[0]=T_global; dims[1]=LX; dims[2]=LY; dims[3]=LZ; T = T_global; Tstart = 0; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; fprintf(stdout, "# [%2d] parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] l_LX_at = %3d\n"\ "# [%2d] l_LXstart_at = %3d\n"\ "# [%2d] FFTW_LOC_VOLUME = %3d\n", g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at, g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME); if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); exit(1); } geometry(); /************************************************* * allocate mem for gauge field and spinor fields *************************************************/ alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND); no_fields = 2; g_spinor_field = (double**)calloc(no_fields, sizeof(double*)); for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND); /**************************************** * allocate memory for the contractions ****************************************/ disc = (double*)calloc( 8*VOLUME, sizeof(double)); if( disc == (double*)NULL ) { fprintf(stderr, "could not allocate memory for disc\n"); exit(3); } /*********************************************** * start loop on gauge id.s ***********************************************/ for(gid=g_gaugeid; gid<=g_gaugeid2; gid++) { for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.; sprintf(filename, "%s.%.4d", gaugefilename_prefix, gid); if(g_cart_id==0) fprintf(stdout, "# reading gauge field from file %s\n", filename); read_lime_gauge_field_doubleprec(filename); xchange_gauge(); 
plaquette(&plaq); if(g_cart_id==0) fprintf(stdout, "# measured plaquette value: %25.16e\n", plaq); /*********************************************** * start loop on source id.s ***********************************************/ for(sid=g_sourceid; sid<=g_sourceid2; sid+=g_sourceid_step) { /* reset disc to zero */ for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.; /* read the new propagator to g_spinor_field[0] */ ratime = (double)clock() / CLOCKS_PER_SEC; if(format==0) { sprintf(filename, "%s.%.4d.%.2d.inverted", filename_prefix, gid, sid); if(read_lime_spinor(g_spinor_field[0], filename, 0) != 0) break; } else if(format==1) { sprintf(filename, "%s.%.4d.%.5d.inverted", filename_prefix, gid, sid); if(read_cmi(g_spinor_field[0], filename) != 0) break; } xchange_field(g_spinor_field[0]); retime = (double)clock() / CLOCKS_PER_SEC; if(g_cart_id==0) fprintf(stdout, "# time to read prop.: %e seconds\n", retime-ratime); ratime = (double)clock() / CLOCKS_PER_SEC; /* apply D_W once, save in g_spinor_field[1] */ Hopping(g_spinor_field[1], g_spinor_field[0]); for(ix=0; ix<VOLUME; ix++) { _fv_pl_eq_fv(g_spinor_field[1]+_GSI(ix), g_spinor_field[0]+_GSI(ix)); _fv_ti_eq_re(g_spinor_field[1]+_GSI(ix), 1./(2.*g_kappa)); } xchange_field(g_spinor_field[1]); retime = (double)clock() / CLOCKS_PER_SEC; if(g_cart_id==0) fprintf(stdout, "# time to apply D_W: %e seconds\n", retime-ratime); ratime = (double)clock() / CLOCKS_PER_SEC; /* calculate real and imaginary part */ for(mu=0; mu<4; mu++) { for(ix=0; ix<VOLUME; ix++) { _cm_eq_cm_ti_co(U_, g_gauge_field+_GGI(ix,mu), &(co_phase_up[mu])); _fv_eq_gamma_ti_fv(spinor1, 5, g_spinor_field[0]+_GSI(g_iup[ix][mu])); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_pl_eq_fv(spinor2, spinor1); _fv_eq_cm_ti_fv(spinor1, U_, spinor2); _co_eq_fv_dag_ti_fv(&w, g_spinor_field[0]+_GSI(ix), spinor1); disc[_GWI(mu,ix,VOLUME) ] = g_mu * w.im; _fv_eq_gamma_ti_fv(spinor1, mu, g_spinor_field[1]+_GSI(g_iup[ix][mu])); _fv_pl_eq_fv(spinor1, 
g_spinor_field[1]+_GSI(g_iup[ix][mu])); _fv_eq_cm_ti_fv(spinor2, U_, spinor1); _co_eq_fv_dag_ti_fv(&w, g_spinor_field[0]+_GSI(ix), spinor2); disc[_GWI(mu,ix,VOLUME)+1] = w.im / 3.; } } retime = (double)clock() / CLOCKS_PER_SEC; if(g_cart_id==0) fprintf(stdout, "# time to calculate contractions: %e seconds\n", retime-ratime); /************************************************ * save results ************************************************/ if(g_cart_id == 0) fprintf(stdout, "# save results for gauge id %d and sid %d\n", gid, sid); /* save the result in position space */ fnorm = 1. / g_prop_normsqr; if(g_cart_id==0) fprintf(stdout, "X-fnorm = %e\n", fnorm); for(mu=0; mu<4; mu++) { for(ix=0; ix<VOLUME; ix++) { disc[_GWI(mu,ix,VOLUME) ] *= fnorm; disc[_GWI(mu,ix,VOLUME)+1] *= fnorm; } } sprintf(filename, "jc_ud_x.%.4d.%.4d", gid, sid); sprintf(contype, "jc-u_and_d-X"); write_lime_contraction(disc, filename, 64, 4, contype, gid, sid); //sprintf(filename, "jc_ud_x.%.4d.%.4d.ascii", gid, sid); //write_contraction (disc, NULL, filename, 4, 2, 0); } /* of loop on sid */ } /* of loop on gid */ /*********************************************** * free the allocated memory, finalize ***********************************************/ free(g_gauge_field); for(i=0; i<no_fields; i++) free(g_spinor_field[i]); free(g_spinor_field); free_geometry(); free(disc); return(0); }
int main(int argc, char **argv) { int c, mu, nu, status, gid; int filename_set = 0; int source_location, have_source_flag = 0; int x0, x1, x2, x3, ix, iix; int sx0, sx1, sx2, sx3; int tsize = 0; double *conn = NULL; double *conn2 = NULL; double *conn3 = NULL; int verbose = 0; char filename[200]; double ratime, retime; FILE *ofs; double q[4], wre, wim, dtmp; int check_WI = 0, write_ascii=0; unsigned int VOL3=0; while ((c = getopt(argc, argv, "AWh?vf:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'W': check_WI = 1; fprintf(stdout, "# [get_corr_v5] check Ward Identity\n"); break; case 'A': write_ascii = 1; fprintf(stdout, "# [get_corr_v5] write Pi_mn in ASCII format\n"); break; case 'h': case '?': default: usage(); break; } } g_the_time = time(NULL); // set the default values set_default_input_values(); if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# [get_corr_v5] reading input parameters from file %s\n", filename); read_input_parser(filename); // some checks on the input data if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { fprintf(stdout, "# [get_corr_v5] T=%d, LX=%d, LY=%d, LZ=%d\n", T_global, LX, LY, LZ); if(g_proc_id==0) fprintf(stderr, "[get_corr_v5] Error, T and L's must be set\n"); usage(); } // initialize MPI parameters mpi_init(argc, argv); T = T_global; Tstart = 0; fprintf(stdout, "# [%2d] fftw parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n", g_cart_id, g_cart_id, T, g_cart_id, Tstart); if(init_geometry() != 0) { fprintf(stderr, "[get_corr_v5] Error from init_geometry\n"); EXIT(1); } geometry(); VOL3 = LX*LY*LZ; /**************************************** * allocate memory for the contractions * ****************************************/ conn = (double*)calloc(32 * VOLUME, sizeof(double)); if( (conn==NULL) ) { fprintf(stderr, "[get_corr_v5] Error, could not allocate memory for contr. 
fields\n"); EXIT(2); } conn2= (double*)calloc(2 * T, sizeof(double)); if( (conn2==NULL) ) { fprintf(stderr, "[get_corr_v5] Error, could not allocate memory for corr.\n"); EXIT(3); } conn3= (double*)calloc(2 * T, sizeof(double)); if( (conn3==NULL) ) { fprintf(stderr, "[get_corr_v5] Error, could not allocate memory for corr.\n"); EXIT(3); } /******************************** * determine source coordinates * ********************************/ /* have_source_flag = (int)(g_source_location/(LX*LY*LZ)>=Tstart && g_source_location/(LX*LY*LZ)<(Tstart+T)); if(have_source_flag==1) fprintf(stdout, "# [get_corr_v5] process %2d has source location\n", g_cart_id); sx0 = g_source_location/(LX*LY*LZ)-Tstart; sx1 = (g_source_location%(LX*LY*LZ)) / (LY*LZ); sx2 = (g_source_location%(LY*LZ)) / LZ; sx3 = (g_source_location%LZ); if(have_source_flag==1) { fprintf(stdout, "# [get_corr_v5] local source coordinates: (%3d,%3d,%3d,%3d)\n", sx0, sx1, sx2, sx3); source_location = g_ipt[sx0][sx1][sx2][sx3]; } have_source_flag = 0; */ for(gid=g_gaugeid; gid<=g_gaugeid2; gid+=g_gauge_step) { memset(conn, 0, 32*VOLUME*sizeof(double)); /*********************** * read contractions * ***********************/ ratime = CLOCK; sprintf(filename, "%s.%.4d", filename_prefix, gid); if(format==2 || format==3) { status = read_contraction(conn, NULL, filename, 16); } else if( format==0) { status = read_lime_contraction(conn, filename, 16, 0); } if(status != 0) { // fprintf(stderr, "[get_corr_v5] Error from read_contractions, status was %d\n", status); // EXIT(5); fprintf(stderr, "[get_corr_v5] Warning, could not read contractions for gid %d, status was %d\n", gid, status); continue; } retime = CLOCK; fprintf(stdout, "# [get_corr_v5] time to read contractions %e seconds\n", retime-ratime); // TEST Pi_mm if(write_ascii) { sprintf(filename, "pimm_test.%.4d", gid); ofs = fopen(filename, "w"); if(ofs == NULL) exit(33); fprintf(ofs, "# Pi_mm\n# %s", ctime(&g_the_time)); for(x0=0; x0<T; x0++) { for(x1=0; x1<LX; x1++) { 
for(x2=0; x2<LY; x2++) { for(x3=0; x3<LZ; x3++) { fprintf(ofs, "# t=%3d x=%3d y=%3d z=%3d\n", x0, x1, x2, x3); ix = g_ipt[x0][x1][x2][x3]; for(nu=0;nu<4;nu++) { wre = conn[_GWI(5*nu,ix,VOLUME)]; wim = conn[_GWI(5*nu,ix,VOLUME)+1]; fprintf(ofs, "%3d%16.7e%16.7e\n", nu, wre, wim); } }}}} fclose(ofs); } // of if write_ascii // TEST Ward Identity if(check_WI) { fprintf(stdout, "# [get_corr_v5] Ward identity\n"); sprintf(filename, "WI.%.4d", gid); ofs = fopen(filename, "w"); if(ofs == NULL) exit(32); for(x0=0; x0<T; x0++) { q[0] = 2. * sin(M_PI * (double)x0 / (double)T); for(x1=0; x1<LX; x1++) { q[1] = 2. * sin(M_PI * (double)x1 / (double)LX); for(x2=0; x2<LY; x2++) { q[2] = 2. * sin(M_PI * (double)x2 / (double)LY); for(x3=0; x3<LZ; x3++) { q[3] = 2. * sin(M_PI * (double)x3 / (double)LZ); ix = g_ipt[x0][x1][x2][x3]; for(nu=0;nu<4;nu++) { wre = q[0] * conn[_GWI(4*0+nu,ix,VOLUME)] + q[1] * conn[_GWI(4*1+nu,ix,VOLUME)] \ + q[2] * conn[_GWI(4*2+nu,ix,VOLUME)] + q[3] * conn[_GWI(4*3+nu,ix,VOLUME)]; wim = q[0] * conn[_GWI(4*0+nu,ix,VOLUME)+1] + q[1] * conn[_GWI(4*1+nu,ix,VOLUME)+1] \ + q[2] * conn[_GWI(4*2+nu,ix,VOLUME)+1] + q[3] * conn[_GWI(4*3+nu,ix,VOLUME)+1]; fprintf(ofs, "\t%3d%3d%3d%3d%3d%16.7e%16.7e\n", nu, x0, x1, x2, x3, wre, wim); } }}}} fclose(ofs); } /*********************** * fill the correlator * ***********************/ ratime = CLOCK; memset(conn2, 0, 2*T*sizeof(double)); // (1) V0V0 for(x0=0; x0<T; x0++) { for(ix=0; ix<VOL3; ix++) { iix = _GWI(0,x0*VOL3+ix,VOLUME); conn2[2*x0 ] += conn[iix ]; conn2[2*x0+1] += conn[iix+1]; } } // (2) VKVK memset(conn3, 0, 2*T*sizeof(double)); for(x0=0; x0<T; x0++) { for(ix=0; ix<VOL3; ix++) { iix = x0 * VOL3 + ix; conn3[2*x0 ] += conn[_GWI(5,iix,VOLUME) ] + conn[_GWI(10,iix,VOLUME) ] + conn[_GWI(15,iix,VOLUME) ]; conn3[2*x0+1] += conn[_GWI(5,iix,VOLUME)+1] + conn[_GWI(10,iix,VOLUME)+1] + conn[_GWI(15,iix,VOLUME)+1]; } } // normalization dtmp = 1. 
/ (double)VOL3; for(x0=0; x0<2*T; x0++) { conn2[x0] *= dtmp; } for(x0=0; x0<2*T; x0++) { conn3[x0] *= dtmp; } retime = CLOCK; fprintf(stdout, "# [get_corr_v5] time to fill correlator %e seconds\n", retime-ratime); // TEST /* fprintf(stdout, "# [get_corr_v5] V0V0 correlator\n"); for(x0=0; x0<T; x0++) { fprintf(stdout, "\t%3d%25.16e%25.16e\n",x0, conn2[2*x0], conn2[2*x0+1]); } fprintf(stdout, "# [get_corr_v5] VKVK correlator\n"); for(x0=0; x0<T; x0++) { fprintf(stdout, "\t%3d%25.16e%25.16e\n",x0, conn3[2*x0], conn3[2*x0+1]); } */ /***************************************** * write to file *****************************************/ ratime = CLOCK; sprintf(filename, "p00_corr.%.4d", gid); if( (ofs=fopen(filename, "w")) == NULL ) { fprintf(stderr, "[get_corr_v5] Error, could not open file %s for writing\n", filename); EXIT(6); } x0 = 0; fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn2[2*x0], 0., gid); for(x0=1; x0<T/2; x0++) { fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn2[2*x0], conn2[2*(T-x0)], gid); } x0 = T / 2; fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn2[2*x0], 0., gid); fclose(ofs); sprintf(filename, "pkk_corr.%.4d", gid); if( (ofs=fopen(filename, "w")) == NULL ) { fprintf(stderr, "[get_corr_v5] Error, could not open file %s for writing\n", filename); EXIT(7); } x0 = 0; fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn3[2*x0], 0., gid); for(x0=1; x0<T/2; x0++) { fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn3[2*x0], conn3[2*(T-x0)], gid); } x0 = T / 2; fprintf(ofs, "%3d%3d%3d%25.16e%25.16e%6d\n", 5, 1, x0, conn3[2*x0], 0., gid); fclose(ofs); retime = CLOCK; fprintf(stdout, "# [get_corr_v5] time to write correlator %e seconds\n", retime-ratime); } // of loop on gid /*************************************** * free the allocated memory, finalize * ***************************************/ free_geometry(); if(conn != NULL) free(conn); if(conn2 != NULL) free(conn2); if(conn3 != NULL) free(conn3); 
fprintf(stdout, "# [get_corr_v5] %s# [get_corr_v5] end of run\n", ctime(&g_the_time)); fflush(stdout); fprintf(stderr, "[get_corr_v5] %s[get_corr_v5] end of run\n", ctime(&g_the_time)); fflush(stderr); return(0); }
int main(int argc, char **argv) { int c, i, mu, nu; int count = 0; int filename_set = 0; int dims[4] = {0,0,0,0}; int l_LX_at, l_LXstart_at; int x0, x1, x2, x3, ix, iix; int dxm[4], dxn[4], ixpm, ixpn; int sid; double *disc = (double*)NULL; double *work = (double*)NULL; double q[4], fnorm; int verbose = 0; int do_gt = 0; char filename[100]; double ratime, retime; double plaq, _2kappamu, hpe3_coeff, onepmutilde2, mutilde2; double spinor1[24], spinor2[24], U_[18], U1_[18], U2_[18]; double *gauge_trafo=(double*)NULL; complex w, w1, w2, *cp1, *cp2, *cp3; FILE *ofs; fftw_complex *in=(fftw_complex*)NULL; #ifdef MPI fftwnd_mpi_plan plan_p, plan_m; int *status; #else fftwnd_plan plan_p, plan_m; #endif #ifdef MPI MPI_Init(&argc, &argv); #endif while ((c = getopt(argc, argv, "h?vgf:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'g': do_gt = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'h': case '?': default: usage(); break; } } /* set the default values */ set_default_input_values(); if(filename_set==0) strcpy(filename, "cvc.input"); /* read the input file */ read_input(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } if(g_kappa == 0.) 
{ if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n"); usage(); } /* initialize MPI parameters */ mpi_init(argc, argv); #ifdef MPI if((status = (int*)calloc(g_nproc, sizeof(int))) == (int*)NULL) { MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); exit(7); } #endif /* initialize fftw */ dims[0]=T_global; dims[1]=LX; dims[2]=LY; dims[3]=LZ; #ifdef MPI plan_p = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_BACKWARD, FFTW_MEASURE); plan_m = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_FORWARD, FFTW_MEASURE); fftwnd_mpi_local_sizes(plan_p, &T, &Tstart, &l_LX_at, &l_LXstart_at, &FFTW_LOC_VOLUME); #else plan_p = fftwnd_create_plan(4, dims, FFTW_BACKWARD, FFTW_MEASURE | FFTW_IN_PLACE); plan_m = fftwnd_create_plan(4, dims, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE); T = T_global; Tstart = 0; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; #endif fprintf(stdout, "# [%2d] fftw parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] l_LX_at = %3d\n"\ "# [%2d] l_LXstart_at = %3d\n"\ "# [%2d] FFTW_LOC_VOLUME = %3d\n", g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at, g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME); #ifdef MPI if(T==0) { fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id); MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); exit(2); } #endif if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(1); } geometry(); /* read the gauge field */ alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND); sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf); if(g_cart_id==0) fprintf(stdout, "reading gauge field from file %s\n", filename); read_lime_gauge_field_doubleprec(filename); xchange_gauge(); /* measure the plaquette */ plaquette(&plaq); if(g_cart_id==0) fprintf(stdout, "measured plaquette value: %25.16e\n", plaq); if(do_gt==1) { /*********************************** * initialize gauge transformation 
***********************************/ init_gauge_trafo(&gauge_trafo, 1.); apply_gt_gauge(gauge_trafo); plaquette(&plaq); if(g_cart_id==0) fprintf(stdout, "measured plaquette value after gauge trafo: %25.16e\n", plaq); } /**************************************** * allocate memory for the spinor fields ****************************************/ no_fields = 3; g_spinor_field = (double**)calloc(no_fields, sizeof(double*)); for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND); /**************************************** * allocate memory for the contractions ****************************************/ disc = (double*)calloc( 8*VOLUME, sizeof(double)); if( disc == (double*)NULL ) { fprintf(stderr, "could not allocate memory for disc\n"); # ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); # endif exit(3); } for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.; work = (double*)calloc(48*VOLUME, sizeof(double)); if( work == (double*)NULL ) { fprintf(stderr, "could not allocate memory for work\n"); # ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); # endif exit(3); } /**************************************** * prepare Fourier transformation arrays ****************************************/ in = (fftw_complex*)malloc(FFTW_LOC_VOLUME*sizeof(fftw_complex)); if(in==(fftw_complex*)NULL) { #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(4); } /*********************************************** * start loop on source id.s ***********************************************/ for(sid=g_sourceid; sid<=g_sourceid2; sid++) { /******************************** * read the first propagator ********************************/ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif if(format==0) { sprintf(filename, "%s.%.4d.%.2d.inverted", filename_prefix, Nconf, sid); if(read_lime_spinor(g_spinor_field[2], filename, 0) != 0) break; } else if(format==1) { sprintf(filename, "%s.%.4d.%.5d.inverted", filename_prefix, Nconf, 
sid); if(read_cmi(g_spinor_field[2], filename) != 0) break; } xchange_field(g_spinor_field[2]); #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif fprintf(stdout, "time to read prop.: %e seconds\n", retime-ratime); if(do_gt==1) { /****************************************** * gauge transform the propagators for sid ******************************************/ for(ix=0; ix<VOLUME; ix++) { _fv_eq_cm_ti_fv(spinor1, gauge_trafo+18*ix, g_spinor_field[2]+_GSI(ix)); _fv_eq_fv(g_spinor_field[2]+_GSI(ix), spinor1); } xchange_field(g_spinor_field[2]); } /************************************************ * calculate the source: apply Q_phi_tbc ************************************************/ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif Q_phi_tbc(g_spinor_field[0], g_spinor_field[2]); xchange_field(g_spinor_field[0]); #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "time to calculate source: %e seconds\n", retime-ratime); /************************************************ * HPE: apply BH5 ************************************************/ BH5(g_spinor_field[1], g_spinor_field[2]); for(ix=0; ix<8*VOLUME; ix++) {disc[ix] = 0.;} /* add new contractions to (existing) disc */ # ifdef MPI ratime = MPI_Wtime(); # else ratime = (double)clock() / CLOCKS_PER_SEC; # endif for(mu=0; mu<4; mu++) { /* loop on Lorentz index of the current */ iix = _GWI(mu,0,VOLUME); for(ix=0; ix<VOLUME; ix++) { /* loop on lattice sites */ _cm_eq_cm_ti_co(U_, &g_gauge_field[_GGI(ix, mu)], &co_phase_up[mu]); /* first contribution */ _fv_eq_cm_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(g_iup[ix][mu])]); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_mi_eq_fv(spinor2, spinor1); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(ix)], spinor2); disc[iix ] -= 0.5 * w.re; disc[iix+1] -= 0.5 * w.im; /* second contribution */ _fv_eq_cm_dag_ti_fv(spinor1, U_, 
&g_spinor_field[1][_GSI(ix)]); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_pl_eq_fv(spinor2, spinor1); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(g_iup[ix][mu])], spinor2); disc[iix ] -= 0.5 * w.re; disc[iix+1] -= 0.5 * w.im; iix += 2; } /* of ix */ } /* of mu */ #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "[%2d] time to contract cvc: %e seconds\n", g_cart_id, retime-ratime); #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif /* Fourier transform data, copy to work */ for(mu=0; mu<4; mu++) { memcpy((void*)in, (void*)(disc+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double)); #ifdef MPI fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER); #else fftwnd_one(plan_p, in, NULL); #endif memcpy((void*)(work+_GWI(mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double)); } /******************************** * read the second propagator ********************************/ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif if(format==0) { sprintf(filename, "%s.%.4d.%.2d.inverted", filename_prefix, Nconf, sid+g_resume); if(read_lime_spinor(g_spinor_field[2], filename, 0) != 0) break; } else if(format==1) { sprintf(filename, "%s.%.4d.%.5d.inverted", filename_prefix, Nconf, sid+g_resume); if(read_cmi(g_spinor_field[2], filename) != 0) break; } xchange_field(g_spinor_field[2]); #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif fprintf(stdout, "time to read prop.: %e seconds\n", retime-ratime); if(do_gt==1) { /****************************************** * gauge transform the propagators for sid ******************************************/ for(ix=0; ix<VOLUME; ix++) { _fv_eq_cm_ti_fv(spinor1, gauge_trafo+18*ix, g_spinor_field[2]+_GSI(ix)); _fv_eq_fv(g_spinor_field[2]+_GSI(ix), spinor1); } xchange_field(g_spinor_field[2]); } /************************************************ * calculate the source: apply 
Q_phi_tbc ************************************************/ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif Q_phi_tbc(g_spinor_field[0], g_spinor_field[2]); xchange_field(g_spinor_field[0]); #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "time to calculate source: %e seconds\n", retime-ratime); /************************************************ * HPE: apply BH5 ************************************************/ BH5(g_spinor_field[1], g_spinor_field[2]); for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.; /* add new contractions to (existing) disc */ # ifdef MPI ratime = MPI_Wtime(); # else ratime = (double)clock() / CLOCKS_PER_SEC; # endif for(mu=0; mu<4; mu++) { /* loop on Lorentz index of the current */ iix = _GWI(mu,0,VOLUME); for(ix=0; ix<VOLUME; ix++) { /* loop on lattice sites */ _cm_eq_cm_ti_co(U_, &g_gauge_field[_GGI(ix, mu)], &co_phase_up[mu]); /* first contribution */ _fv_eq_cm_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(g_iup[ix][mu])]); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_mi_eq_fv(spinor2, spinor1); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(ix)], spinor2); disc[iix ] -= 0.5 * w.re; disc[iix+1] -= 0.5 * w.im; /* second contribution */ _fv_eq_cm_dag_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(ix)]); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_pl_eq_fv(spinor2, spinor1); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(g_iup[ix][mu])], spinor2); disc[iix ] -= 0.5 * w.re; disc[iix+1] -= 0.5 * w.im; iix += 2; } /* of ix */ } /* of mu */ #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "[%2d] time to contract cvc: %e seconds\n", g_cart_id, retime-ratime); #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif /* Fourier transform data, copy to work */ for(mu=0; mu<4; mu++) { memcpy((void*)in, (void*)(disc+_GWI(mu,0,VOLUME)), 
2*VOLUME*sizeof(double)); #ifdef MPI fftwnd_mpi(plan_m, 1, in, NULL, FFTW_NORMAL_ORDER); #else fftwnd_one(plan_m, in, NULL); #endif memcpy((void*)(work+_GWI(4+mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double)); } fnorm = 1. / ((double)(T_global*LX*LY*LZ)); fprintf(stdout, "fnorm = %e\n", fnorm); for(mu=0; mu<4; mu++) { for(nu=0; nu<4; nu++) { cp1 = (complex*)(work+_GWI(mu,0,VOLUME)); cp2 = (complex*)(work+_GWI(4+nu,0,VOLUME)); cp3 = (complex*)(work+_GWI(8+4*mu+nu,0,VOLUME)); for(x0=0; x0<T; x0++) { q[0] = (double)(x0+Tstart) / (double)T_global; for(x1=0; x1<LX; x1++) { q[1] = (double)(x1) / (double)LX; for(x2=0; x2<LY; x2++) { q[2] = (double)(x2) / (double)LY; for(x3=0; x3<LZ; x3++) { q[3] = (double)(x3) / (double)LZ; ix = g_ipt[x0][x1][x2][x3]; w.re = cos( M_PI * (q[mu]-q[nu]) ); w.im = sin( M_PI * (q[mu]-q[nu]) ); _co_eq_co_ti_co(&w1, cp1, cp2); _co_eq_co_ti_co(cp3, &w1, &w); _co_ti_eq_re(cp3, fnorm); cp1++; cp2++; cp3++; } } } } } } /* save the result in momentum space */ sprintf(filename, "cvc_hpe5_ft.%.4d.%.2d", Nconf, sid); write_contraction(work+_GWI(8,0,VOLUME), NULL, filename, 16, 0, 0); #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "time to save cvc results: %e seconds\n", retime-ratime); } /* of loop on sid */ /*********************************************** * free the allocated memory, finalize ***********************************************/ free(g_gauge_field); for(i=0; i<no_fields; i++) free(g_spinor_field[i]); free(g_spinor_field); free_geometry(); fftw_free(in); free(disc); free(work); #ifdef MPI fftwnd_mpi_destroy_plan(plan_p); fftwnd_mpi_destroy_plan(plan_m); free(status); MPI_Finalize(); #else fftwnd_destroy_plan(plan_p); fftwnd_destroy_plan(plan_m); #endif return(0); }
int main(int argc, char **argv) { int c, i, mu; int count = 0; int filename_set = 0; int l_LX_at, l_LXstart_at; int x0, x1, x2, x3, ix, it, iy; int sid1, sid2, status, gid; int Thp1, Lhp1, nmom, shift[4], shift2[4], nperm; double *disc1=NULL, *disc2=NULL; double *work = NULL; double r2, fnorm; char filename[100]; double ratime, retime; complex w; int *mom_tab=NULL, *mom_members=NULL, *mom_perm=NULL; FILE *ofs; int perm_tab_3[6][3]; perm_tab_3[0][0] = 0; perm_tab_3[0][1] = 1; perm_tab_3[0][2] = 2; perm_tab_3[1][0] = 1; perm_tab_3[1][1] = 2; perm_tab_3[1][2] = 0; perm_tab_3[2][0] = 2; perm_tab_3[2][1] = 0; perm_tab_3[2][2] = 1; perm_tab_3[3][0] = 0; perm_tab_3[3][1] = 2; perm_tab_3[3][2] = 1; perm_tab_3[4][0] = 1; perm_tab_3[4][1] = 0; perm_tab_3[4][2] = 2; perm_tab_3[5][0] = 2; perm_tab_3[5][1] = 1; perm_tab_3[5][2] = 0; while ((c = getopt(argc, argv, "h?f:")) != -1) { switch (c) { case 'f': strcpy(filename, optarg); filename_set=1; break; case 'h': case '?': default: usage(); break; } } /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# Reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } if(g_kappa == 0.) 
{ if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n"); usage(); } fprintf(stdout, "\n**************************************************\n"); fprintf(stdout, "* jc_ud_tr\n"); fprintf(stdout, "**************************************************\n\n"); /* initialize */ T = T_global; Tstart = 0; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; fprintf(stdout, "# [%2d] parameters:\n"\ "# T = %3d\n"\ "# Tstart = %3d\n"\ "# l_LX_at = %3d\n"\ "# l_LXstart_at = %3d\n"\ "# FFTW_LOC_VOLUME = %3d\n", g_cart_id, T, Tstart, l_LX_at, l_LXstart_at, FFTW_LOC_VOLUME); if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); exit(1); } geometry(); Thp1 = T /2 + 1; Lhp1 = LX/2 + 1; nmom = 3*Lhp1 - 2; /**************************************** * initialize the momenta ****************************************/ mom_tab = (int*)calloc(3*nmom, sizeof(int)); if( mom_tab==NULL) { fprintf(stderr, "could not allocate memory for mom_tab\n"); exit(4); } mom_tab[0] = 0; mom_tab[1] = 0; mom_tab[2] = 0; count=3; for(x1=1; x1<Lhp1; x1++) { mom_tab[count ] = x1; mom_tab[count+1] = 0; mom_tab[count+2] = 0; mom_tab[count+3] = x1; mom_tab[count+4] = 1; mom_tab[count+5] = 0; mom_tab[count+6] = x1; mom_tab[count+7] = 1; mom_tab[count+8] = 1; count+=9; } mom_members = (int*)calloc(Thp1*nmom, sizeof(int)); mom_perm = (int*)calloc(nmom, sizeof(int)); mom_perm[0] = 1; mom_perm[1] = 3; mom_perm[2] = 3; mom_perm[3] = 1; for (i=2; i<Lhp1; i++) { mom_perm[3*i-2] = 3; mom_perm[3*i-1] = 6; mom_perm[3*i ] = 3; } for (i=0; i<nmom; i++) fprintf(stdout, "# %d\t(%d, %d, %d)\t%d\n", i, mom_tab[3*i], mom_tab[3*i+1], mom_tab[3*i+2], mom_perm[i]); /**************************************** * allocate memory for the contractions ****************************************/ disc1 = (double*)calloc(8*VOLUME, sizeof(double)); disc2 = (double*)calloc(8*VOLUME, sizeof(double)); if( disc1==NULL || disc2==NULL) { fprintf(stderr, "could not allocate memory for disc\n"); exit(3); } work = 
(double*)calloc(8*Thp1*nmom, sizeof(double)); if( work == (double*)NULL ) { fprintf(stderr, "could not allocate memory for work\n"); exit(3); } /*********************************************** * start loop on gauge id.s ***********************************************/ for(gid=g_gaugeid; gid<=g_gaugeid2; gid++) { for(ix=0; ix<8*Thp1*nmom; ix++) work[ix] = 0.; for(ix=0; ix<8*VOLUME; ix++) disc2[ix] = 0.; for (i=0; i<Thp1*nmom; i++) mom_members[i] = 0; /*********************************************** * start loop on source id.s ***********************************************/ ratime = (double)clock() / CLOCKS_PER_SEC; for(sid1=g_sourceid; sid1<=g_sourceid2; sid1+=g_sourceid_step) { sprintf(filename, "jc_ud_x.%.4d.%.4d", gid, sid1); if(read_lime_contraction(disc1, filename, 4, 0) != 0) break; for(ix=0; ix<8*VOLUME; ix++) disc2[ix] += disc1[ix]; count=0; for (it=0; it<Thp1; it++) { shift[0] = it; shift2[0] = it; for (i=0; i<nmom; i++) { shift[1] = mom_tab[3*i ]; shift[2] = mom_tab[3*i+1]; shift[3] = mom_tab[3*i+2]; for (mu=0; mu<mom_perm[i]; mu++) { // fprintf(stdout, "# mom=%d,\tperm=%d\n", i, mom_perm[i]); shift2[1] = shift[perm_tab_3[mu][0]+1]; shift2[2] = shift[perm_tab_3[mu][1]+1]; shift2[3] = shift[perm_tab_3[mu][2]+1]; for(x0=shift2[0]; x0<T; x0++) { for(x1=shift2[1]; x1<LX; x1++) { for(x2=shift2[2]; x2<LY; x2++) { for(x3=shift2[3]; x3<LZ; x3++) { ix = g_ipt[x0][x1][x2][x3]; iy = g_ipt[x0-shift2[0]][x1-shift2[1]][x2-shift2[2]][x3-shift2[3]]; // fprintf(stdout, "shift2=(%d,%d,%d,%d); x=(%d,%d,%d,%d); ix=%d, iy=%d\n", // shift2[0], shift2[1],shift2[2],shift2[3], x0, x1, x2, x3, ix, iy); _co_eq_co_ti_co(&w, (complex*)(disc1+_GWI(0,ix,VOLUME)), (complex*)(disc1+_GWI(0,iy,VOLUME))); work[2*( count) ] -= w.re; work[2*( count)+1] -= w.im; _co_eq_co_ti_co(&w, (complex*)(disc1+_GWI(1,ix,VOLUME)), (complex*)(disc1+_GWI(1,iy,VOLUME))); work[2*( Thp1*nmom+count) ] -= w.re; work[2*( Thp1*nmom+count)+1] -= w.im; _co_eq_co_ti_co(&w, (complex*)(disc1+_GWI(2,ix,VOLUME)), 
(complex*)(disc1+_GWI(2,iy,VOLUME))); work[2*(2*Thp1*nmom+count) ] -= w.re; work[2*(2*Thp1*nmom+count)+1] -= w.im; _co_eq_co_ti_co(&w, (complex*)(disc1+_GWI(3,ix,VOLUME)), (complex*)(disc1+_GWI(3,iy,VOLUME))); work[2*(3*Thp1*nmom+count) ] -= w.re; work[2*(3*Thp1*nmom+count)+1] -= w.im; }}}} } count++; } } /* of it=0,...,T/2 */ } /* of loop on sid1 */ count=0; for (it=0; it<Thp1; it++) { shift[0] = it; shift2[0] = it; for (i=0; i<nmom; i++) { shift[1] = mom_tab[3*i ]; shift[2] = mom_tab[3*i+1]; shift[3] = mom_tab[3*i+2]; for (mu=0; mu<mom_perm[i]; mu++) { // fprintf(stdout, "# mom=%d,\tperm=%d\n", i, mom_perm[i]); shift2[1] = shift[perm_tab_3[mu][0]+1]; shift2[2] = shift[perm_tab_3[mu][1]+1]; shift2[3] = shift[perm_tab_3[mu][2]+1]; for(x0=shift2[0]; x0<T; x0++) { for(x1=shift2[1]; x1<LX; x1++) { for(x2=shift2[2]; x2<LY; x2++) { for(x3=shift2[3]; x3<LZ; x3++) { ix = g_ipt[x0][x1][x2][x3]; iy = g_ipt[x0-shift2[0]][x1-shift2[1]][x2-shift2[2]][x3-shift2[3]]; // fprintf(stdout, "shift2=(%d,%d,%d,%d); x=(%d,%d,%d,%d); ix=%d, iy=%d\n", // shift2[0], shift2[1],shift2[2],shift2[3], x0, x1, x2, x3, ix, iy); _co_eq_co_ti_co(&w, (complex*)(disc2+_GWI(0,ix,VOLUME)), (complex*)(disc2+_GWI(0,iy,VOLUME))); work[2*( count) ] += w.re; work[2*( count)+1] += w.im; _co_eq_co_ti_co(&w, (complex*)(disc2+_GWI(1,ix,VOLUME)), (complex*)(disc2+_GWI(1,iy,VOLUME))); work[2*( Thp1*nmom+count) ] += w.re; work[2*( Thp1*nmom+count)+1] += w.im; _co_eq_co_ti_co(&w, (complex*)(disc2+_GWI(2,ix,VOLUME)), (complex*)(disc2+_GWI(2,iy,VOLUME))); work[2*(2*Thp1*nmom+count) ] += w.re; work[2*(2*Thp1*nmom+count)+1] += w.im; _co_eq_co_ti_co(&w, (complex*)(disc2+_GWI(3,ix,VOLUME)), (complex*)(disc2+_GWI(3,iy,VOLUME))); work[2*(3*Thp1*nmom+count) ] += w.re; work[2*(3*Thp1*nmom+count)+1] += w.im; mom_members[count]++; }}}} } count++; } } /* of it=0,...,T/2 */ /* normalization */ count=0; for (it=0; it<Thp1; it++) { for (i=0; i<nmom; i++) { fprintf(stdout, "%d\t%d\t%d\n", it, i, mom_members[count]); count++; } } 
for (mu=0; mu<4; mu++) { count=0; for (it=0; it<Thp1; it++) { for(i=0; i<nmom; i++) { fnorm = 1. / ( (double)mom_members[count] * (double)(g_sourceid2-g_sourceid+1) * (double)(g_sourceid2-g_sourceid) ); // fprintf(stdout, "# fnorm(%d,%2d) = %25.16e\n", mu, count, fnorm); work[2*(mu*Thp1*nmom+count) ] *= fnorm; work[2*(mu*Thp1*nmom+count)+1] *= fnorm; count++; } } } retime = (double)clock() / CLOCKS_PER_SEC; if(g_cart_id == 0) fprintf(stdout, "# time for building correl.: %e seconds\n", retime-ratime); /************************************************ * save results ************************************************/ sprintf(filename, "jc_ud_tr.%4d", gid); ofs = fopen(filename, "w"); if (ofs==NULL) { fprintf(stderr, "Error, could not open file %s for writing\n", filename); } for(mu=0; mu<4; mu++) { count=0; for (it=0; it<Thp1; it++) { for (i=0; i<nmom; i++) { r2 = sqrt( mom_tab[3*i]*mom_tab[3*i] + mom_tab[3*i+1]*mom_tab[3*i+1] + mom_tab[3*i+2]*mom_tab[3*i+2] ); fprintf(ofs, "%3d%3d%3d%3d%16.7e%25.16e%25.16e\n", it, mom_tab[3*i], mom_tab[3*i+1],mom_tab[3*i+2], r2, work[2*(mu*Thp1*nmom+count)], work[2*(mu*Thp1*nmom+count)+1]); count++; } } } fclose(ofs); } /* of loop on gid */ /*********************************************** * free the allocated memory, finalize ***********************************************/ free_geometry(); free(disc1); free(disc2); free(work); free(mom_tab); free(mom_perm); free(mom_members); return(0); }
int main(int argc, char **argv) { int c, i, mu, nu; int count = 0; int filename_set = 0; int dims[4] = {0,0,0,0}; int l_LX_at, l_LXstart_at; int x0, x1, x2, x3, ix, iix; int dxm[4], dxn[4], ixpm, ixpn; int sid; double *disc = (double*)NULL; double *disc2 = (double*)NULL; double *work = (double*)NULL; double q[4], fnorm; int verbose = 0; int do_gt = 0; char filename[100], contype[200]; double ratime, retime; double plaq, _2kappamu, hpe3_coeff, onepmutilde2, mutilde2; double spinor1[24], spinor2[24], U_[18], U1_[18], U2_[18]; double *gauge_trafo=(double*)NULL; complex w, w1, w2, *cp1, *cp2, *cp3; FILE *ofs; fftw_complex *in=(fftw_complex*)NULL; #ifdef MPI fftwnd_mpi_plan plan_p, plan_m; #else fftwnd_plan plan_p, plan_m; #endif #ifdef MPI MPI_Init(&argc, &argv); #endif while ((c = getopt(argc, argv, "h?vgf:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'g': do_gt = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'h': case '?': default: usage(); break; } } /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# Reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } if(g_kappa == 0.) 
{ if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n"); usage(); } /* initialize MPI parameters */ mpi_init(argc, argv); /* initialize fftw */ dims[0]=T_global; dims[1]=LX; dims[2]=LY; dims[3]=LZ; #ifdef MPI plan_p = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_BACKWARD, FFTW_MEASURE); plan_m = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_FORWARD, FFTW_MEASURE); fftwnd_mpi_local_sizes(plan_p, &T, &Tstart, &l_LX_at, &l_LXstart_at, &FFTW_LOC_VOLUME); #else plan_p = fftwnd_create_plan(4, dims, FFTW_BACKWARD, FFTW_MEASURE | FFTW_IN_PLACE); plan_m = fftwnd_create_plan(4, dims, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE); T = T_global; Tstart = 0; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; #endif fprintf(stdout, "# [%2d] fftw parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] l_LX_at = %3d\n"\ "# [%2d] l_LXstart_at = %3d\n"\ "# [%2d] FFTW_LOC_VOLUME = %3d\n", g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at, g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME); #ifdef MPI if(T==0) { fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id); MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); exit(2); } #endif if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(1); } geometry(); /* read the gauge field */ alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND); sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf); if(g_cart_id==0) fprintf(stdout, "reading gauge field from file %s\n", filename); read_lime_gauge_field_doubleprec(filename); #ifdef MPI xchange_gauge(); #endif /* measure the plaquette */ plaquette(&plaq); if(g_cart_id==0) fprintf(stdout, "measured plaquette value: %25.16e\n", plaq); if(do_gt==1) { /*********************************** * initialize gauge transformation ***********************************/ init_gauge_trafo(&gauge_trafo, 1.); apply_gt_gauge(gauge_trafo); plaquette(&plaq); if(g_cart_id==0) 
fprintf(stdout, "measured plaquette value after gauge trafo: %25.16e\n", plaq); } /**************************************** * allocate memory for the spinor fields ****************************************/ no_fields = 3; g_spinor_field = (double**)calloc(no_fields, sizeof(double*)); for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND); /**************************************** * allocate memory for the contractions ****************************************/ disc = (double*)calloc( 8*VOLUME, sizeof(double)); if( disc == (double*)NULL ) { fprintf(stderr, "could not allocate memory for disc\n"); # ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); # endif exit(3); } for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.; disc2 = (double*)calloc( 8*VOLUME, sizeof(double)); if( disc2 == (double*)NULL ) { fprintf(stderr, "could not allocate memory for disc2\n"); # ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); # endif exit(3); } for(ix=0; ix<8*VOLUME; ix++) disc2[ix] = 0.; work = (double*)calloc(48*VOLUME, sizeof(double)); if( work == (double*)NULL ) { fprintf(stderr, "could not allocate memory for work\n"); # ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); # endif exit(3); } /**************************************** * prepare Fourier transformation arrays ****************************************/ in = (fftw_complex*)malloc(FFTW_LOC_VOLUME*sizeof(fftw_complex)); if(in==(fftw_complex*)NULL) { #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(4); } /************************************************ * HPE: calculate coeff. of 3rd order term ************************************************/ _2kappamu = 2. * g_kappa * g_mu; onepmutilde2 = 1. + _2kappamu * _2kappamu; mutilde2 = _2kappamu * _2kappamu; hpe3_coeff = 16. * g_kappa*g_kappa*g_kappa*g_kappa * (1. + 6. * mutilde2 + mutilde2*mutilde2) / onepmutilde2 / onepmutilde2 / onepmutilde2 / onepmutilde2; /* hpe3_coeff = 8. * g_kappa*g_kappa*g_kappa * \ (1. 
+ 6.*_2kappamu*_2kappamu + _2kappamu*_2kappamu*_2kappamu*_2kappamu) / (1. + _2kappamu*_2kappamu) / (1. + _2kappamu*_2kappamu) / (1. + _2kappamu*_2kappamu) / (1. + _2kappamu*_2kappamu); */ fprintf(stdout, "hpe3_coeff = %25.16e\n", hpe3_coeff); /************************************************ * HPE: calculate the plaquette terms ************************************************/ for(ix=0; ix<VOLUME; ix++) { for(mu=0; mu<4; mu++) { for(i=1; i<4; i++) { nu = (mu+i)%4; _cm_eq_cm_ti_cm(U1_, g_gauge_field+_GGI(ix,mu), g_gauge_field+_GGI(g_iup[ix][mu],nu) ); _cm_eq_cm_ti_cm(U2_, g_gauge_field+_GGI(ix,nu), g_gauge_field+_GGI(g_iup[ix][nu],mu) ); _cm_eq_cm_ti_cm_dag(U_, U1_, U2_); _co_eq_tr_cm(&w1, U_); iix = g_idn[ix][nu]; _cm_eq_cm_ti_cm(U1_, g_gauge_field+_GGI(iix,mu), g_gauge_field+_GGI(g_iup[iix][mu],nu) ); _cm_eq_cm_ti_cm(U2_, g_gauge_field+_GGI(iix,nu), g_gauge_field+_GGI(g_iup[iix][nu],mu) ); _cm_eq_cm_ti_cm_dag(U_, U1_, U2_); _co_eq_tr_cm(&w2, U_); disc2[_GWI(mu,ix,VOLUME)+1] += hpe3_coeff * (w1.im - w2.im); /* _cm_eq_cm_ti_cm(U1_, g_gauge_field+_GGI(g_idn[ix][nu],nu), g_gauge_field+_GGI(ix,mu) ); _cm_eq_cm_ti_cm(U2_, g_gauge_field+_GGI(g_idn[ix][nu],mu), g_gauge_field+_GGI(g_iup[g_idn[ix][nu]][mu], nu) ); _cm_eq_cm_ti_cm_dag(U_, U1_, U2_); _co_eq_tr_cm(&w2, U_); disc2[_GWI(mu,ix,VOLUME)+1] += hpe3_coeff * (w1.im + w2.im); */ /* fprintf(stdout, "mu=%1d, ix=%5d, nu=%1d, w1=%25.16e +i %25.16e; w2=%25.16e +i %25.16e\n", mu, ix, nu, w1.re, w1.im, w2.re, w2.im); */ } /* of nu */ /**************************************** * - in case lattice size equals 4 * calculate additional loop term * - _NOTE_ the possible minus sign from * the fermionic boundary conditions ****************************************/ if(dims[mu]==4) { wilson_loop(&w, ix, mu, dims[mu]); fnorm = -64. 
* g_kappa*g_kappa*g_kappa*g_kappa / onepmutilde2 / onepmutilde2 / onepmutilde2 / onepmutilde2; disc2[_GWI(mu,ix,VOLUME)+1] += fnorm * w.im; /* fprintf(stdout, "loop contribution: ix=%5d, mu=%2d, fnorm=%25.16e, w=%25.16e\n", ix, mu, fnorm, w.im); */ } /* fprintf(stdout, "-------------------------------------------\n"); fprintf(stdout, "disc2[ix=%d,mu=%d] = %25.16e +i %25.16e\n", ix, mu, disc2[_GWI(mu,ix,VOLUME)], disc2[_GWI(mu,ix,VOLUME)+1]); fprintf(stdout, "-------------------------------------------\n"); */ } } /* sprintf(filename, "avc_disc_hpe5_3rd.%.4d", Nconf); ofs = fopen(filename, "w"); for(ix=0; ix<VOLUME; ix++) { for(mu=0; mu<4; mu++) { fprintf(ofs, "%6d%3d%25.16e\t%25.16e\n", ix, mu, disc[_GWI(mu,ix,VOLUME)], disc[_GWI(mu,ix,VOLUME)+1]); } } fclose(ofs); for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.; */ /* for(x0=0; x0<T; x0++) { for(x1=0; x1<LX; x1++) { for(x2=0; x2<LY; x2++) { for(x3=0; x3<LZ; x3++) { ix = g_ipt[x0][x1][x2][x3]; for(mu=0; mu<4; mu++) { dxm[0]=0; dxm[1]=0; dxm[2]=0; dxm[3]=0; dxm[mu]=1; for(i=1; i<4; i++) { nu = (mu+i)%4; dxn[0]=0; dxn[1]=0; dxn[2]=0; dxn[3]=0; dxn[nu]=1; ixpm = g_ipt[(x0+dxm[0]+T)%T][(x1+dxm[1]+LX)%LX][(x2+dxm[2]+LY)%LY][(x3+dxm[3]+LZ)%LZ]; ixpn = g_ipt[(x0+dxn[0]+T)%T][(x1+dxn[1]+LX)%LX][(x2+dxn[2]+LY)%LY][(x3+dxn[3]+LZ)%LZ]; _cm_eq_cm_ti_cm(U1_, g_gauge_field + 72*ix+18*mu, g_gauge_field + 72*ixpm+18*nu ); _cm_eq_cm_ti_cm(U2_, g_gauge_field + 72*ix+18*nu, g_gauge_field + 72*ixpn+18*mu ); _cm_eq_cm_ti_cm_dag(U_, U1_, U2_); _co_eq_tr_cm(&w1, U_); ixpm = g_ipt[(x0+dxm[0]-dxn[0]+T)%T][(x1+dxm[1]-dxn[1]+LX)%LX][(x2+dxm[2]-dxn[2]+LY)%LY][(x3+dxm[3]-dxn[3]+LZ)%LZ]; ixpn = g_ipt[(x0-dxn[0]+T)%T][(x1-dxn[1]+LX)%LX][(x2-dxn[2]+LY)%LY][(x3-dxn[3]+LZ)%LZ]; _cm_eq_cm_ti_cm(U1_, g_gauge_field + 72*ixpn+18*nu, g_gauge_field + 72*ix+18*mu); _cm_eq_cm_ti_cm(U2_, g_gauge_field + 72*ixpn+18*mu, g_gauge_field + 72*ixpm+18*nu); _cm_eq_cm_ti_cm_dag(U_, U1_, U2_); _co_eq_tr_cm(&w2, U_); disc2[_GWI(mu,ix,VOLUME)+1] += hpe3_coeff * (w1.im + 
w2.im); fprintf(stdout, "mu=%1d, ix=%5d, nu=%1d, w1=%25.16e; w2=%25.16e\n", mu, ix, nu, w1.im, w2.im); } fprintf(stdout, "-------------------------------------------\n"); fprintf(stdout, "disc2[ix=%d,mu=%d] = %25.16e +i %25.16e\n", ix, mu, disc2[_GWI(mu,ix,VOLUME)], disc2[_GWI(mu,ix,VOLUME)+1]); fprintf(stdout, "-------------------------------------------\n"); } } } } } */ /*********************************************** * start loop on source id.s ***********************************************/ for(sid=g_sourceid; sid<=g_sourceid2; sid+=g_sourceid_step) { /* read the new propagator */ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif if(format==0) { sprintf(filename, "%s.%.4d.%.2d.inverted", filename_prefix, Nconf, sid); if(read_lime_spinor(g_spinor_field[2], filename, 0) != 0) break; } else if(format==1) { sprintf(filename, "%s.%.4d.%.5d.inverted", filename_prefix, Nconf, sid); if(read_cmi(g_spinor_field[2], filename) != 0) break; } xchange_field(g_spinor_field[2]); #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif fprintf(stdout, "time to read prop.: %e seconds\n", retime-ratime); if(do_gt==1) { /****************************************** * gauge transform the propagators for sid ******************************************/ for(ix=0; ix<VOLUME; ix++) { _fv_eq_cm_ti_fv(spinor1, gauge_trafo+18*ix, g_spinor_field[2]+_GSI(ix)); _fv_eq_fv(g_spinor_field[2]+_GSI(ix), spinor1); } xchange_field(g_spinor_field[2]); } count++; /************************************************ * calculate the source: apply Q_phi_tbc ************************************************/ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif Q_phi_tbc(g_spinor_field[0], g_spinor_field[2]); xchange_field(g_spinor_field[0]); #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "time to calculate source: %e seconds\n", 
retime-ratime); /************************************************ * HPE: apply BH5 ************************************************/ BH5(g_spinor_field[1], g_spinor_field[2]); /* add new contractions to (existing) disc */ # ifdef MPI ratime = MPI_Wtime(); # else ratime = (double)clock() / CLOCKS_PER_SEC; # endif for(mu=0; mu<4; mu++) { /* loop on Lorentz index of the current */ iix = _GWI(mu,0,VOLUME); for(ix=0; ix<VOLUME; ix++) { /* loop on lattice sites */ _cm_eq_cm_ti_co(U_, &g_gauge_field[_GGI(ix, mu)], &co_phase_up[mu]); /* first contribution */ _fv_eq_cm_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(g_iup[ix][mu])]); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_mi_eq_fv(spinor2, spinor1); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(ix)], spinor2); disc[iix ] -= 0.5 * w.re; disc[iix+1] -= 0.5 * w.im; /* second contribution */ _fv_eq_cm_dag_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(ix)]); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_pl_eq_fv(spinor2, spinor1); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(g_iup[ix][mu])], spinor2); disc[iix ] -= 0.5 * w.re; disc[iix+1] -= 0.5 * w.im; iix += 2; } /* of ix */ } /* of mu */ #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "# time to contract cvc: %e seconds\n", retime-ratime); /************************************************ * save results for count = multiple of Nsave ************************************************/ if(count%Nsave == 0) { if(g_cart_id == 0) fprintf(stdout, "save results for count = %d\n", count); fnorm = 1. 
/ ( (double)count * g_prop_normsqr ); if(g_cart_id==0) fprintf(stdout, "# X-fnorm = %e\n", fnorm); for(mu=0; mu<4; mu++) { for(ix=0; ix<VOLUME; ix++) { work[_GWI(mu,ix,VOLUME) ] = disc[_GWI(mu,ix,VOLUME) ] * fnorm + disc2[_GWI(mu,ix,VOLUME) ]; work[_GWI(mu,ix,VOLUME)+1] = disc[_GWI(mu,ix,VOLUME)+1] * fnorm + disc2[_GWI(mu,ix,VOLUME)+1]; } } /* save the result in position space */ sprintf(filename, "cvc_hpe5_X.%.4d.%.4d", Nconf, count); sprintf(contype, "cvc-disc-all-hpe-05-X"); write_lime_contraction(work, filename, 64, 4, contype, Nconf, count); /* sprintf(filename, "cvc_hpe5_Xascii.%.4d.%.4d", Nconf, count); write_contraction(work, NULL, filename, 4, 2, 0); */ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif /* Fourier transform data, copy to work */ for(mu=0; mu<4; mu++) { memcpy((void*)in, (void*)(work+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double)); #ifdef MPI fftwnd_mpi(plan_m, 1, in, NULL, FFTW_NORMAL_ORDER); #else fftwnd_one(plan_m, in, NULL); #endif memcpy((void*)(work+_GWI(4+mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double)); memcpy((void*)in, (void*)(work+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double)); #ifdef MPI fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER); #else fftwnd_one(plan_p, in, NULL); #endif memcpy((void*)(work+_GWI(mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double)); } /* of mu =0 ,..., 3*/ fnorm = 1. 
/ (double)(T_global*LX*LY*LZ); if(g_cart_id==0) fprintf(stdout, "# P-fnorm = %e\n", fnorm); for(mu=0; mu<4; mu++) { for(nu=0; nu<4; nu++) { cp1 = (complex*)(work+_GWI(mu,0,VOLUME)); cp2 = (complex*)(work+_GWI(4+nu,0,VOLUME)); cp3 = (complex*)(work+_GWI(8+4*mu+nu,0,VOLUME)); for(x0=0; x0<T; x0++) { q[0] = (double)(x0+Tstart) / (double)T_global; for(x1=0; x1<LX; x1++) { q[1] = (double)(x1) / (double)LX; for(x2=0; x2<LY; x2++) { q[2] = (double)(x2) / (double)LY; for(x3=0; x3<LZ; x3++) { q[3] = (double)(x3) / (double)LZ; ix = g_ipt[x0][x1][x2][x3]; w.re = cos( M_PI * (q[mu]-q[nu]) ); w.im = sin( M_PI * (q[mu]-q[nu]) ); _co_eq_co_ti_co(&w1, cp1, cp2); _co_eq_co_ti_co(cp3, &w1, &w); _co_ti_eq_re(cp3, fnorm); cp1++; cp2++; cp3++; } } } } } } /* save the result in momentum space */ sprintf(filename, "cvc_hpe5_P.%.4d.%.4d", Nconf, count); sprintf(contype, "cvc-disc-all-hpe-05-P"); write_lime_contraction(work+_GWI(8,0,VOLUME), filename, 64, 16, contype, Nconf, count); /* sprintf(filename, "cvc_hpe5_Pascii.%.4d.%.4d", Nconf, count); write_contraction(work+_GWI(8,0,VOLUME), NULL, filename, 16, 2, 0); */ #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "# time to save cvc results: %e seconds\n", retime-ratime); } /* of count % Nsave == 0 */ } /* of loop on sid */ /*********************************************** * free the allocated memory, finalize ***********************************************/ free(g_gauge_field); for(i=0; i<no_fields; i++) free(g_spinor_field[i]); free(g_spinor_field); free_geometry(); fftw_free(in); free(disc); free(work); #ifdef MPI fftwnd_mpi_destroy_plan(plan_p); fftwnd_mpi_destroy_plan(plan_m); MPI_Finalize(); #else fftwnd_destroy_plan(plan_p); fftwnd_destroy_plan(plan_m); #endif return(0); }
int main(int argc, char **argv) { int K=32, nfc=2, timeslice, status; int c, i, j, k, ll, t, id, mu, icol; int count, position=-1, position_set=0; size_t shift; long unsigned int VOL3; int filename_set = 0; int x0, x1, x2, x3, ix, idx; int n_c=1, n_s=4; int *xgindex1=NULL, *xgindex2=NULL, *xisimag=NULL; int write_ascii=0; int prop_single_file=0; double *xvsign=NULL; void *cconn=NULL; void *buffer = NULL; int sigmalight=0, sigmaheavy=0; double correlator_norm = 1.; void * vptr; int source_coords[4]; int verbose = 0; size_t prec = 64, bytes; char filename[200]; double ratime, retime; void *chi=NULL, *psi=NULL; FILE *ofs=NULL, *ofs2=NULL; double c_conf_gamma_sign[] = {1., 1., 1., -1., -1., -1., -1., 1., 1., 1., -1., -1., 1., 1., 1., 1.}; double n_conf_gamma_sign[] = {1., 1., 1., -1., -1., -1., -1., 1., 1., 1., 1., 1., -1., -1., 1., 1.}; double *conf_gamma_sign=NULL; void *spinor_field=NULL; DML_Checksum *checksum=NULL; /************************************************************************************************** * charged stuff * * (pseudo-)scalar: * g5 - g5, g5 - g0g5, g0g5 - g5, g0g5 - g0g5, * g0 - g0, g5 - g0, g0 - g5, g0g5 - g0, * g0 - g0g5, 1 - 1, 1 - g5, g5 - 1, * 1 - g0g5, g0g5 - 1, 1 - g0, g0 - 1 * * (pseudo-)vector: * gig0 - gig0, gi - gi, gig5 - gig5, gig0 - gi, * gi - gig0, gig0 - gig5, gig5 - gig0, gi - gig5, * gig5 - gi, gig0g5 - gig0g5, gig0 - gig0g5, gig0g5 - gig0, * gi - gig0g5, gig0g5 - gi, gig5 - gig0g5, gig0g5 - gig5 **************************************************************************************************/ int gindex1[] = {5, 5, 6, 6, 0, 5, 0, 6, 0, 4, 4, 5, 4, 6, 4, 0, 10, 11, 12, 1, 2, 3, 7, 8, 9, 10, 11, 12, 1, 2, 3, 10, 11, 12, 7, 8, 9, 1, 2, 3, 7, 8, 9, 13, 14, 15, 10, 11, 12, 15, 14, 13, 1, 2, 3, 15, 14, 13, 7, 8, 9, 15, 14, 13}; int gindex2[] = {5, 6, 5, 6, 0, 0, 5, 0, 6, 4, 5, 4, 6, 4, 0, 4, 10, 11, 12, 1, 2, 3, 7, 8, 9, 1, 2, 3, 10, 11, 12, 7, 8, 9, 10, 11, 12, 7, 8, 9, 1, 2, 3, 13, 14, 15, 15, 14, 13, 10, 11, 12, 15, 14, 13, 
1, 2, 3, 15, 14, 13, 7, 8, 9}; /* due to twisting we have several correlators that are purely imaginary */ int isimag[] = {0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0}; double vsign[] = {1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., -1., 1., 1., -1., 1., 1., -1., 1., 1., -1., 1., 1., -1., 1., 1., -1., 1.}; /************************************************************************************************** * neutral stuff * * (pseudo-)scalar: * g5 - g5, g5 - g0g5, g0g5 - g5, g0g5 - g0g5, * 1 - 1, g5 - 1, 1 - g5, g0g5 - 1, * 1 - g0g5, g0 - g0, g0 - g5, g5 - g0, * g0 - g0g5, g0g5 - g0, g0 - 1, 1 - g0 * * (pseudo-)vector: * gig0 - gig0, gi - gi, gig0g5 - gig0g5, gig0 - gi, * gi - gig0, gig0 - gig0g5, gig0g5 - gig0, gi - gig0g5, * gig0g5 - gi gig5 - gig5, gig5 - gi, gi - gig5, * gig5 - gig0, gig0 - gig5, gig5 - gig0g5, gig0g5 - gig5 **************************************************************************************************/ int ngindex1[] = {5, 5, 6, 6, 4, 5, 4, 6, 4, 0, 0, 5, 0, 6, 0, 4, 10, 11, 12, 1, 2, 3, 13, 14, 15, 10, 11, 12, 1, 2, 3, 10, 11, 12, 15, 14, 13, 1, 2, 3, 15, 14, 13, 7, 8, 9, 7, 8, 9, 1, 2, 3, 7, 8, 9, 10, 11, 12, 7, 8, 9, 15, 14, 13}; int ngindex2[] = {5, 6, 5, 6, 4, 4, 5, 4, 6, 0, 5, 0, 6, 0, 4, 0, 10, 11, 12, 1, 2, 3, 13, 14, 15, 1, 2, 3, 10, 11, 12, 15, 14, 13, 10, 11, 12, 15, 14, 13, 1, 2, 3, 7, 8, 9, 1, 2, 3, 7, 8, 9, 10, 11, 12, 7, 8, 9, 15, 14, 13, 7, 8, 9}; int nisimag[] = {0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0}; double nvsign[] = {1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., -1., 1., 1., -1., 1., 1., -1., 1., 1., -1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., -1., 1., 1., -1., 1. 
}; /* double isneg_std[]= {+1., -1., +1., -1., +1., +1., +1., +1., -1., +1., +1., +1., +1., +1., +1., +1., -1., +1., -1., -1., +1., +1., +1., -1., +1., -1., +1., +1., +1., +1., +1., +1.}; */ double isneg_std[]= {+1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1., +1.}; double isneg[32]; while ((c = getopt(argc, argv, "sah?vf:c:p:n:P:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'c': n_c = atoi(optarg); break; case 'p': position = atoi(optarg); position_set = 1; break; case 'a': write_ascii = 1; fprintf(stdout, "# [] will write in ascii format\n"); break; case 'n': nfc = atoi(optarg); fprintf(stdout, "# [] number of flavor combinations set to %d\n", nfc); break; case 's': prop_single_file = 1; fprintf(stdout, "# [] will read up and down from same file\n"); break; case 'P': prec = (size_t)atoi(optarg); fprintf(stdout, "# [] set precision to %lu\n", prec); break; case 'h': case '?': default: usage(); break; } } /* the global time stamp */ g_the_time = time(NULL); fprintf(stdout, "\n# [ll_conn_x2dep_extract] using global time stamp %s", ctime(&g_the_time)); /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } if( Nlong > 0 ) { if(g_proc_id==0) fprintf(stdout, "Fuzzing not available in this version.\n"); usage(); } /* initialize MPI parameters */ mpi_init(argc, argv); VOL3 = LX*LY*LZ; fprintf(stdout, "# [%2d] parameters:\n"\ "# [%2d] T_global = %3d\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] LX_global = %3d\n"\ "# [%2d] LX = %3d\n"\ "# [%2d] LXstart = %3d\n"\ "# [%2d] LY_global = %3d\n"\ "# [%2d] LY = 
%3d\n"\ "# [%2d] LYstart = %3d\n", g_cart_id, g_cart_id, T_global, g_cart_id, T, g_cart_id, Tstart, g_cart_id, LX_global, g_cart_id, LX, g_cart_id, LXstart, g_cart_id, LY_global, g_cart_id, LY, g_cart_id, LYstart); if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 2); MPI_Finalize(); #endif exit(1); } // switch to double precision of single precision was set if(prec == 32) { fprintf(stderr, "[] Warning: switching to double precision\n"); prec = 64; } geometry(); fprintf(stdout, "# ll_conn wout MPI\n"); fprintf(stdout, "# number of colours = %d\n", n_c); if(position_set == 0) { position = g_propagator_position; if(g_cart_id == 0) fprintf(stdout, "# using input file value for prop pos %d\n", position); } else { if(g_cart_id == 0) fprintf(stdout, "# using command line arg value for prop pos %d\n", position); } /********************************************* * set the isneg field *********************************************/ for(i = 0; i < K; i++) isneg[i] = isneg_std[i]; /********************************************************* * allocate memory for the spinor fields *********************************************************/ no_fields = n_s; if(nfc>1) { no_fields *= 2; } if(prec==64) { spinor_field = calloc(no_fields, sizeof(double*)); } else { spinor_field = calloc(no_fields, sizeof(float*)); } if(g_cart_id==0) fprintf(stdout, "# no. 
of spinor fields is %d\n", no_fields); if(prec==64) { for(i=0; i<no_fields-1; i++) { ((double**)spinor_field)[i] = (double*)malloc(24*VOL3*sizeof(double)); if( ((double**)spinor_field)[i] == NULL) { fprintf(stderr, "Error, could not alloc spinor field %d\n", i); exit(12); } } ((double**)spinor_field)[i] = (double*)malloc(24*VOL3*sizeof(double)); if( ((double**)spinor_field)[i] == NULL) { fprintf(stderr, "Error, could not alloc spinor field %d\n", i); exit(12); } } else { for(i=0; i<no_fields-1; i++) { ((float**)spinor_field)[i] = (float*)malloc(24*VOL3*sizeof(float)); if(((float**)spinor_field)[i] == NULL) { fprintf(stderr, "Error, could not alloc spinor field %d\n", i); exit(14); } } ((float**)spinor_field)[i] = (float*)malloc(24*VOL3*sizeof(float)); if( ((float**)spinor_field)[i] == NULL) { fprintf(stderr, "Error, could not alloc spinor field %d\n", i); exit(14); } } checksum = (DML_Checksum*)malloc(2*n_c*n_s*sizeof(DML_Checksum)); if(checksum == NULL) { fprintf(stderr, "[] Error, could not alloc checksumßn"); exit(75); } /********************************************************* * allocate memory for the contractions *********************************************************/ bytes = (prec==64) ? 
sizeof(double) : sizeof(float); cconn = calloc(2*nfc*K*VOL3, bytes); if( cconn==NULL ) { fprintf(stderr, "could not allocate memory for cconn\n"); exit(3); } buffer = calloc(2*nfc*K*LZ, bytes); if( buffer==NULL) { fprintf(stderr, "could not allocate memory for buffers\n"); exit(4); } /****************************************************************** * calculate source coordinates ******************************************************************/ source_coords[0] = g_source_location / (LX_global*LY_global*LZ); source_coords[1] = ( g_source_location % (LX_global*LY_global*LZ) ) / (LY_global*LZ); source_coords[2] = ( g_source_location % (LY_global*LZ) ) / LZ; source_coords[3] = g_source_location % LZ; if(g_cart_id==0) fprintf(stdout, "# source coords = %3d%3d%3d%3d\n", source_coords[0], source_coords[1], source_coords[2], source_coords[3]); /****************************************************************** * final normalization of the correlators ******************************************************************/ /* correlator_norm = 1. / ( 2. 
* g_kappa * g_kappa * (double)(LX_global*LY_global*LZ) );*/ correlator_norm = 1.; if(g_cart_id==0) fprintf(stdout, "# correlator_norm = %12.5e\n", correlator_norm); /****************************************************************** ****************************************************************** ** ** ** local - local ** ** ** ****************************************************************** ******************************************************************/ if(g_cart_id==0) fprintf(stdout, "# Starting LL\n"); for(timeslice=0;timeslice<T_global;timeslice++) { if(prec==64) { for(idx=0; idx<2*nfc*K*VOL3; idx++) ((double*)cconn)[idx] = 0.; } else { for(idx=0; idx<2*nfc*K*VOL3; idx++) ((float*)cconn)[idx] = 0.; } for(icol=0;icol<n_c;icol++) { ratime = (double)clock() / CLOCKS_PER_SEC; for(i=0; i<n_s; i++) { if(prec==64) { sprintf(filename, "%s.%.4d.00.%.2d.inverted", filename_prefix, Nconf, n_c*i+icol); status = read_lime_spinor_timeslice( (double*)(((double**)spinor_field)[i]), timeslice, filename, position, checksum+n_c*i+icol); } else { fprintf(stderr, "[] Error, no single precision timeslice-wise reading yet\n"); exit(72); } if (status != 0) { fprintf(stderr, "[] Error, could not read propagator from file %s\n", filename); exit(73); } if(nfc>1) { if(prop_single_file) { if(prec==64) { status = read_lime_spinor_timeslice( (double*)(((double**)spinor_field)[i+n_s]), timeslice, filename, 1-position, checksum+n_c*i+icol+n_s*n_c); } else { fprintf(stderr, "[] Error, no single precision timeslice-wise reading yet\n"); exit(72); } } else { if(prec==64) { sprintf(filename, "%s.%.4d.00.%.2d.inverted", filename_prefix2, Nconf, n_c*i+icol); status = read_lime_spinor_timeslice((double*)(((double**)spinor_field)[i+n_s]), timeslice, filename, position, checksum+n_c*i+icol+n_s*n_c); } else { fprintf(stderr, "[] Error, no single precision timeslice-wise reading yet\n"); exit(72); } } if (status != 0) { fprintf(stderr, "[] Error, could not read propagator from file %s\n", 
filename); exit(74); } } // of if nfc > 1 } // of is retime = (double)clock() / CLOCKS_PER_SEC; if(g_cart_id==0) fprintf(stdout, "# time for preparing light prop.: %e seconds\n", retime-ratime); ratime = (double)clock() / CLOCKS_PER_SEC; count = -1; for(sigmalight=1; sigmalight>=-1; sigmalight-=2) { for(sigmaheavy=1; sigmaheavy>=-1; sigmaheavy-=2) { count++; if(count>=nfc) continue; if(prec==64) { chi = (void*) &( ((double**)spinor_field)[ ( (1-sigmalight)/2 )*n_s ] ); psi = (void*) &( ((double**)spinor_field)[ ( (1-sigmaheavy)/2 )*n_s ] ); } else { chi = (void*) &( ((float**)spinor_field)[ ( (1-sigmalight)/2 )*n_s ] ); psi = (void*) &( ((float**)spinor_field)[ ( (1-sigmaheavy)/2 )*n_s ] ); } if(sigmalight == sigmaheavy) { xgindex1 = gindex1; xgindex2 = gindex2; xisimag=isimag; xvsign=vsign; conf_gamma_sign = c_conf_gamma_sign; } else { xgindex1 = ngindex1; xgindex2 = ngindex2; xisimag=nisimag; xvsign=nvsign; conf_gamma_sign = n_conf_gamma_sign; } // (pseudo-)scalar sector for(idx=0; idx<16; idx++) { //fprintf(stdout, "# sigma(%d, %d): (idx,i) = (%d,%d) ---> (%d,%d)\n", // sigmalight, sigmaheavy, idx, i, xgindex1[idx], xgindex2[idx]); if(prec==64) { vptr = (void*)( ((double*)cconn) + 2*(count*K + idx) ); } else { vptr = (void*)( ((float*)cconn) + 2*(count*K + idx) ); } contract_twopoint_xdep_timeslice(vptr, xgindex1[idx], xgindex2[idx], chi, psi, 1, nfc*K, 1.0, prec); } // (pseudo-)vector sector for(idx = 16; idx < 64; idx+=3) { for(i = 0; i < 3; i++) { //if(xgindex1[idx+i]==xgindex2[idx+i] && (xgindex2[idx+i]==1 || xgindex2[idx+i]==2 || xgindex2[idx+i]==3) ) { // fprintf(stdout, "# sigma(%d, %d): (idx,i) = (%d,%d) ---> (%d,%d); factor = %e\n", // sigmalight, sigmaheavy, idx, i, xgindex1[idx+i], xgindex2[idx+i], conf_gamma_sign[(idx-16)/3]*xvsign[idx-16+i]); //} if(prec==64) { vptr = (void*)( ((double*)cconn) + 2*(count*K + (16+(idx-16)/3)) ); } else { vptr = (void*)( ((float*)cconn) + 2*(count*K + (16+(idx-16)/3)) ); } contract_twopoint_xdep_timeslice(vptr, 
xgindex1[idx+i], xgindex2[idx+i], chi, psi, 1, nfc*K, conf_gamma_sign[(idx-16)/3]*xvsign[idx-16+i], prec); } } }} } // of loop on colors /*************************************************************** * write contractions to file ***************************************************************/ ratime = (double)clock() / CLOCKS_PER_SEC; sprintf(filename, "correl.%.4d.t%.2dx%.2dy%.2dz%.2d", Nconf, source_coords[0], source_coords[1], source_coords[2], source_coords[3]); if(timeslice == 0) { // fprintf(stdout, "# [] opening file %s for writing\n", filename); ofs = fopen(filename, "w"); } else { // fprintf(stdout, "# [] opening file %s for appending\n", filename); ofs = fopen(filename, "a"); } if(ofs==NULL) { fprintf(stderr, "Error, could not open file %s for writing\n", filename); exit(7); } if(write_ascii) { sprintf(filename, "correl.%.4d.t%.2dx%.2dy%.2dz%.2d.ascii", Nconf, source_coords[0], source_coords[1], source_coords[2], source_coords[3]); if(timeslice == 0) { ofs2 = fopen(filename, "w"); } else { ofs2 = fopen(filename, "a"); } } for(x1=0; x1<LX_global; x1++) { for(x2=0; x2<LY_global; x2++) { shift = ( (x1 % LX) * LY + (x2 % LY) ) * LZ; if(prec==64) { vptr = (void*)( ((double*)cconn)+shift*2*nfc*K); bytes = sizeof(double); } else { vptr = (void*)( ((float*)cconn)+shift*2*nfc*K); bytes = sizeof(float); } if( fwrite(vptr, bytes, 2*nfc*K*LZ, ofs) != 2*nfc*K*LZ ) { fprintf(stderr, "Error, could not write proper amount of data\n"); exit(8); } if(write_ascii) { for(x3=0; x3<LZ; x3++) { count = -1; for(j=0; j<nfc; j++) { for(i=0; i<K; i++) { count++; if(prec==64) { fprintf(ofs2, "%3d%3d%3d%3d%3d%3d%6lu%25.16e%25.16e\n", j, i, timeslice, x1, x2, x3, shift, ((double*)cconn)[(shift+x3)*2*nfc*K+2*count], ((double*)cconn)[(shift+x3)*2*nfc*K+2*count+1]); } else { fprintf(ofs2, "%3d%3d%3d%3d%3d%3d%6lu%16.7e%16.7e\n", j, i, timeslice, x1, x2, x3, shift, ((float*)cconn)[(shift+x3)*2*nfc*K+2*count], ((float*)cconn)[(shift+x3)*2*nfc*K+2*count+1]); } }} } } // of if write_ascii 
}} if(g_cart_id==0) { if(ofs != NULL) fclose(ofs); if(ofs2 != NULL) fclose(ofs2); } retime = (double)clock() / CLOCKS_PER_SEC; if(g_cart_id==0) fprintf(stdout, "# time to write LL contractions: %e seconds\n", retime-ratime); } // of loop on timeslices if(g_cart_id==0) fprintf(stdout, "# finished LL contractions\n"); /************************************************** * free the allocated memory, finalize **************************************************/ if(no_fields>0) { if(prec==64) { for(i=0; i<no_fields; i++) free( ((double**)spinor_field)[i]); } else { for(i=0; i<no_fields; i++) free( ((float**)spinor_field)[i]); } free(spinor_field); } free_geometry(); free(cconn); free(buffer); #ifdef MPI MPI_Finalize(); #endif fprintf(stdout, "\n# [ll_conn] %s# [ll_conn] end of run\n", ctime(&g_the_time)); fflush(stdout); fprintf(stderr, "\n# [ll_conn] %s# [ll_conn] end of run\n", ctime(&g_the_time)); fflush(stderr); return(0); }
int main(int argc, char **argv) { const int n_c=3; const int n_s=4; const char outfile_prefix[] = "delta_pp_2pt_v4"; int c, i, icomp; int filename_set = 0; int append, status; int l_LX_at, l_LXstart_at; int ix, it, iix, x1,x2,x3; int ir, ir2, is; int VOL3; int do_gt=0; int dims[3]; double *connt=NULL; spinor_propagator_type *connq=NULL; int verbose = 0; int sx0, sx1, sx2, sx3; int write_ascii=0; int fermion_type = 1; // Wilson fermion type int pos; char filename[200], contype[200], gauge_field_filename[200]; double ratime, retime; //double plaq_m, plaq_r; double *work=NULL; fermion_propagator_type *fp1=NULL, *fp2=NULL, *fp3=NULL, *uprop=NULL, *dprop=NULL, *fpaux=NULL; spinor_propagator_type *sp1=NULL, *sp2=NULL; double q[3], phase, *gauge_trafo=NULL; complex w, w1; size_t items, bytes; FILE *ofs; int timeslice; DML_Checksum ildg_gauge_field_checksum, *spinor_field_checksum=NULL, connq_checksum; uint32_t nersc_gauge_field_checksum; int threadid, nthreads; /******************************************************************* * Gamma components for the Delta: * */ const int num_component = 4; int gamma_component[2][4] = { {0, 1, 2, 3}, {0, 1, 2, 3} }; double gamma_component_sign[4] = {+1.,+1.,-1.,+1.}; /* *******************************************************************/ fftw_complex *in=NULL; #ifdef MPI fftwnd_mpi_plan plan_p; #else fftwnd_plan plan_p; #endif #ifdef MPI MPI_Status status; #endif #ifdef MPI MPI_Init(&argc, &argv); #endif while ((c = getopt(argc, argv, "ah?vgf:F:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'a': write_ascii = 1; fprintf(stdout, "# [] will write in ascii format\n"); break; case 'F': if(strcmp(optarg, "Wilson") == 0) { fermion_type = _WILSON_FERMION; } else if(strcmp(optarg, "tm") == 0) { fermion_type = _TM_FERMION; } else { fprintf(stderr, "[] Error, unrecognized fermion type\n"); exit(145); } fprintf(stdout, "# [] will use fermion type %s ---> no. 
%d\n", optarg, fermion_type); break; case 'g': do_gt = 1; fprintf(stdout, "# [] will perform gauge transform\n"); break; case 'h': case '?': default: usage(); break; } } /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } if(g_kappa == 0.) { if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n"); usage(); } #ifdef OPENMP omp_set_num_threads(g_num_threads); #else fprintf(stdout, "[delta_pp_2pt_v4] Warning, resetting global thread number to 1\n"); g_num_threads = 1; #endif /* initialize MPI parameters */ mpi_init(argc, argv); #ifdef OPENMP status = fftw_threads_init(); if(status != 0) { fprintf(stderr, "\n[] Error from fftw_init_threads; status was %d\n", status); exit(120); } #endif /****************************************************** * ******************************************************/ VOL3 = LX*LY*LZ; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; fprintf(stdout, "# [%2d] parameters:\n"\ "# [%2d] l_LX_at = %3d\n"\ "# [%2d] l_LXstart_at = %3d\n"\ "# [%2d] FFTW_LOC_VOLUME = %3d\n", g_cart_id, g_cart_id, l_LX_at, g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME); if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); exit(1); } geometry(); if(N_Jacobi>0) { // alloc the gauge field alloc_gauge_field(&g_gauge_field, VOL3); switch(g_gauge_file_format) { case 0: sprintf(gauge_field_filename, "%s.%.4d", gaugefilename_prefix, Nconf); break; case 1: sprintf(gauge_field_filename, "%s.%.5d", gaugefilename_prefix, Nconf); break; } } else { g_gauge_field = NULL; } /********************************************************************* * gauge transformation *********************************************************************/ if(do_gt) { 
init_gauge_trafo(&gauge_trafo, 1.); } // determine the source location sx0 = g_source_location/(LX*LY*LZ)-Tstart; sx1 = (g_source_location%(LX*LY*LZ)) / (LY*LZ); sx2 = (g_source_location%(LY*LZ)) / LZ; sx3 = (g_source_location%LZ); // g_source_time_slice = sx0; fprintf(stdout, "# [] source location %d = (%d,%d,%d,%d)\n", g_source_location, sx0, sx1, sx2, sx3); // allocate memory for the spinor fields g_spinor_field = NULL; no_fields = n_s*n_c; // if(fermion_type == _TM_FERMION) { // no_fields *= 2; // } if(N_Jacobi>0) no_fields++; g_spinor_field = (double**)calloc(no_fields, sizeof(double*)); for(i=0; i<no_fields-1; i++) alloc_spinor_field(&g_spinor_field[i], VOL3); alloc_spinor_field(&g_spinor_field[no_fields-1], VOL3); work = g_spinor_field[no_fields-1]; spinor_field_checksum = (DML_Checksum*)malloc(no_fields * sizeof(DML_Checksum) ); if(spinor_field_checksum == NULL ) { fprintf(stderr, "[] Error, could not alloc checksums for spinor fields\n"); exit(73); } // allocate memory for the contractions items = 4* num_component*T; bytes = sizeof(double); connt = (double*)malloc(items*bytes); if(connt == NULL) { fprintf(stderr, "\n[] Error, could not alloc connt\n"); exit(2); } for(ix=0; ix<items; ix++) connt[ix] = 0.; items = num_component * (size_t)VOL3; connq = create_sp_field( items ); if(connq == NULL) { fprintf(stderr, "\n[] Error, could not alloc connq\n"); exit(2); } /****************************************************** * initialize FFTW ******************************************************/ items = 2 * num_component * g_sv_dim * g_sv_dim * VOL3; bytes = sizeof(double); in = (fftw_complex*)malloc(num_component*g_sv_dim*g_sv_dim*VOL3*sizeof(fftw_complex)); if(in == NULL) { fprintf(stderr, "[] Error, could not malloc in for FFTW\n"); exit(155); } dims[0]=LX; dims[1]=LY; dims[2]=LZ; //plan_p = fftwnd_create_plan(3, dims, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE); plan_p = fftwnd_create_plan_specific(3, dims, FFTW_FORWARD, FFTW_MEASURE, in, 
num_component*g_sv_dim*g_sv_dim, (fftw_complex*)( connq[0][0] ), num_component*g_sv_dim*g_sv_dim); uprop = (fermion_propagator_type*)malloc(g_num_threads * sizeof(fermion_propagator_type) ); fp1 = (fermion_propagator_type*)malloc(g_num_threads * sizeof(fermion_propagator_type) ); fp2 = (fermion_propagator_type*)malloc(g_num_threads * sizeof(fermion_propagator_type) ); fp3 = (fermion_propagator_type*)malloc(g_num_threads * sizeof(fermion_propagator_type) ); fpaux = (fermion_propagator_type*)malloc(g_num_threads * sizeof(fermion_propagator_type) ); if(uprop==NULL || fp1==NULL || fp2==NULL || fp3==NULL || fpaux==NULL ) { fprintf(stderr, "[] Error, could not alloc fermion propagator points\n"); exit(57); } sp1 = (spinor_propagator_type*)malloc(g_num_threads * sizeof(spinor_propagator_type) ); sp2 = (spinor_propagator_type*)malloc(g_num_threads * sizeof(spinor_propagator_type) ); if(sp1==NULL || sp2==NULL) { fprintf(stderr, "[] Error, could not alloc spinor propagator points\n"); exit(59); } for(i=0;i<g_num_threads;i++) { create_fp(uprop+i); } for(i=0;i<g_num_threads;i++) { create_fp(fp1+i); } for(i=0;i<g_num_threads;i++) { create_fp(fp2+i); } for(i=0;i<g_num_threads;i++) { create_fp(fp3+i); } for(i=0;i<g_num_threads;i++) { create_fp(fpaux+i); } for(i=0;i<g_num_threads;i++) { create_sp(sp1+i); } for(i=0;i<g_num_threads;i++) { create_sp(sp2+i); } /****************************************************** * loop on timeslices ******************************************************/ for(timeslice=0; timeslice<T; timeslice++) { append = (int)( timeslice != 0 ); // read timeslice of the gauge field if( N_Jacobi>0) { switch(g_gauge_file_format) { case 0: status = read_lime_gauge_field_doubleprec_timeslice(g_gauge_field, gauge_field_filename, timeslice, &ildg_gauge_field_checksum); break; case 1: status = read_nersc_gauge_field_timeslice(g_gauge_field, gauge_field_filename, timeslice, &nersc_gauge_field_checksum); break; } if(status != 0) { fprintf(stderr, "[] Error, could not 
read gauge field\n"); exit(21); } #ifdef OPENMP status = APE_Smearing_Step_Timeslice_threads(g_gauge_field, N_ape, alpha_ape); #else for(i=0; i<N_ape; i++) { status = APE_Smearing_Step_Timeslice(g_gauge_field, alpha_ape); } #endif } // read timeslice of the 12 up-type propagators and smear them for(is=0;is<n_s*n_c;is++) { if(do_gt == 0) { sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.inverted", filename_prefix, Nconf, sx0, sx1, sx2, sx3, is); status = read_lime_spinor_timeslice(g_spinor_field[is], timeslice, filename, 0, spinor_field_checksum+is); if(status != 0) { fprintf(stderr, "[] Error, could not read propagator from file %s\n", filename); exit(102); } if(N_Jacobi > 0) { fprintf(stdout, "# [] Jacobi smearing propagator no. %d with paramters N_Jacobi=%d, kappa_Jacobi=%f\n", is, N_Jacobi, kappa_Jacobi); #ifdef OPENMP Jacobi_Smearing_Step_one_Timeslice_threads(g_gauge_field, g_spinor_field[is], work, N_Jacobi, kappa_Jacobi); #else for(c=0; c<N_Jacobi; c++) { Jacobi_Smearing_Step_one_Timeslice(g_gauge_field, g_spinor_field[is], work, kappa_Jacobi); } #endif } } else { // of if do_gt == 0 // apply gt apply_gt_prop(gauge_trafo, g_spinor_field[is], is/n_c, is%n_c, 4, filename_prefix, g_source_location); } // of if do_gt == 0 } /****************************************************** * contractions ******************************************************/ #ifdef OPENMP omp_set_num_threads(g_num_threads); #pragma omp parallel private (ix,icomp,threadid) \ firstprivate (fermion_type,gamma_component,num_component,connq,\ gamma_component_sign,VOL3,g_spinor_field,fp1,fp2,fp3,fpaux,uprop,sp1,sp2) { threadid = omp_get_thread_num(); #else threadid = 0; #endif for(ix=threadid; ix<VOL3; ix+=g_num_threads) { // assign the propagators _assign_fp_point_from_field(uprop[threadid], g_spinor_field, ix); if(fermion_type == _TM_FERMION) { _fp_eq_rot_ti_fp(fp1[threadid], uprop[threadid], +1, fermion_type, fp2[threadid]); _fp_eq_fp_ti_rot(uprop[threadid], fp1[threadid], +1, 
fermion_type, fp2[threadid]); } for(icomp=0; icomp<num_component; icomp++) { _sp_eq_zero( connq[ix*num_component+icomp]); /****************************************************** * prepare propagators ******************************************************/ // fp1[threadid] = C Gamma_1 x S_u = g0 g2 Gamma_1 S_u _fp_eq_zero(fp1[threadid]); _fp_eq_zero(fpaux[threadid]); _fp_eq_gamma_ti_fp(fp1[threadid], gamma_component[0][icomp], uprop[threadid]); _fp_eq_gamma_ti_fp(fpaux[threadid], 2, fp1[threadid]); _fp_eq_gamma_ti_fp(fp1[threadid], 0, fpaux[threadid]); // fp2[threadid] = C Gamma_1 x S_u x C Gamma_2 _fp_eq_zero(fp2[threadid]); _fp_eq_zero(fpaux[threadid]); _fp_eq_fp_ti_gamma(fp2[threadid], 0, fp1[threadid]); _fp_eq_fp_ti_gamma(fpaux[threadid], 2, fp2[threadid]); _fp_eq_fp_ti_gamma(fp2[threadid], gamma_component[1][icomp], fpaux[threadid]); // fp3[threadid] = S_u x C Gamma_2 = S_u g0 g2 Gamma_2 _fp_eq_zero(fp3[threadid]); _fp_eq_zero(fpaux[threadid]); _fp_eq_fp_ti_gamma(fp3[threadid], 0, uprop[threadid]); _fp_eq_fp_ti_gamma(fpaux[threadid], 2, fp3[threadid]); _fp_eq_fp_ti_gamma(fp3[threadid], gamma_component[1][icomp], fpaux[threadid]); /****************************************************** * contractions ******************************************************/ // (1) // reduce _fp_eq_zero(fpaux[threadid]); _fp_eq_fp_eps_contract13_fp(fpaux[threadid], fp1[threadid], uprop[threadid]); // reduce to spin propagator _sp_eq_zero( sp1[threadid] ); _sp_eq_fp_del_contract23_fp(sp1[threadid], fp3[threadid], fpaux[threadid]); // (2) // reduce to spin propagator _sp_eq_zero( sp2[threadid] ); _sp_eq_fp_del_contract24_fp(sp2[threadid], fp3[threadid], fpaux[threadid]); // add and assign _sp_pl_eq_sp(sp1[threadid], sp2[threadid]); _sp_eq_sp_ti_re(sp2[threadid], sp1[threadid], -gamma_component_sign[icomp]); _sp_eq_sp( connq[ix*num_component+icomp], sp2[threadid]); // (3) // reduce _fp_eq_zero(fpaux[threadid]); _fp_eq_fp_eps_contract13_fp(fpaux[threadid], fp2[threadid], 
uprop[threadid]); // reduce to spin propagator _sp_eq_zero( sp1[threadid] ); _sp_eq_fp_del_contract23_fp(sp1[threadid], uprop[threadid], fpaux[threadid]); // (4) // reduce _fp_eq_zero(fpaux[threadid]); _fp_eq_fp_eps_contract13_fp(fpaux[threadid], fp1[threadid], fp3[threadid]); // reduce to spin propagator _sp_eq_zero( sp2[threadid] ); _sp_eq_fp_del_contract24_fp(sp2[threadid], uprop[threadid], fpaux[threadid]); // add and assign _sp_pl_eq_sp(sp1[threadid], sp2[threadid]); _sp_eq_sp_ti_re(sp2[threadid], sp1[threadid], -gamma_component_sign[icomp]); _sp_pl_eq_sp( connq[ix*num_component+icomp], sp2[threadid]); // (5) // reduce _fp_eq_zero(fpaux[threadid]); _fp_eq_fp_eps_contract13_fp(fpaux[threadid], fp2[threadid], uprop[threadid]); // reduce to spin propagator _sp_eq_zero( sp1[threadid] ); _sp_eq_fp_del_contract34_fp(sp1[threadid], uprop[threadid], fpaux[threadid]); // (6) // reduce _fp_eq_zero(fpaux[threadid]); _fp_eq_fp_eps_contract13_fp(fpaux[threadid], fp1[threadid], fp3[threadid]); // reduce to spin propagator _sp_eq_zero( sp2[threadid] ); _sp_eq_fp_del_contract34_fp(sp2[threadid], uprop[threadid], fpaux[threadid]); // add and assign _sp_pl_eq_sp(sp1[threadid], sp2[threadid]); _sp_eq_sp_ti_re(sp2[threadid], sp1[threadid], -gamma_component_sign[icomp]); _sp_pl_eq_sp( connq[ix*num_component+icomp], sp2[threadid]); } // of icomp } // of ix #ifdef OPENMP } #endif /*********************************************** * finish calculation of connq ***********************************************/ if(g_propagator_bc_type == 0) { // multiply with phase factor fprintf(stdout, "# [] multiplying timeslice %d with boundary phase factor\n", timeslice); ir = (timeslice - sx0 + T_global) % T_global; w1.re = cos( 3. * M_PI*(double)ir / (double)T_global ); w1.im = sin( 3. 
* M_PI*(double)ir / (double)T_global ); for(ix=0;ix<num_component*VOL3;ix++) { _sp_eq_sp(sp1[0], connq[ix] ); _sp_eq_sp_ti_co( connq[ix], sp1[0], w1); } } else if (g_propagator_bc_type == 1) { // multiply with step function if(timeslice < sx0) { fprintf(stdout, "# [] multiplying timeslice %d with boundary step function\n", timeslice); for(ix=0;ix<num_component*VOL3;ix++) { _sp_eq_sp(sp1[0], connq[ix] ); _sp_eq_sp_ti_re( connq[ix], sp1[0], -1.); } } } if(write_ascii) { sprintf(filename, "%s_x.%.4d.t%.2dx%.2dy%.2dz%.2d.ascii", outfile_prefix, Nconf, sx0, sx1, sx2, sx3); write_contraction2( connq[0][0], filename, num_component*g_sv_dim*g_sv_dim, VOL3, 1, append); } /****************************************************************** * Fourier transform ******************************************************************/ items = 2 * num_component * g_sv_dim * g_sv_dim * VOL3; bytes = sizeof(double); memcpy(in, connq[0][0], items * bytes); ir = num_component * g_sv_dim * g_sv_dim; #ifdef OPENMP fftwnd_threads(g_num_threads, plan_p, ir, in, ir, 1, (fftw_complex*)(connq[0][0]), ir, 1); #else fftwnd(plan_p, ir, in, ir, 1, (fftw_complex*)(connq[0][0]), ir, 1); #endif // add phase factor from the source location iix = 0; for(x1=0;x1<LX;x1++) { q[0] = (double)x1 / (double)LX; for(x2=0;x2<LY;x2++) { q[1] = (double)x2 / (double)LY; for(x3=0;x3<LZ;x3++) { q[2] = (double)x3 / (double)LZ; phase = 2. * M_PI * ( q[0]*sx1 + q[1]*sx2 + q[2]*sx3 ); w1.re = cos(phase); w1.im = sin(phase); for(icomp=0; icomp<num_component; icomp++) { _sp_eq_sp(sp1[0], connq[iix] ); _sp_eq_sp_ti_co( connq[iix], sp1[0], w1) ; iix++; } }}} // of x3, x2, x1 // write to file sprintf(filename, "%s_q.%.4d.t%.2dx%.2dy%.2dz%.2d", outfile_prefix, Nconf, sx0, sx1, sx2, sx3); sprintf(contype, "2-pt. 
function, (t,q_1,q_2,q_3)-dependent, source_timeslice = %d", sx0); write_lime_contraction_timeslice(connq[0][0], filename, 64, num_component*g_sv_dim*g_sv_dim, contype, Nconf, 0, &connq_checksum, timeslice); if(write_ascii) { strcat(filename, ".ascii"); write_contraction2(connq[0][0],filename, num_component*g_sv_dim*g_sv_dim, VOL3, 1, append); } /*********************************************** * calculate connt ***********************************************/ for(icomp=0;icomp<num_component; icomp++) { // fwd _sp_eq_sp(sp1[0], connq[icomp]); _sp_eq_gamma_ti_sp(sp2[0], 0, sp1[0]); _sp_pl_eq_sp(sp1[0], sp2[0]); _co_eq_tr_sp(&w, sp1[0]); connt[2*(icomp*T + timeslice) ] = w.re * 0.25; connt[2*(icomp*T + timeslice)+1] = w.im * 0.25; // bwd _sp_eq_sp(sp1[0], connq[icomp]); _sp_eq_gamma_ti_sp(sp2[0], 0, sp1[0]); _sp_mi_eq_sp(sp1[0], sp2[0]); _co_eq_tr_sp(&w, sp1[0]); connt[2*(icomp*T+timeslice + num_component*T) ] = w.re * 0.25; connt[2*(icomp*T+timeslice + num_component*T)+1] = w.im * 0.25; } } // of loop on timeslice // write connt sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.fw", outfile_prefix, Nconf, sx0, sx1, sx2, sx3); ofs = fopen(filename, "w"); if(ofs == NULL) { fprintf(stderr, "[] Error, could not open file %s for writing\n", filename); exit(3); } fprintf(ofs, "#%12.8f%3d%3d%3d%3d%8.4f%6d\n", g_kappa, T_global, LX, LY, LZ, g_mu, Nconf); for(icomp=0; icomp<num_component; icomp++) { ir = sx0; fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], 0, connt[2*(icomp*T+ir)], 0., Nconf); for(it=1;it<T/2;it++) { ir = ( it + sx0 ) % T_global; ir2 = ( (T_global - it) + sx0 ) % T_global; fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(icomp*T+ir)], connt[2*(icomp*T+ir2)], Nconf); } ir = ( it + sx0 ) % T_global; fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(icomp*T+ir)], 0., Nconf); } fclose(ofs); 
sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.bw", outfile_prefix, Nconf, sx0, sx1, sx2, sx3); ofs = fopen(filename, "w"); if(ofs == NULL) { fprintf(stderr, "[] Error, could not open file %s for writing\n", filename); exit(3); } fprintf(ofs, "#%12.8f%3d%3d%3d%3d%8.4f%6d\n", g_kappa, T_global, LX, LY, LZ, g_mu, Nconf); for(icomp=0; icomp<num_component; icomp++) { ir = sx0; fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], 0, connt[2*(num_component*T+icomp*T+ir)], 0., Nconf); for(it=1;it<T/2;it++) { ir = ( it + sx0 ) % T_global; ir2 = ( (T_global - it) + sx0 ) % T_global; fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(num_component*T+icomp*T+ir)], connt[2*(num_component*T+icomp*T+ir2)], Nconf); } ir = ( it + sx0 ) % T_global; fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(num_component*T+icomp*T+ir)], 0., Nconf); } fclose(ofs); /*********************************************** * free the allocated memory, finalize ***********************************************/ free_geometry(); if(connt!= NULL) free(connt); if(connq!= NULL) free(connq); if(gauge_trafo != NULL) free(gauge_trafo); if(g_spinor_field!=NULL) { for(i=0; i<no_fields; i++) free(g_spinor_field[i]); free(g_spinor_field); g_spinor_field=(double**)NULL; } if(spinor_field_checksum !=NULL) free(spinor_field_checksum); if(g_gauge_field != NULL) free(g_gauge_field); for(i=0;i<g_num_threads;i++) { free_fp(uprop+i); } for(i=0;i<g_num_threads;i++) { free_fp(fp1+i); } for(i=0;i<g_num_threads;i++) { free_fp(fp2+i); } for(i=0;i<g_num_threads;i++) { free_fp(fp3+i); } for(i=0;i<g_num_threads;i++) { free_fp(fpaux+i); } for(i=0;i<g_num_threads;i++) { free_sp(sp1+i); } for(i=0;i<g_num_threads;i++) { free_sp(sp2+i); } if(uprop!=NULL) free(uprop); if(fp1!=NULL) free(fp1); if(fp2!=NULL) free(fp2); if(fp3!=NULL) free(fp3); if(fpaux!=NULL) free(fpaux); 
if(sp1!=NULL) free(sp1); if(sp2!=NULL) free(sp2); free(in); fftwnd_destroy_plan(plan_p); g_the_time = time(NULL); fprintf(stdout, "# [] %s# [] end fo run\n", ctime(&g_the_time)); fflush(stdout); fprintf(stderr, "# [] %s# [] end fo run\n", ctime(&g_the_time)); fflush(stderr); #ifdef MPI MPI_Finalize(); #endif return(0); }
int main(int argc, char **argv) { int c, mu; int filename_set = 0; int sl0, sl1, sl2, sl3; double *disc; double vp1[8], vp2[8], vp3[8], vp4[8], vp5[8]; char filename[200]; while ((c = getopt(argc, argv, "h?vf:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'h': case '?': default: usage(); break; } } /************************************** * set the default values, read input **************************************/ if(filename_set==0) strcpy(filename, "cvc.input.test"); fprintf(stdout, "# Reading test input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } if(g_kappa == 0.) { if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n"); usage(); } /* initialize MPI parameters */ mpi_init(argc, argv); T = T_global; if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); exit(1); } geometry(); /******************************** * the source locaton ********************************/ sl0 = g_source_location/(LX*LY*LZ); sl1 = ( g_source_location%(LX*LY*LZ) ) / (LY*LZ); sl2 = ( g_source_location%(LY*LZ) ) / (LZ); sl3 = g_source_location%LZ; fprintf(stdout, "# global sl = (%d, %d, %d, %d)\n", sl0, sl1, sl2, sl3); if( (disc = (double*)malloc(32*VOLUME*sizeof(double))) == (double*)NULL) { exit(102); } /******************************************************************* * (1) comparison of results from * - avc_disc_stochastic * - avc_disc_hpe and avc_disc_hpe5 * - vp_disc_hpe_loops_red/vp_disc_hpe_stoch to 3rd and 5th order *******************************************************************/ sprintf(filename, "outcvc_X.%.4d.%.4d", Nconf, Nsave); fprintf(stdout, "\n# Reading avc_disc_stochastic data from file %s\n", filename); read_contraction(disc, NULL, filename, 4); for(mu=0; mu<4; mu++) { vp1[2*mu ] = 
disc[_GWI(mu,g_source_location,VOLUME) ] / 60.; vp1[2*mu+1] = disc[_GWI(mu,g_source_location,VOLUME)+1] / 60.; } sprintf(filename, "cvc_hpe_X.%.4d.%.4d", Nconf, Nsave); fprintf(stdout, "\n# Reading avc_disc_hpe data from file %s\n", filename); read_contraction(disc, NULL, filename, 4); for(mu=0; mu<4; mu++) { vp2[2*mu ] = disc[_GWI(mu,g_source_location,VOLUME) ] / 60.; vp2[2*mu+1] = disc[_GWI(mu,g_source_location,VOLUME)+1] / 60.; } sprintf(filename, "cvc_hpe5_X.%.4d.%.4d", Nconf, Nsave); fprintf(stdout, "\n# Reading avc_disc_hpe5 data from file %s\n", filename); read_lime_contraction(disc, filename, 4, 0); for(mu=0; mu<4; mu++) { vp3[2*mu ] = disc[_GWI(mu,g_source_location,VOLUME) ]; vp3[2*mu+1] = disc[_GWI(mu,g_source_location,VOLUME)+1]; } sprintf(filename, "vp_disc_hpe03_X.%.4d.%.4d", Nconf, Nsave); fprintf(stdout, "\n# Reading vp_disc_hpe03 data from file %s\n", filename); read_lime_contraction(disc, filename, 4, 0); for(mu=0; mu<4; mu++) { vp4[2*mu ] = disc[_GWI(mu,g_source_location,VOLUME) ]; vp4[2*mu+1] = disc[_GWI(mu,g_source_location,VOLUME)+1]; } sprintf(filename, "vp_disc_hpe05_X.%.4d.%.4d", Nconf, Nsave); fprintf(stdout, "\n# Reading vp_disc_hpe05 data from file %s\n", filename); read_lime_contraction(disc, filename, 4, 0); for(mu=0; mu<4; mu++) { vp5[2*mu ] = disc[_GWI(mu,g_source_location,VOLUME) ]; vp5[2*mu+1] = disc[_GWI(mu,g_source_location,VOLUME)+1]; } for(mu=0; mu<4; mu++) { fprintf(stdout, "\n#--------------------------------------------\n"\ "# mu = %d\n", mu); fprintf(stdout, "%30s%30s%30s\n", "method", "real part", "imaginary part"); fprintf(stdout, "%30s%30.16e%30.16e\n", "avc_disc_stochastic", vp1[2*mu], vp1[2*mu+1]); fprintf(stdout, "%30s%30.16e%30.16e\n", "avc_disc_hpe", vp2[2*mu], vp2[2*mu+1]); fprintf(stdout, "%30s%30.16e%30.16e\n", "avc_disc_hpe5", vp3[2*mu], vp3[2*mu+1]); fprintf(stdout, "%30s%30.16e%30.16e\n", "vp_disc_hpe03", vp4[2*mu], vp4[2*mu+1]); fprintf(stdout, "%30s%30.16e%30.16e\n", "vp_disc_hpe05", vp5[2*mu], vp5[2*mu+1]); 
} fprintf(stdout, "\n#=======================================================\n"); /******************************************************************* * (2) comparison of results from * - lvc_disc_stochastic * - lvc_disc_hpe for 4th and 6th order *******************************************************************/ sprintf(filename, "outlvc_X.%.4d.%.4d", Nconf, Nsave); fprintf(stdout, "\n# Reading lvc_disc_stochastic data from file %s\n", filename); read_contraction(disc, NULL, filename, 4); for(mu=0; mu<4; mu++) { vp1[2*mu ] = disc[_GWI(mu,g_source_location,VOLUME) ] / 60.; vp1[2*mu+1] = disc[_GWI(mu,g_source_location,VOLUME)+1] / 60.; } sprintf(filename, "lvc_disc_hpe04_X.%.4d.%.4d", Nconf, Nsave); fprintf(stdout, "\n# Reading lvc_disc_hpe04 data from file %s\n", filename); read_lime_contraction(disc, filename, 4, 0); for(mu=0; mu<4; mu++) { vp2[2*mu ] = disc[_GWI(mu,g_source_location,VOLUME) ]; vp2[2*mu+1] = disc[_GWI(mu,g_source_location,VOLUME)+1]; } sprintf(filename, "lvc_disc_hpe06_X.%.4d.%.4d", Nconf, Nsave); fprintf(stdout, "\n# Reading lvc_disc_hpe06 data from file %s\n", filename); read_lime_contraction(disc, filename, 4, 0); for(mu=0; mu<4; mu++) { vp3[2*mu ] = disc[_GWI(mu,g_source_location,VOLUME) ]; vp3[2*mu+1] = disc[_GWI(mu,g_source_location,VOLUME)+1]; } for(mu=0; mu<4; mu++) { fprintf(stdout, "\n#--------------------------------------------\n"\ "# mu = %d\n", mu); fprintf(stdout, "%30s%30s%30s\n", "method", "real part", "imaginary part"); fprintf(stdout, "%30s%30.16e%30.16e\n", "lvc_disc_stochastic", vp1[2*mu], vp1[2*mu+1]); fprintf(stdout, "%30s%30.16e%30.16e\n", "lvc_disc_hpe04", vp2[2*mu], vp2[2*mu+1]); fprintf(stdout, "%30s%30.16e%30.16e\n", "lvc_disc_hpe06", vp3[2*mu], vp3[2*mu+1]); } fprintf(stdout, "\n#=======================================================\n"); /******************************************************************* * (3) comparison of results from * - vp_disc_hpe_mc1/2 for 3rd and 5th order 
*******************************************************************/ sprintf(filename, "vp_disc_hpe-01_mc2_X.%.4d.%.4d", Nconf, Nsave); fprintf(stdout, "\n# Reading vp_disc_hpe-01_mc2 data from file %s\n", filename); read_lime_contraction(disc, filename, 4, 0); for(mu=0; mu<4; mu++) { vp1[2*mu ] = disc[_GWI(mu,g_source_location,VOLUME) ]; vp1[2*mu+1] = disc[_GWI(mu,g_source_location,VOLUME)+1]; } sprintf(filename, "vp_disc_hpe03_mc2_X.%.4d.%.4d", Nconf, Nsave); fprintf(stdout, "\n# Reading vp_disc_hpe03_mc2 data from file %s\n", filename); read_lime_contraction(disc, filename, 4, 0); for(mu=0; mu<4; mu++) { vp2[2*mu ] = disc[_GWI(mu,g_source_location,VOLUME) ]; vp2[2*mu+1] = disc[_GWI(mu,g_source_location,VOLUME)+1]; } sprintf(filename, "vp_disc_hpe05_mc2_X.%.4d.%.4d", Nconf, Nsave); fprintf(stdout, "\n# Reading vp_disc_hpe05_mc2 data from file %s\n", filename); read_lime_contraction(disc, filename, 4, 0); for(mu=0; mu<4; mu++) { vp3[2*mu ] = disc[_GWI(mu,g_source_location,VOLUME) ]; vp3[2*mu+1] = disc[_GWI(mu,g_source_location,VOLUME)+1]; } for(mu=0; mu<4; mu++) { fprintf(stdout, "\n#--------------------------------------------\n"\ "# mu = %d\n", mu); fprintf(stdout, "%30s%30s%30s\n", "method", "real part", "imaginary part"); fprintf(stdout, "%30s%30.16e%30.16e\n", "vp_disc_hpe00_mc", vp1[2*mu], vp1[2*mu+1]); fprintf(stdout, "%30s%30.16e%30.16e\n", "vp_disc_hpe03_mc", vp2[2*mu], vp2[2*mu+1]); fprintf(stdout, "%30s%30.16e%30.16e\n", "vp_disc_hpe05_mc", vp3[2*mu], vp3[2*mu+1]); } fprintf(stdout, "\n#=======================================================\n"); /*********************************************** * free the allocated memory, finalize ***********************************************/ free_geometry(); free(disc); #ifdef MPI MPI_Finalize(); #endif return(0); }