/**
 * Destructor.
 *
 * Releases the four smoothing-filter buffers with fftw_free, deletes every
 * entry of the 2 x nf_[0] meanRockPrediction_ table one by one, and finally
 * destroys both FFT plans.
 */
RockPhysicsInversion4D::~RockPhysicsInversion4D()
{
  // Filter buffers are released through FFTW's deallocator.
  for (int filter = 0; filter < 4; ++filter) {
    fftw_free(smoothingFilter_[filter]);
  }

  // Every (row, col) entry of the prediction table is deleted individually.
  for (int row = 0; row < 2; ++row) {
    for (int col = 0; col < nf_[0]; ++col) {
      delete meanRockPrediction_(row, col);
    }
  }

  fftwnd_destroy_plan(fftplan1_);
  fftwnd_destroy_plan(fftplan2_);
}
/*
 * Fortran-callable wrapper that destroys the 3D FFTW plan referenced by p.
 *
 * p  pointer to the plan handle (passed by reference from Fortran).
 *
 * A NULL handle is not destroyed; a warning is written to stderr instead.
 * Always returns 0.
 */
int F77_FUNC_ (destroy_plan_3d, DESTROY_PLAN_3D)(fftwnd_plan *p)
{
    if ( *p == NULL ) {
        /* Nothing to destroy: report it and carry on. */
        fprintf(stderr," *** DESTROY_PLAN_3D: warning empty plan ***\n");
        return 0;
    }

    fftwnd_destroy_plan(*p);
    return 0;
}
//----------------------------------------------------------------------- int fdct_wrapping_wavelet(CpxOffMat& Xhgh, vector<CpxNumMat>& csc) { int N1 = Xhgh.m(); int N2 = Xhgh.n(); int F1 = -Xhgh.s(); int F2 = -Xhgh.t(); CpxNumMat T(N1, N2); fdct_wrapping_ifftshift(Xhgh, T); fftwnd_plan p ; #pragma omp critical { p = fftw2d_create_plan(N2, N1, FFTW_BACKWARD, FFTW_ESTIMATE | FFTW_IN_PLACE); } fftwnd_one(p, (fftw_complex*)T.data(), NULL); #pragma omp critical { fftwnd_destroy_plan(p); } double sqrtprod = sqrt(double(N1*N2)); for(int j=0; j<N2; j++) for(int i=0; i<N1; i++) T(i,j) /= sqrtprod; csc[0] = T; //csc[0].resize(N1, N2); //fdct_wrapping_fftshift(T, csc[0]); return 0; }
//------------------------------------------------------------------------------------ int fdct3d_inverse_center(int N1,int N2,int N3,int b, double L1,double L2,double L3, int s, CpxCrvletPrtd& C, CpxNumTnsBlkd& W) { int mpirank; MPI_Comm_rank(MPI_COMM_WORLD, &mpirank); vector< vector<int> >& Cowners = C.owners(); if(Cowners[0][0]==mpirank) { int S1, S2, S3; int F1, F2, F3; double R1, R2, R3; fdct3d_rangecompute(L1, L2, L3, S1, S2, S3, F1, F2, F3, R1, R2, R3); DblOffVec big1(S1); fdct3d_lowpass(L1, big1); DblOffVec big2(S2); fdct3d_lowpass(L2, big2); DblOffVec big3(S3); fdct3d_lowpass(L3, big3); CpxNumTns T(S1,S2,S3); CpxNumTns& Cblk = C.block(0,0); //center block T = Cblk; fftwnd_plan p = fftw3d_create_plan(S3,S2,S1, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE); fftwnd_one(p, (fftw_complex*)T.data(), NULL); fftwnd_destroy_plan(p); double sqrtprod = sqrt(double(S1*S2*S3)); for(int i=0; i<S1; i++) for(int j=0; j<S2; j++) for(int k=0; k<S3; k++) T(i,j,k) /= sqrtprod; CpxOffTns A(S1,S2,S3); fdct3d_fftshift(S1,S2,S3,T,A); for(int i=-S1/2; i<-S1/2+S1; i++) for(int j=-S2/2; j<-S2/2+S2; j++) for(int k=-S3/2; k<-S3/2+S3; k++) { int bi,bj,bk; int oi,oj,ok; fdct3d_position_aux(N1,N2,N3,b, i,j,k, bi,bj,bk,oi,oj,ok); CpxNumTns& Wblk = W.block(bi,bj,bk); Wblk(oi,oj,ok) += A(i,j,k) * (big1(i)*big2(j)*big3(k)); } //done } return 0; }
/*
 * Checks the threaded out-of-place N-dimensional transform against a
 * reference plan.
 *
 * rank, n         dimensionality and extents of the transform
 * dir             transform direction
 * validated_plan  plan whose output is taken as the reference
 *
 * For every input/output stride pair in 1..MAX_STRIDE random data is
 * transformed with fftwnd_threads (or fftwnd_threads_one, chosen at random
 * when all strides are 1) and each of the howmany interleaved results is
 * compared with the reference output via CHECK.
 */
void testnd_out_of_place(int rank, int *n, fftw_direction dir, fftwnd_plan validated_plan)
{
     int istride, ostride;
     int total, d, idx;
     fftw_complex *strided_in, *ref_in, *strided_out, *ref_out;
     fftwnd_plan plan;
     int flags = measure_flag | wisdom_flag;

     /* randomly exercise the thread-safe planner flag */
     if (coinflip())
          flags |= FFTW_THREADSAFE;

     total = 1;
     for (d = 0; d < rank; ++d)
          total *= n[d];

     strided_in = (fftw_complex *) fftw_malloc(total * MAX_STRIDE * sizeof(fftw_complex));
     strided_out = (fftw_complex *) fftw_malloc(total * MAX_STRIDE * sizeof(fftw_complex));
     ref_in = (fftw_complex *) fftw_malloc(total * sizeof(fftw_complex));
     ref_out = (fftw_complex *) fftw_malloc(total * sizeof(fftw_complex));

     plan = fftwnd_create_plan(rank, n, dir, flags);

     for (istride = 1; istride <= MAX_STRIDE; ++istride) {
          /* generate random inputs, replicated into each strided slot */
          for (idx = 0; idx < total; ++idx) {
               int slot;

               c_re(ref_in[idx]) = DRAND();
               c_im(ref_in[idx]) = DRAND();
               for (slot = 0; slot < istride; ++slot) {
                    c_re(strided_in[idx * istride + slot]) = c_re(ref_in[idx]);
                    c_im(strided_in[idx * istride + slot]) = c_im(ref_in[idx]);
               }
          }

          for (ostride = 1; ostride <= MAX_STRIDE; ++ostride) {
               int howmany;

               if (istride < ostride)
                    howmany = istride;
               else
                    howmany = ostride;

               /* the single-transform entry point is only eligible when
                  everything is unit-stride, and then only half the time */
               if (howmany == 1 && istride == 1 && ostride == 1 && !coinflip())
                    fftwnd_threads_one(nthreads, plan, strided_in, strided_out);
               else
                    fftwnd_threads(nthreads, plan, howmany,
                                   strided_in, istride, 1,
                                   strided_out, ostride, 1);

               fftwnd(validated_plan, 1, ref_in, 1, 1, ref_out, 1, 1);

               for (idx = 0; idx < howmany; ++idx)
                    CHECK(compute_error_complex(strided_out + idx, ostride, ref_out, 1, total) < TOLERANCE,
                          "testnd_out_of_place: wrong answer");
          }
     }

     fftwnd_destroy_plan(plan);
     fftw_free(ref_out);
     fftw_free(ref_in);
     fftw_free(strided_out);
     fftw_free(strided_in);
}
/*
 * Releases a maxwell_data structure: destroys every forward/inverse FFT
 * plan (using the destroy routine matching the configured FFTW flavour),
 * frees the owned arrays and finally the structure itself.
 * Passing NULL is a no-op.
 */
void destroy_maxwell_data(maxwell_data *d)
{
     int plan_idx;

     if (!d)
          return;

     for (plan_idx = 0; plan_idx < d->nplans; ++plan_idx) {
#if defined(HAVE_FFTW3)
          FFTW(destroy_plan)((fftplan) (d->plans[plan_idx]));
          FFTW(destroy_plan)((fftplan) (d->iplans[plan_idx]));
#elif defined(HAVE_FFTW)
#  ifdef HAVE_MPI
#    ifdef SCALAR_COMPLEX
          fftwnd_mpi_destroy_plan((fftplan) (d->plans[plan_idx]));
          fftwnd_mpi_destroy_plan((fftplan) (d->iplans[plan_idx]));
#    else /* not SCALAR_COMPLEX */
          rfftwnd_mpi_destroy_plan((fftplan) (d->plans[plan_idx]));
          rfftwnd_mpi_destroy_plan((fftplan) (d->iplans[plan_idx]));
#    endif /* not SCALAR_COMPLEX */
#  else /* not HAVE_MPI */
#    ifdef SCALAR_COMPLEX
          fftwnd_destroy_plan((fftplan) (d->plans[plan_idx]));
          fftwnd_destroy_plan((fftplan) (d->iplans[plan_idx]));
#    else /* not SCALAR_COMPLEX */
          rfftwnd_destroy_plan((fftplan) (d->plans[plan_idx]));
          rfftwnd_destroy_plan((fftplan) (d->iplans[plan_idx]));
#    endif /* not SCALAR_COMPLEX */
#  endif /* not HAVE_MPI */
#endif /* HAVE FFTW */
     }

     free(d->eps_inv);

#if defined(HAVE_FFTW3)
     /* fft_data2 may alias fft_data; release it only when it is distinct */
     FFTW(free)(d->fft_data);
     if (d->fft_data2 != d->fft_data)
          FFTW(free)(d->fft_data2);
#else
     free(d->fft_data);
#endif

     free(d->k_plus_G);
     free(d->k_plus_G_normsqr);
     free(d);
}
/*
 * Times one in-place N-dimensional complex transform.
 *
 * sz        rank and extents of the transform
 * dir       transform direction
 * flags     extra planner flags (FFTW_IN_PLACE is always added)
 * specific  non-zero: plan with fftwnd_create_plan_specific on the actual
 *           array; zero: plan with fftwnd_create_plan
 *
 * Planner time and plan are printed at verbosity 2, transform time and
 * "mflops" at verbosity 1.
 */
void test_speed_nd_aux(struct size sz, fftw_direction dir, int flags, int specific)
{
     fftw_complex *in;
     fftwnd_plan plan;
     double t;
     fftw_time begin, end;
     int i, N;
     int plan_flags;

     /* only bench in-place multi-dim transforms */
     flags |= FFTW_IN_PLACE;
     plan_flags = speed_flag | flags | wisdom_flag | no_vector_flag;

     N = 1;
     for (i = 0; i < sz.rank; ++i)
          N *= (sz.narray[i]);

     in = (fftw_complex *) fftw_malloc(N * howmany_fields * sizeof(fftw_complex));

     begin = fftw_get_time();
     if (specific)
          plan = fftwnd_create_plan_specific(sz.rank, sz.narray, dir,
                                             plan_flags,
                                             in, howmany_fields, 0, 1);
     else
          plan = fftwnd_create_plan(sz.rank, sz.narray, dir, plan_flags);
     end = fftw_get_time();

     CHECK(plan != NULL, "can't create plan");

     t = fftw_time_to_sec(fftw_time_diff(end, begin));
     WHEN_VERBOSE(2, printf("time for planner: %f s\n", t));

     WHEN_VERBOSE(2, printf("\n"));
     WHEN_VERBOSE(2, (fftwnd_print_plan(plan)));
     WHEN_VERBOSE(2, printf("\n"));

     /* t is overwritten with the measured transform time */
     FFTW_TIME_FFT(fftwnd(plan, howmany_fields, in, howmany_fields, 1, 0, 0, 0),
                   in, N * howmany_fields, t);

     fftwnd_destroy_plan(plan);

     WHEN_VERBOSE(1, printf("time for one fft: %s", smart_sprint_time(t)));
     WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
     WHEN_VERBOSE(1, printf("\"mflops\" = 5 (N log2 N) / (t in microseconds)"
                            " = %f\n", howmany_fields * mflops(t, N)));

     fftw_free(in);

     WHEN_VERBOSE(1, printf("\n"));
}
/*
 * Class:     jfftw_complex_nd_Plan
 * Method:    destroyPlan
 * Signature: ()V
 *
 * Reads the object's byte-array field "plan", interprets its leading bytes
 * as an fftwnd_plan handle, destroys that plan, and then sets the field
 * to null.
 */
JNIEXPORT void JNICALL Java_jfftw_complex_nd_Plan_destroyPlan( JNIEnv* env, jobject obj )
{
	jclass planClass = (*env)->GetObjectClass( env, obj );
	jfieldID planField = (*env)->GetFieldID( env, planClass, "plan", "[B" );
	jbyteArray planBytes = (jbyteArray)(*env)->GetObjectField( env, obj, planField );

	/* The native handle is stored verbatim inside the byte array. */
	unsigned char* raw = (*env)->GetByteArrayElements( env, planBytes, 0 );
	fftwnd_plan* handle = (fftwnd_plan*)raw;

	fftwnd_destroy_plan( *handle );

	(*env)->ReleaseByteArrayElements( env, planBytes, raw, 0 );
	(*env)->SetObjectField( env, obj, planField, NULL );
}
/*
 * Runs the N-dimensional correctness tests for one size and direction.
 *
 * A reference plan is created with the measure/wisdom flags and handed to
 * both the out-of-place and the in-place test, then destroyed.
 *
 * sz         rank and extents of the transform
 * dir        transform direction
 * alt_api, specific, force_buf
 *            forwarded unchanged to testnd_in_place
 */
void testnd_correctness(struct size sz, fftw_direction dir,
                        int alt_api, int specific, int force_buf)
{
     fftwnd_plan validated_plan;

     validated_plan = fftwnd_create_plan(sz.rank, sz.narray, dir,
                                         measure_flag | wisdom_flag);
     /* Fail with a clear message instead of handing a NULL plan to the
        tests below (same check the other plan-creating tests perform). */
     CHECK(validated_plan != NULL, "can't create plan");

     testnd_out_of_place(sz.rank, sz.narray, dir, validated_plan);
     testnd_in_place(sz.rank, sz.narray, dir, validated_plan,
                     alt_api, specific, force_buf);

     fftwnd_destroy_plan(validated_plan);
}
void Wavelet::fft1DInPlace() { // use the operator version of the fourier transform if(isReal_) { int flag; rfftwnd_plan plan; flag = FFTW_ESTIMATE | FFTW_IN_PLACE; plan = rfftwnd_create_plan(1, &nzp_ ,FFTW_REAL_TO_COMPLEX,flag); // // NBNB-PAL: The call rfftwnd_on_real_to_complex is causing UMRs in Purify. // rfftwnd_one_real_to_complex(plan,rAmp_,cAmp_); fftwnd_destroy_plan(plan); isReal_ = false; } }
void Wavelet::invFFT1DInPlace() { // use the operator version of the fourier transform if(!isReal_) { int flag; rfftwnd_plan plan; flag = FFTW_ESTIMATE | FFTW_IN_PLACE; plan= rfftwnd_create_plan(1,&nzp_,FFTW_COMPLEX_TO_REAL,flag); rfftwnd_one_complex_to_real(plan,cAmp_,rAmp_); fftwnd_destroy_plan(plan); isReal_=true; double scale= static_cast<double>(1.0/static_cast<double>(nzp_)); for(int i=0; i < nzp_; i++) rAmp_[i] = static_cast<fftw_real>(rAmp_[i]*scale); } }
/**
 * In-place backward 2D FFT of a square m_iDem_ampl x m_iDem_ampl array.
 *
 * The samples are copied into a temporary fftw_complex buffer, transformed
 * with an FFTW_BACKWARD plan, and copied back into @p array (narrowed to
 * float). No scaling is applied here.
 *
 * @param array  m_iDem_ampl * m_iDem_ampl complex samples, overwritten
 *               with the transform result.
 */
void CMainFrame::Fft_back(Complex *array)
{
	int side = m_iDem_ampl;
	fftw_complex *buffer = new fftw_complex[side * side];
	size_t count = side * side;

	// Copy the input into the FFTW buffer.
	for(size_t idx = 0; idx < count; idx++)
	{
		buffer[idx].re = array[idx].re;
		buffer[idx].im = array[idx].im;
	}

	fftwnd_plan plan = fftw2d_create_plan(side, side, FFTW_BACKWARD, FFTW_ESTIMATE | FFTW_IN_PLACE);
	fftwnd_one(plan, buffer, NULL);
	fftwnd_destroy_plan(plan);

	// Copy the result back, narrowing to float.
	for(size_t idx = 0; idx < count; idx++)
	{
		array[idx].re = (float) buffer[idx].re;
		array[idx].im = (float) buffer[idx].im;
	}

	delete[] buffer;
}
//--------------------- int fdct_wrapping_invwavelet(vector<CpxNumMat>& csc, CpxOffMat& Xhgh) { assert(csc.size()==1); CpxNumMat& C = csc[0]; int N1 = C.m(); int N2 = C.n(); CpxNumMat T(C); //CpxNumMat T(N1, N2); fdct_wrapping_ifftshift(N1, N2, F1, F2, C, T); fftwnd_plan p = fftw2d_create_plan(N2, N1, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE); fftwnd_one(p, (fftw_complex*)T.data(), NULL); fftwnd_destroy_plan(p); double sqrtprod = sqrt(double(N1*N2)); for(int j=0; j<N2; j++) for(int i=0; i<N1; i++) T(i,j) /= sqrtprod; Xhgh.resize(N1, N2); fdct_wrapping_fftshift(T, Xhgh); return 0; }
/*
 * Runs the N-dimensional correctness tests for one size.
 *
 * Only the FFTW_FORWARD direction without forced buffering is tested; any
 * other combination returns immediately. A reference plan is created
 * (aborting through CHECK if that fails), passed to the out-of-place and
 * in-place tests, and destroyed afterwards.
 */
void testnd_correctness(struct size sz, fftw_direction dir,
                        int alt_api, int specific, int force_buf)
{
     fftwnd_plan reference_plan;

     /* unsupported combinations are skipped silently */
     if (dir != FFTW_FORWARD || force_buf)
          return;

     reference_plan = fftwnd_create_plan(sz.rank, sz.narray, dir,
                                         measure_flag | wisdom_flag);
     CHECK(reference_plan != NULL, "can't create plan");

     testnd_out_of_place(sz.rank, sz.narray, reference_plan);
     testnd_in_place(sz.rank, sz.narray, reference_plan, alt_api, specific);

     fftwnd_destroy_plan(reference_plan);
}
static void iDoFFT(void *map, int width, int height, int inverse, int center, int normalize) { if (inverse && center) iCenterFFT((im_complex*)map, width, height, inverse); #ifdef USE_FFTW3 #if (IM_COMPLEX==IM_FLOAT) fftwf_plan plan = fftwf_plan_dft_2d(height, width, (fftwf_complex*)map, (fftwf_complex*)map, // in-place transform inverse?FFTW_BACKWARD:FFTW_FORWARD, FFTW_ESTIMATE); fftwf_execute(plan); fftwf_destroy_plan(plan); #else fftw_plan plan = fftw_plan_dft_2d(height, width, (fftw_complex*)map, (fftw_complex*)map, // in-place transform inverse ? FFTW_BACKWARD : FFTW_FORWARD, FFTW_ESTIMATE); fftw_execute(plan); fftw_destroy_plan(plan); #endif #else fftwnd_plan plan = fftw2d_create_plan(height, width, inverse?FFTW_BACKWARD:FFTW_FORWARD, FFTW_ESTIMATE|FFTW_IN_PLACE); fftwnd(plan, 1, (FFTW_COMPLEX*)map, 1, 0, 0, 0, 0); fftwnd_destroy_plan(plan); #endif if (!inverse && center) iCenterFFT((im_complex*)map, width, height, inverse); if (normalize) { im_real NM = (im_real)(width * height); int count = (int)(2*NM); if (normalize == 1) NM = (im_real)sqrt(NM); im_real *fmap = (im_real*)map; for (int i = 0; i < count; i++) *fmap++ /= NM; } }
int main(int argc, char **argv) { int c, i, mu, nu; int count = 0; int filename_set = 0; int dims[4] = {0,0,0,0}; int l_LX_at, l_LXstart_at; int x0, x1, x2, x3, ix, iix; int dxm[4], dxn[4], ixpm, ixpn; int sid; double *disc = (double*)NULL; double *work = (double*)NULL; double q[4], fnorm; int verbose = 0; int do_gt = 0; char filename[100]; double ratime, retime; double plaq, _2kappamu, hpe3_coeff, onepmutilde2, mutilde2; double spinor1[24], spinor2[24], U_[18], U1_[18], U2_[18]; double *gauge_trafo=(double*)NULL; complex w, w1, w2, *cp1, *cp2, *cp3; FILE *ofs; fftw_complex *in=(fftw_complex*)NULL; #ifdef MPI fftwnd_mpi_plan plan_p, plan_m; int *status; #else fftwnd_plan plan_p, plan_m; #endif #ifdef MPI MPI_Init(&argc, &argv); #endif while ((c = getopt(argc, argv, "h?vgf:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'g': do_gt = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'h': case '?': default: usage(); break; } } /* set the default values */ set_default_input_values(); if(filename_set==0) strcpy(filename, "cvc.input"); /* read the input file */ read_input(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } if(g_kappa == 0.) 
{ if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n"); usage(); } /* initialize MPI parameters */ mpi_init(argc, argv); #ifdef MPI if((status = (int*)calloc(g_nproc, sizeof(int))) == (int*)NULL) { MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); exit(7); } #endif /* initialize fftw */ dims[0]=T_global; dims[1]=LX; dims[2]=LY; dims[3]=LZ; #ifdef MPI plan_p = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_BACKWARD, FFTW_MEASURE); plan_m = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_FORWARD, FFTW_MEASURE); fftwnd_mpi_local_sizes(plan_p, &T, &Tstart, &l_LX_at, &l_LXstart_at, &FFTW_LOC_VOLUME); #else plan_p = fftwnd_create_plan(4, dims, FFTW_BACKWARD, FFTW_MEASURE | FFTW_IN_PLACE); plan_m = fftwnd_create_plan(4, dims, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE); T = T_global; Tstart = 0; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; #endif fprintf(stdout, "# [%2d] fftw parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] l_LX_at = %3d\n"\ "# [%2d] l_LXstart_at = %3d\n"\ "# [%2d] FFTW_LOC_VOLUME = %3d\n", g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at, g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME); #ifdef MPI if(T==0) { fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id); MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); exit(2); } #endif if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(1); } geometry(); /* read the gauge field */ alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND); sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf); if(g_cart_id==0) fprintf(stdout, "reading gauge field from file %s\n", filename); read_lime_gauge_field_doubleprec(filename); xchange_gauge(); /* measure the plaquette */ plaquette(&plaq); if(g_cart_id==0) fprintf(stdout, "measured plaquette value: %25.16e\n", plaq); if(do_gt==1) { /*********************************** * initialize gauge transformation 
***********************************/ init_gauge_trafo(&gauge_trafo, 1.); apply_gt_gauge(gauge_trafo); plaquette(&plaq); if(g_cart_id==0) fprintf(stdout, "measured plaquette value after gauge trafo: %25.16e\n", plaq); } /**************************************** * allocate memory for the spinor fields ****************************************/ no_fields = 3; g_spinor_field = (double**)calloc(no_fields, sizeof(double*)); for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND); /**************************************** * allocate memory for the contractions ****************************************/ disc = (double*)calloc( 8*VOLUME, sizeof(double)); if( disc == (double*)NULL ) { fprintf(stderr, "could not allocate memory for disc\n"); # ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); # endif exit(3); } for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.; work = (double*)calloc(48*VOLUME, sizeof(double)); if( work == (double*)NULL ) { fprintf(stderr, "could not allocate memory for work\n"); # ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); # endif exit(3); } /**************************************** * prepare Fourier transformation arrays ****************************************/ in = (fftw_complex*)malloc(FFTW_LOC_VOLUME*sizeof(fftw_complex)); if(in==(fftw_complex*)NULL) { #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(4); } /*********************************************** * start loop on source id.s ***********************************************/ for(sid=g_sourceid; sid<=g_sourceid2; sid++) { /******************************** * read the first propagator ********************************/ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif if(format==0) { sprintf(filename, "%s.%.4d.%.2d.inverted", filename_prefix, Nconf, sid); if(read_lime_spinor(g_spinor_field[2], filename, 0) != 0) break; } else if(format==1) { sprintf(filename, "%s.%.4d.%.5d.inverted", filename_prefix, Nconf, 
sid); if(read_cmi(g_spinor_field[2], filename) != 0) break; } xchange_field(g_spinor_field[2]); #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif fprintf(stdout, "time to read prop.: %e seconds\n", retime-ratime); if(do_gt==1) { /****************************************** * gauge transform the propagators for sid ******************************************/ for(ix=0; ix<VOLUME; ix++) { _fv_eq_cm_ti_fv(spinor1, gauge_trafo+18*ix, g_spinor_field[2]+_GSI(ix)); _fv_eq_fv(g_spinor_field[2]+_GSI(ix), spinor1); } xchange_field(g_spinor_field[2]); } /************************************************ * calculate the source: apply Q_phi_tbc ************************************************/ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif Q_phi_tbc(g_spinor_field[0], g_spinor_field[2]); xchange_field(g_spinor_field[0]); #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "time to calculate source: %e seconds\n", retime-ratime); /************************************************ * HPE: apply BH5 ************************************************/ BH5(g_spinor_field[1], g_spinor_field[2]); for(ix=0; ix<8*VOLUME; ix++) {disc[ix] = 0.;} /* add new contractions to (existing) disc */ # ifdef MPI ratime = MPI_Wtime(); # else ratime = (double)clock() / CLOCKS_PER_SEC; # endif for(mu=0; mu<4; mu++) { /* loop on Lorentz index of the current */ iix = _GWI(mu,0,VOLUME); for(ix=0; ix<VOLUME; ix++) { /* loop on lattice sites */ _cm_eq_cm_ti_co(U_, &g_gauge_field[_GGI(ix, mu)], &co_phase_up[mu]); /* first contribution */ _fv_eq_cm_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(g_iup[ix][mu])]); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_mi_eq_fv(spinor2, spinor1); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(ix)], spinor2); disc[iix ] -= 0.5 * w.re; disc[iix+1] -= 0.5 * w.im; /* second contribution */ _fv_eq_cm_dag_ti_fv(spinor1, U_, 
&g_spinor_field[1][_GSI(ix)]); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_pl_eq_fv(spinor2, spinor1); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(g_iup[ix][mu])], spinor2); disc[iix ] -= 0.5 * w.re; disc[iix+1] -= 0.5 * w.im; iix += 2; } /* of ix */ } /* of mu */ #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "[%2d] time to contract cvc: %e seconds\n", g_cart_id, retime-ratime); #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif /* Fourier transform data, copy to work */ for(mu=0; mu<4; mu++) { memcpy((void*)in, (void*)(disc+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double)); #ifdef MPI fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER); #else fftwnd_one(plan_p, in, NULL); #endif memcpy((void*)(work+_GWI(mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double)); } /******************************** * read the second propagator ********************************/ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif if(format==0) { sprintf(filename, "%s.%.4d.%.2d.inverted", filename_prefix, Nconf, sid+g_resume); if(read_lime_spinor(g_spinor_field[2], filename, 0) != 0) break; } else if(format==1) { sprintf(filename, "%s.%.4d.%.5d.inverted", filename_prefix, Nconf, sid+g_resume); if(read_cmi(g_spinor_field[2], filename) != 0) break; } xchange_field(g_spinor_field[2]); #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif fprintf(stdout, "time to read prop.: %e seconds\n", retime-ratime); if(do_gt==1) { /****************************************** * gauge transform the propagators for sid ******************************************/ for(ix=0; ix<VOLUME; ix++) { _fv_eq_cm_ti_fv(spinor1, gauge_trafo+18*ix, g_spinor_field[2]+_GSI(ix)); _fv_eq_fv(g_spinor_field[2]+_GSI(ix), spinor1); } xchange_field(g_spinor_field[2]); } /************************************************ * calculate the source: apply 
Q_phi_tbc ************************************************/ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif Q_phi_tbc(g_spinor_field[0], g_spinor_field[2]); xchange_field(g_spinor_field[0]); #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "time to calculate source: %e seconds\n", retime-ratime); /************************************************ * HPE: apply BH5 ************************************************/ BH5(g_spinor_field[1], g_spinor_field[2]); for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.; /* add new contractions to (existing) disc */ # ifdef MPI ratime = MPI_Wtime(); # else ratime = (double)clock() / CLOCKS_PER_SEC; # endif for(mu=0; mu<4; mu++) { /* loop on Lorentz index of the current */ iix = _GWI(mu,0,VOLUME); for(ix=0; ix<VOLUME; ix++) { /* loop on lattice sites */ _cm_eq_cm_ti_co(U_, &g_gauge_field[_GGI(ix, mu)], &co_phase_up[mu]); /* first contribution */ _fv_eq_cm_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(g_iup[ix][mu])]); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_mi_eq_fv(spinor2, spinor1); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(ix)], spinor2); disc[iix ] -= 0.5 * w.re; disc[iix+1] -= 0.5 * w.im; /* second contribution */ _fv_eq_cm_dag_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(ix)]); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_pl_eq_fv(spinor2, spinor1); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(g_iup[ix][mu])], spinor2); disc[iix ] -= 0.5 * w.re; disc[iix+1] -= 0.5 * w.im; iix += 2; } /* of ix */ } /* of mu */ #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "[%2d] time to contract cvc: %e seconds\n", g_cart_id, retime-ratime); #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif /* Fourier transform data, copy to work */ for(mu=0; mu<4; mu++) { memcpy((void*)in, (void*)(disc+_GWI(mu,0,VOLUME)), 
2*VOLUME*sizeof(double)); #ifdef MPI fftwnd_mpi(plan_m, 1, in, NULL, FFTW_NORMAL_ORDER); #else fftwnd_one(plan_m, in, NULL); #endif memcpy((void*)(work+_GWI(4+mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double)); } fnorm = 1. / ((double)(T_global*LX*LY*LZ)); fprintf(stdout, "fnorm = %e\n", fnorm); for(mu=0; mu<4; mu++) { for(nu=0; nu<4; nu++) { cp1 = (complex*)(work+_GWI(mu,0,VOLUME)); cp2 = (complex*)(work+_GWI(4+nu,0,VOLUME)); cp3 = (complex*)(work+_GWI(8+4*mu+nu,0,VOLUME)); for(x0=0; x0<T; x0++) { q[0] = (double)(x0+Tstart) / (double)T_global; for(x1=0; x1<LX; x1++) { q[1] = (double)(x1) / (double)LX; for(x2=0; x2<LY; x2++) { q[2] = (double)(x2) / (double)LY; for(x3=0; x3<LZ; x3++) { q[3] = (double)(x3) / (double)LZ; ix = g_ipt[x0][x1][x2][x3]; w.re = cos( M_PI * (q[mu]-q[nu]) ); w.im = sin( M_PI * (q[mu]-q[nu]) ); _co_eq_co_ti_co(&w1, cp1, cp2); _co_eq_co_ti_co(cp3, &w1, &w); _co_ti_eq_re(cp3, fnorm); cp1++; cp2++; cp3++; } } } } } } /* save the result in momentum space */ sprintf(filename, "cvc_hpe5_ft.%.4d.%.2d", Nconf, sid); write_contraction(work+_GWI(8,0,VOLUME), NULL, filename, 16, 0, 0); #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "time to save cvc results: %e seconds\n", retime-ratime); } /* of loop on sid */ /*********************************************** * free the allocated memory, finalize ***********************************************/ free(g_gauge_field); for(i=0; i<no_fields; i++) free(g_spinor_field[i]); free(g_spinor_field); free_geometry(); fftw_free(in); free(disc); free(work); #ifdef MPI fftwnd_mpi_destroy_plan(plan_p); fftwnd_mpi_destroy_plan(plan_m); free(status); MPI_Finalize(); #else fftwnd_destroy_plan(plan_p); fftwnd_destroy_plan(plan_m); #endif return(0); }
/*
 * Build an fftwnd_plan tuned for particular input/output arrays.
 *
 * When FFTW_MEASURE is not requested, when `in' is NULL, or when the
 * transform is out-of-place and `out' is NULL, the arrays are ignored and
 * the generic (default) set of 1D plans is used.  Otherwise both the
 * buffered and the unbuffered variants are created and timed on the given
 * arrays, and the faster one is kept.  Having the arrays available lets us
 * take runtime measurements without allocating excessive scratch space.
 *
 * Returns the new plan, or 0 if any allocation or sub-plan creation fails
 * (everything created so far is released in that case).
 */
fftwnd_plan fftwnd_create_plan_specific(int rank, const int *n,
                                        fftw_direction dir, int flags,
                                        fftw_complex *in, int istride,
                                        fftw_complex *out, int ostride)
{
     fftwnd_plan plan;
     fftw_plan *buffered, *unbuffered;
     double time_buffered, time_unbuffered;
     int use_default;

     plan = fftwnd_create_plan_aux(rank, n, dir, flags);
     if (!plan)
          return 0;

     use_default = !(flags & FFTW_MEASURE) || in == 0
          || (!plan->is_in_place && out == 0);

     if (use_default) {
          /**** use default plan ****/
          plan->plans = fftwnd_create_plans_generic(fftwnd_new_plan_array(rank),
                                                    rank, n, dir, flags);
          if (!plan->plans) {
               fftwnd_destroy_plan(plan);
               return 0;
          }

          plan->nbuffers = (flags & FFTWND_FORCE_BUFFERED)
               ? FFTWND_NBUFFERS : FFTWND_DEFAULT_NBUFFERS;

          plan->nwork = fftwnd_work_size(rank, n, flags, plan->nbuffers + 1);
          if (plan->nwork && !(flags & FFTW_THREADSAFE)) {
               plan->work = (fftw_complex*) fftw_malloc(plan->nwork * sizeof(fftw_complex));
               if (!plan->work) {
                    fftwnd_destroy_plan(plan);
                    return 0;
               }
          }
          return plan;
     }

     /**** use runtime measurements to pick plan ****/

     /* scratch space large enough for the buffered variant */
     plan->nwork = fftwnd_work_size(rank, n, flags, FFTWND_NBUFFERS + 1);
     if (plan->nwork && !(flags & FFTW_THREADSAFE)) {
          plan->work = (fftw_complex*) fftw_malloc(plan->nwork * sizeof(fftw_complex));
          if (!plan->work) {
               fftwnd_destroy_plan(plan);
               return 0;
          }
     } else {
          plan->work = (fftw_complex*) NULL;
     }

     /* two possible sets of 1D plans: */
     buffered = fftwnd_create_plans_generic(fftwnd_new_plan_array(rank),
                                            rank, n, dir, flags);
     unbuffered = fftwnd_create_plans_specific(fftwnd_new_plan_array(rank),
                                               rank, n, plan->n_after, dir, flags,
                                               in, istride, out, ostride);
     if (!buffered || !unbuffered) {
          destroy_plan_array(rank, unbuffered);
          destroy_plan_array(rank, buffered);
          fftwnd_destroy_plan(plan);
          return 0;
     }

     /* time the two possible plans */
     plan->plans = unbuffered;
     plan->nbuffers = 0;
     plan->nwork = fftwnd_work_size(rank, n, flags, plan->nbuffers + 1);
     time_unbuffered = fftwnd_measure_runtime(plan, in, istride, out, ostride);

     plan->plans = buffered;
     plan->nbuffers = FFTWND_NBUFFERS;
     plan->nwork = fftwnd_work_size(rank, n, flags, plan->nbuffers + 1);
     time_buffered = fftwnd_measure_runtime(plan, in, istride, out, ostride);

     /* pick the better one: */
     if (time_unbuffered < time_buffered) {
          /* use unbuffered transform */
          plan->plans = unbuffered;
          plan->nbuffers = 0;

          /* work array is unnecessarily large */
          if (plan->work)
               fftw_free(plan->work);
          plan->work = 0;

          destroy_plan_array(rank, buffered);

          /* allocate a work array of the correct size: */
          plan->nwork = fftwnd_work_size(rank, n, flags, plan->nbuffers + 1);
          if (plan->nwork && !(flags & FFTW_THREADSAFE)) {
               plan->work = (fftw_complex*) fftw_malloc(plan->nwork * sizeof(fftw_complex));
               if (!plan->work) {
                    fftwnd_destroy_plan(plan);
                    return 0;
               }
          }
     } else {
          /* use buffered transform: plan already holds the buffered setup */
          destroy_plan_array(rank, unbuffered);
     }

     return plan;
}
/*
 * Fortran-callable wrapper around fftwnd_destroy_plan.
 * p points to the plan handle, which Fortran passes by reference.
 */
void F77_FUNC_(fftwnd_f77_destroy_plan,FFTWND_F77_DESTROY_PLAN)
(fftwnd_plan *p)
{
     fftwnd_plan plan = *p;

     fftwnd_destroy_plan(plan);
}
//----------------------------------------------------------------------- int fdct_wrapping_sepangle(double XL1, double XL2, int nbangle, CpxOffMat& Xhgh, vector<CpxNumMat>& csc) { //WEDGE ORDERING: from -45 degree, counter-clockwise typedef pair<int,int> intpair; map<intpair, fftwnd_plan> planmap; int nbquadrants = 4; int nd = nbangle / 4; int wcnt = 0; //backup CpxOffMat Xhghb(Xhgh); double XL1b = XL1; double XL2b = XL2; int qvec[] = {2,1,0,3}; for(int qi=0; qi<nbquadrants; qi++) { int q = qvec[qi]; //ROTATE data to its right position fdct_wrapping_rotate_forward(q, XL1b, XL2b, XL1, XL2); XL1 = abs(XL1); XL2 = abs(XL2); fdct_wrapping_rotate_forward(q, Xhghb, Xhgh); //figure out XS, XF, XR double XW1 = XL1/nd; double XW2 = XL2/nd; int XS1, XS2; int XF1, XF2; double XR1, XR2; fdct_wrapping_rangecompute(XL1, XL2, XS1, XS2, XF1, XF2, XR1, XR2); for(int w=nd-1; w>=0; w--) { double xs = XR1/4 - (XW1/2)/4; double xe = XR1; double ys = -XR2 + (w-0.5)*XW2; double ye = -XR2 + (w+1.5)*XW2; //x range int xn = int(ceil(xe-xs)); int yn = int(ceil(ye-ys)); //MAKE THEM ODD if(xn%2==0) xn++; if(yn%2==0) yn++; int xf = int(ceil(xs)); //int yf = int(ceil(ys)); //theta double thts, thtm, thte; //y direction if(w==0) { thts = atan2(-1.0, 1.0-1.0/nd); thtm = atan2(-1.0+1.0/nd, 1.0); thte = atan2(-1.0+3.0/nd, 1.0); } else if(w==nd-1) { thts = atan2(-1.0+(2.0*w-1.0)/nd, 1.0); thtm = atan2(-1.0+(2.0*w+1.0)/nd, 1.0); thte = atan2(1.0, 1.0-1.0/nd); } else { thts = atan2(-1.0+(2.0*w-1.0)/nd, 1.0); thtm = atan2(-1.0+(2.0*w+1.0)/nd, 1.0); thte = atan2(-1.0+(2.0*w+3.0)/nd, 1.0); } //wrapping int xh = xn/2; int yh = yn/2; //half length double R21 = XR2/XR1; //ratio CpxOffMat wpdata(xn,yn); for(int xcur=xf; xcur<xe; xcur++) { //for each layer int yfm = (int)ceil( max(-XR2, R21*xcur*tan(thts)) ); int yto = (int)floor( min(XR2, R21*xcur*tan(thte)) ); for(int ycur=yfm; ycur<=yto; ycur++) { int tmpx = xcur%xn; if(tmpx<-xh) tmpx+=xn; if(tmpx>=-xh+xn) tmpx-=xn; int tmpy = ycur%yn; if(tmpy<-yh) 
tmpy+=yn; if(tmpy>=-yh+yn) tmpy-=yn; wpdata(tmpx,tmpy) = Xhgh(xcur,ycur); //partition of unity double thtcur = atan2(ycur/XR2, xcur/XR1); double wtht; if(thtcur<thtm) { double l,r; fdct_wrapping_window((thtcur-thts)/(thtm-thts), l, r); wtht = l; } else { double l,r; fdct_wrapping_window((thtcur-thtm)/(thte-thtm), l, r); wtht = r; } double pou = wtht; wpdata(tmpx,tmpy) *= pou; } } //IFFT { //rotate backward CpxOffMat rpdata; fdct_wrapping_rotate_backward(q, wpdata, rpdata); //ifftshift int xn = rpdata.m(); int yn = rpdata.n(); //reset xn, yn CpxNumMat tpdata(xn,yn); fdct_wrapping_ifftshift(rpdata, tpdata); //ifft fftwnd_plan p = NULL; map<intpair,fftwnd_plan>::iterator mit=planmap.find( intpair(xn,yn) ); if(mit!=planmap.end()) { p = (*mit).second; } else { p = fftw2d_create_plan(yn, xn, FFTW_BACKWARD, FFTW_ESTIMATE | FFTW_IN_PLACE); planmap[ intpair(xn, yn) ] = p; } fftwnd_one(p, (fftw_complex*)tpdata.data(), NULL); double sqrtprod = sqrt(double(xn*yn)); for(int j=0; j<yn; j++) for(int i=0; i<xn; i++) tpdata(i,j) /= sqrtprod; //store csc[wcnt] = tpdata; } //fdct_wrapping_fftshift(xn,yn,xh,yh,tpdata,wpdata); //ROTATION //fdct_wrapping_rotate_backward(q, wpdata, csc[wcnt]); wcnt++; } //end of w loop } //end of q loop //PUT THE RIGHT DATA BACK Xhgh = Xhghb; XL1 = XL1b; XL2 = XL2b; for(map<intpair, fftwnd_plan>::iterator mit=planmap.begin(); mit!=planmap.end(); mit++) { fftwnd_plan p = (*mit).second; fftwnd_destroy_plan(p); } return 0; }
int main(int argc, char **argv) { int c, i, mu, nu; int count = 0; int filename_set = 0; int dims[4] = {0,0,0,0}; int l_LX_at, l_LXstart_at; int x0, x1, x2, x3, ix, iix; int dxm[4], dxn[4], ixpm, ixpn; int sid; double *disc = (double*)NULL; double *disc2 = (double*)NULL; double *work = (double*)NULL; double q[4], fnorm; int verbose = 0; int do_gt = 0; char filename[100], contype[200]; double ratime, retime; double plaq, _2kappamu, hpe3_coeff, onepmutilde2, mutilde2; double spinor1[24], spinor2[24], U_[18], U1_[18], U2_[18]; double *gauge_trafo=(double*)NULL; complex w, w1, w2, *cp1, *cp2, *cp3; FILE *ofs; fftw_complex *in=(fftw_complex*)NULL; #ifdef MPI fftwnd_mpi_plan plan_p, plan_m; #else fftwnd_plan plan_p, plan_m; #endif #ifdef MPI MPI_Init(&argc, &argv); #endif while ((c = getopt(argc, argv, "h?vgf:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'g': do_gt = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'h': case '?': default: usage(); break; } } /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# Reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } if(g_kappa == 0.) 
{ if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n"); usage(); } /* initialize MPI parameters */ mpi_init(argc, argv); /* initialize fftw */ dims[0]=T_global; dims[1]=LX; dims[2]=LY; dims[3]=LZ; #ifdef MPI plan_p = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_BACKWARD, FFTW_MEASURE); plan_m = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_FORWARD, FFTW_MEASURE); fftwnd_mpi_local_sizes(plan_p, &T, &Tstart, &l_LX_at, &l_LXstart_at, &FFTW_LOC_VOLUME); #else plan_p = fftwnd_create_plan(4, dims, FFTW_BACKWARD, FFTW_MEASURE | FFTW_IN_PLACE); plan_m = fftwnd_create_plan(4, dims, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE); T = T_global; Tstart = 0; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; #endif fprintf(stdout, "# [%2d] fftw parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] l_LX_at = %3d\n"\ "# [%2d] l_LXstart_at = %3d\n"\ "# [%2d] FFTW_LOC_VOLUME = %3d\n", g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at, g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME); #ifdef MPI if(T==0) { fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id); MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); exit(2); } #endif if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(1); } geometry(); /* read the gauge field */ alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND); sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf); if(g_cart_id==0) fprintf(stdout, "reading gauge field from file %s\n", filename); read_lime_gauge_field_doubleprec(filename); #ifdef MPI xchange_gauge(); #endif /* measure the plaquette */ plaquette(&plaq); if(g_cart_id==0) fprintf(stdout, "measured plaquette value: %25.16e\n", plaq); if(do_gt==1) { /*********************************** * initialize gauge transformation ***********************************/ init_gauge_trafo(&gauge_trafo, 1.); apply_gt_gauge(gauge_trafo); plaquette(&plaq); if(g_cart_id==0) 
fprintf(stdout, "measured plaquette value after gauge trafo: %25.16e\n", plaq); } /**************************************** * allocate memory for the spinor fields ****************************************/ no_fields = 3; g_spinor_field = (double**)calloc(no_fields, sizeof(double*)); for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND); /**************************************** * allocate memory for the contractions ****************************************/ disc = (double*)calloc( 8*VOLUME, sizeof(double)); if( disc == (double*)NULL ) { fprintf(stderr, "could not allocate memory for disc\n"); # ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); # endif exit(3); } for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.; disc2 = (double*)calloc( 8*VOLUME, sizeof(double)); if( disc2 == (double*)NULL ) { fprintf(stderr, "could not allocate memory for disc2\n"); # ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); # endif exit(3); } for(ix=0; ix<8*VOLUME; ix++) disc2[ix] = 0.; work = (double*)calloc(48*VOLUME, sizeof(double)); if( work == (double*)NULL ) { fprintf(stderr, "could not allocate memory for work\n"); # ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); # endif exit(3); } /**************************************** * prepare Fourier transformation arrays ****************************************/ in = (fftw_complex*)malloc(FFTW_LOC_VOLUME*sizeof(fftw_complex)); if(in==(fftw_complex*)NULL) { #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(4); } /************************************************ * HPE: calculate coeff. of 3rd order term ************************************************/ _2kappamu = 2. * g_kappa * g_mu; onepmutilde2 = 1. + _2kappamu * _2kappamu; mutilde2 = _2kappamu * _2kappamu; hpe3_coeff = 16. * g_kappa*g_kappa*g_kappa*g_kappa * (1. + 6. * mutilde2 + mutilde2*mutilde2) / onepmutilde2 / onepmutilde2 / onepmutilde2 / onepmutilde2; /* hpe3_coeff = 8. * g_kappa*g_kappa*g_kappa * \ (1. 
+ 6.*_2kappamu*_2kappamu + _2kappamu*_2kappamu*_2kappamu*_2kappamu) / (1. + _2kappamu*_2kappamu) / (1. + _2kappamu*_2kappamu) / (1. + _2kappamu*_2kappamu) / (1. + _2kappamu*_2kappamu); */ fprintf(stdout, "hpe3_coeff = %25.16e\n", hpe3_coeff); /************************************************ * HPE: calculate the plaquette terms ************************************************/ for(ix=0; ix<VOLUME; ix++) { for(mu=0; mu<4; mu++) { for(i=1; i<4; i++) { nu = (mu+i)%4; _cm_eq_cm_ti_cm(U1_, g_gauge_field+_GGI(ix,mu), g_gauge_field+_GGI(g_iup[ix][mu],nu) ); _cm_eq_cm_ti_cm(U2_, g_gauge_field+_GGI(ix,nu), g_gauge_field+_GGI(g_iup[ix][nu],mu) ); _cm_eq_cm_ti_cm_dag(U_, U1_, U2_); _co_eq_tr_cm(&w1, U_); iix = g_idn[ix][nu]; _cm_eq_cm_ti_cm(U1_, g_gauge_field+_GGI(iix,mu), g_gauge_field+_GGI(g_iup[iix][mu],nu) ); _cm_eq_cm_ti_cm(U2_, g_gauge_field+_GGI(iix,nu), g_gauge_field+_GGI(g_iup[iix][nu],mu) ); _cm_eq_cm_ti_cm_dag(U_, U1_, U2_); _co_eq_tr_cm(&w2, U_); disc2[_GWI(mu,ix,VOLUME)+1] += hpe3_coeff * (w1.im - w2.im); /* _cm_eq_cm_ti_cm(U1_, g_gauge_field+_GGI(g_idn[ix][nu],nu), g_gauge_field+_GGI(ix,mu) ); _cm_eq_cm_ti_cm(U2_, g_gauge_field+_GGI(g_idn[ix][nu],mu), g_gauge_field+_GGI(g_iup[g_idn[ix][nu]][mu], nu) ); _cm_eq_cm_ti_cm_dag(U_, U1_, U2_); _co_eq_tr_cm(&w2, U_); disc2[_GWI(mu,ix,VOLUME)+1] += hpe3_coeff * (w1.im + w2.im); */ /* fprintf(stdout, "mu=%1d, ix=%5d, nu=%1d, w1=%25.16e +i %25.16e; w2=%25.16e +i %25.16e\n", mu, ix, nu, w1.re, w1.im, w2.re, w2.im); */ } /* of nu */ /**************************************** * - in case lattice size equals 4 * calculate additional loop term * - _NOTE_ the possible minus sign from * the fermionic boundary conditions ****************************************/ if(dims[mu]==4) { wilson_loop(&w, ix, mu, dims[mu]); fnorm = -64. 
* g_kappa*g_kappa*g_kappa*g_kappa / onepmutilde2 / onepmutilde2 / onepmutilde2 / onepmutilde2; disc2[_GWI(mu,ix,VOLUME)+1] += fnorm * w.im; /* fprintf(stdout, "loop contribution: ix=%5d, mu=%2d, fnorm=%25.16e, w=%25.16e\n", ix, mu, fnorm, w.im); */ } /* fprintf(stdout, "-------------------------------------------\n"); fprintf(stdout, "disc2[ix=%d,mu=%d] = %25.16e +i %25.16e\n", ix, mu, disc2[_GWI(mu,ix,VOLUME)], disc2[_GWI(mu,ix,VOLUME)+1]); fprintf(stdout, "-------------------------------------------\n"); */ } } /* sprintf(filename, "avc_disc_hpe5_3rd.%.4d", Nconf); ofs = fopen(filename, "w"); for(ix=0; ix<VOLUME; ix++) { for(mu=0; mu<4; mu++) { fprintf(ofs, "%6d%3d%25.16e\t%25.16e\n", ix, mu, disc[_GWI(mu,ix,VOLUME)], disc[_GWI(mu,ix,VOLUME)+1]); } } fclose(ofs); for(ix=0; ix<8*VOLUME; ix++) disc[ix] = 0.; */ /* for(x0=0; x0<T; x0++) { for(x1=0; x1<LX; x1++) { for(x2=0; x2<LY; x2++) { for(x3=0; x3<LZ; x3++) { ix = g_ipt[x0][x1][x2][x3]; for(mu=0; mu<4; mu++) { dxm[0]=0; dxm[1]=0; dxm[2]=0; dxm[3]=0; dxm[mu]=1; for(i=1; i<4; i++) { nu = (mu+i)%4; dxn[0]=0; dxn[1]=0; dxn[2]=0; dxn[3]=0; dxn[nu]=1; ixpm = g_ipt[(x0+dxm[0]+T)%T][(x1+dxm[1]+LX)%LX][(x2+dxm[2]+LY)%LY][(x3+dxm[3]+LZ)%LZ]; ixpn = g_ipt[(x0+dxn[0]+T)%T][(x1+dxn[1]+LX)%LX][(x2+dxn[2]+LY)%LY][(x3+dxn[3]+LZ)%LZ]; _cm_eq_cm_ti_cm(U1_, g_gauge_field + 72*ix+18*mu, g_gauge_field + 72*ixpm+18*nu ); _cm_eq_cm_ti_cm(U2_, g_gauge_field + 72*ix+18*nu, g_gauge_field + 72*ixpn+18*mu ); _cm_eq_cm_ti_cm_dag(U_, U1_, U2_); _co_eq_tr_cm(&w1, U_); ixpm = g_ipt[(x0+dxm[0]-dxn[0]+T)%T][(x1+dxm[1]-dxn[1]+LX)%LX][(x2+dxm[2]-dxn[2]+LY)%LY][(x3+dxm[3]-dxn[3]+LZ)%LZ]; ixpn = g_ipt[(x0-dxn[0]+T)%T][(x1-dxn[1]+LX)%LX][(x2-dxn[2]+LY)%LY][(x3-dxn[3]+LZ)%LZ]; _cm_eq_cm_ti_cm(U1_, g_gauge_field + 72*ixpn+18*nu, g_gauge_field + 72*ix+18*mu); _cm_eq_cm_ti_cm(U2_, g_gauge_field + 72*ixpn+18*mu, g_gauge_field + 72*ixpm+18*nu); _cm_eq_cm_ti_cm_dag(U_, U1_, U2_); _co_eq_tr_cm(&w2, U_); disc2[_GWI(mu,ix,VOLUME)+1] += hpe3_coeff * (w1.im + 
w2.im); fprintf(stdout, "mu=%1d, ix=%5d, nu=%1d, w1=%25.16e; w2=%25.16e\n", mu, ix, nu, w1.im, w2.im); } fprintf(stdout, "-------------------------------------------\n"); fprintf(stdout, "disc2[ix=%d,mu=%d] = %25.16e +i %25.16e\n", ix, mu, disc2[_GWI(mu,ix,VOLUME)], disc2[_GWI(mu,ix,VOLUME)+1]); fprintf(stdout, "-------------------------------------------\n"); } } } } } */ /*********************************************** * start loop on source id.s ***********************************************/ for(sid=g_sourceid; sid<=g_sourceid2; sid+=g_sourceid_step) { /* read the new propagator */ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif if(format==0) { sprintf(filename, "%s.%.4d.%.2d.inverted", filename_prefix, Nconf, sid); if(read_lime_spinor(g_spinor_field[2], filename, 0) != 0) break; } else if(format==1) { sprintf(filename, "%s.%.4d.%.5d.inverted", filename_prefix, Nconf, sid); if(read_cmi(g_spinor_field[2], filename) != 0) break; } xchange_field(g_spinor_field[2]); #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif fprintf(stdout, "time to read prop.: %e seconds\n", retime-ratime); if(do_gt==1) { /****************************************** * gauge transform the propagators for sid ******************************************/ for(ix=0; ix<VOLUME; ix++) { _fv_eq_cm_ti_fv(spinor1, gauge_trafo+18*ix, g_spinor_field[2]+_GSI(ix)); _fv_eq_fv(g_spinor_field[2]+_GSI(ix), spinor1); } xchange_field(g_spinor_field[2]); } count++; /************************************************ * calculate the source: apply Q_phi_tbc ************************************************/ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif Q_phi_tbc(g_spinor_field[0], g_spinor_field[2]); xchange_field(g_spinor_field[0]); #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "time to calculate source: %e seconds\n", 
retime-ratime); /************************************************ * HPE: apply BH5 ************************************************/ BH5(g_spinor_field[1], g_spinor_field[2]); /* add new contractions to (existing) disc */ # ifdef MPI ratime = MPI_Wtime(); # else ratime = (double)clock() / CLOCKS_PER_SEC; # endif for(mu=0; mu<4; mu++) { /* loop on Lorentz index of the current */ iix = _GWI(mu,0,VOLUME); for(ix=0; ix<VOLUME; ix++) { /* loop on lattice sites */ _cm_eq_cm_ti_co(U_, &g_gauge_field[_GGI(ix, mu)], &co_phase_up[mu]); /* first contribution */ _fv_eq_cm_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(g_iup[ix][mu])]); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_mi_eq_fv(spinor2, spinor1); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(ix)], spinor2); disc[iix ] -= 0.5 * w.re; disc[iix+1] -= 0.5 * w.im; /* second contribution */ _fv_eq_cm_dag_ti_fv(spinor1, U_, &g_spinor_field[1][_GSI(ix)]); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_pl_eq_fv(spinor2, spinor1); _co_eq_fv_dag_ti_fv(&w, &g_spinor_field[0][_GSI(g_iup[ix][mu])], spinor2); disc[iix ] -= 0.5 * w.re; disc[iix+1] -= 0.5 * w.im; iix += 2; } /* of ix */ } /* of mu */ #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "# time to contract cvc: %e seconds\n", retime-ratime); /************************************************ * save results for count = multiple of Nsave ************************************************/ if(count%Nsave == 0) { if(g_cart_id == 0) fprintf(stdout, "save results for count = %d\n", count); fnorm = 1. 
/ ( (double)count * g_prop_normsqr ); if(g_cart_id==0) fprintf(stdout, "# X-fnorm = %e\n", fnorm); for(mu=0; mu<4; mu++) { for(ix=0; ix<VOLUME; ix++) { work[_GWI(mu,ix,VOLUME) ] = disc[_GWI(mu,ix,VOLUME) ] * fnorm + disc2[_GWI(mu,ix,VOLUME) ]; work[_GWI(mu,ix,VOLUME)+1] = disc[_GWI(mu,ix,VOLUME)+1] * fnorm + disc2[_GWI(mu,ix,VOLUME)+1]; } } /* save the result in position space */ sprintf(filename, "cvc_hpe5_X.%.4d.%.4d", Nconf, count); sprintf(contype, "cvc-disc-all-hpe-05-X"); write_lime_contraction(work, filename, 64, 4, contype, Nconf, count); /* sprintf(filename, "cvc_hpe5_Xascii.%.4d.%.4d", Nconf, count); write_contraction(work, NULL, filename, 4, 2, 0); */ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif /* Fourier transform data, copy to work */ for(mu=0; mu<4; mu++) { memcpy((void*)in, (void*)(work+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double)); #ifdef MPI fftwnd_mpi(plan_m, 1, in, NULL, FFTW_NORMAL_ORDER); #else fftwnd_one(plan_m, in, NULL); #endif memcpy((void*)(work+_GWI(4+mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double)); memcpy((void*)in, (void*)(work+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double)); #ifdef MPI fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER); #else fftwnd_one(plan_p, in, NULL); #endif memcpy((void*)(work+_GWI(mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double)); } /* of mu =0 ,..., 3*/ fnorm = 1. 
/ (double)(T_global*LX*LY*LZ); if(g_cart_id==0) fprintf(stdout, "# P-fnorm = %e\n", fnorm); for(mu=0; mu<4; mu++) { for(nu=0; nu<4; nu++) { cp1 = (complex*)(work+_GWI(mu,0,VOLUME)); cp2 = (complex*)(work+_GWI(4+nu,0,VOLUME)); cp3 = (complex*)(work+_GWI(8+4*mu+nu,0,VOLUME)); for(x0=0; x0<T; x0++) { q[0] = (double)(x0+Tstart) / (double)T_global; for(x1=0; x1<LX; x1++) { q[1] = (double)(x1) / (double)LX; for(x2=0; x2<LY; x2++) { q[2] = (double)(x2) / (double)LY; for(x3=0; x3<LZ; x3++) { q[3] = (double)(x3) / (double)LZ; ix = g_ipt[x0][x1][x2][x3]; w.re = cos( M_PI * (q[mu]-q[nu]) ); w.im = sin( M_PI * (q[mu]-q[nu]) ); _co_eq_co_ti_co(&w1, cp1, cp2); _co_eq_co_ti_co(cp3, &w1, &w); _co_ti_eq_re(cp3, fnorm); cp1++; cp2++; cp3++; } } } } } } /* save the result in momentum space */ sprintf(filename, "cvc_hpe5_P.%.4d.%.4d", Nconf, count); sprintf(contype, "cvc-disc-all-hpe-05-P"); write_lime_contraction(work+_GWI(8,0,VOLUME), filename, 64, 16, contype, Nconf, count); /* sprintf(filename, "cvc_hpe5_Pascii.%.4d.%.4d", Nconf, count); write_contraction(work+_GWI(8,0,VOLUME), NULL, filename, 16, 2, 0); */ #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "# time to save cvc results: %e seconds\n", retime-ratime); } /* of count % Nsave == 0 */ } /* of loop on sid */ /*********************************************** * free the allocated memory, finalize ***********************************************/ free(g_gauge_field); for(i=0; i<no_fields; i++) free(g_spinor_field[i]); free(g_spinor_field); free_geometry(); fftw_free(in); free(disc); free(work); #ifdef MPI fftwnd_mpi_destroy_plan(plan_p); fftwnd_mpi_destroy_plan(plan_m); MPI_Finalize(); #else fftwnd_destroy_plan(plan_p); fftwnd_destroy_plan(plan_m); #endif return(0); }
void test_planner(int rank) { /* * create and destroy many plans, at random. Check the * garbage-collecting allocator of twiddle factors */ int i, dim; int r, s; fftw_plan p[PLANNER_TEST_SIZE]; fftwnd_plan pnd[PLANNER_TEST_SIZE]; int *narr, maxdim; chk_mem_leak = 0; verbose--; please_wait(); if (rank < 1) rank = 1; narr = (int *) fftw_malloc(rank * sizeof(int)); maxdim = (int) pow(8192.0, 1.0/rank); for (i = 0; i < PLANNER_TEST_SIZE; ++i) { p[i] = (fftw_plan) 0; pnd[i] = (fftwnd_plan) 0; } for (i = 0; i < PLANNER_TEST_SIZE * PLANNER_TEST_SIZE; ++i) { r = rand(); if (r < 0) r = -r; r = r % PLANNER_TEST_SIZE; for (dim = 0; dim < rank; ++dim) { do { s = rand(); if (s < 0) s = -s; s = s % maxdim + 1; } while (s == 0); narr[dim] = s; } if (rank == 1) { if (p[r]) fftw_destroy_plan(p[r]); p[r] = fftw_create_plan(narr[0], random_dir(), measure_flag | wisdom_flag); if (paranoid && narr[0] < 200) test_correctness(narr[0]); } if (pnd[r]) fftwnd_destroy_plan(pnd[r]); pnd[r] = fftwnd_create_plan(rank, narr, random_dir(), measure_flag | wisdom_flag); if (i % (PLANNER_TEST_SIZE * PLANNER_TEST_SIZE / 20) == 0) { WHEN_VERBOSE(0, printf("test planner: so far so good\n")); WHEN_VERBOSE(0, printf("test planner: iteration %d out of %d\n", i, PLANNER_TEST_SIZE * PLANNER_TEST_SIZE)); } } for (i = 0; i < PLANNER_TEST_SIZE; ++i) { if (p[i]) fftw_destroy_plan(p[i]); if (pnd[i]) fftwnd_destroy_plan(pnd[i]); } fftw_free(narr); verbose++; chk_mem_leak = 1; }
/*
 * Checks an in-place n-dimensional transform against a validated plan.
 *
 * A plan is created for the sizes n[0..rank-1] and direction dir, through
 * one of six creation routines selected by alternate_api and specific.  For
 * every stride from 1 to MAX_STRIDE the same random input is replicated
 * `stride` times (interleaved), transformed in place, and each replica is
 * compared with the out-of-place result of validated_plan.
 *
 *   rank, n         dimensionality and extents of the transform
 *   dir             transform direction
 *   validated_plan  plan used to compute the reference result
 *   alternate_api   non-zero: use the fftw2d_ / fftw3d_ creation calls when
 *                   rank is 2 or 3
 *   specific        non-zero: use the *_create_plan_specific variants
 *   force_buffered  non-zero: add FFTWND_FORCE_BUFFERED to the plan flags
 */
void testnd_in_place(int rank, int *n, fftw_direction dir,
                     fftwnd_plan validated_plan,
                     int alternate_api, int specific, int force_buffered)
{
     fftw_complex *strided, *reference_in, *reference_out;
     fftwnd_plan plan;
     int total, d, stride, k;
     int use_alternate;
     int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;

     if (coinflip())
          flags |= FFTW_THREADSAFE;
     if (force_buffered)
          flags |= FFTWND_FORCE_BUFFERED;

     /* total number of elements of one transform */
     total = 1;
     for (d = 0; d < rank; ++d)
          total *= n[d];

     strided = (fftw_complex *) fftw_malloc(total * MAX_STRIDE * sizeof(fftw_complex));
     reference_in = (fftw_complex *) fftw_malloc(total * sizeof(fftw_complex));
     reference_out = (fftw_complex *) fftw_malloc(total * sizeof(fftw_complex));

     /* the 2d/3d convenience calls only exist for those two ranks */
     use_alternate = alternate_api && (rank == 2 || rank == 3);

     if (use_alternate && rank == 2) {
          if (specific)
               plan = fftw2d_create_plan_specific(n[0], n[1], dir, flags,
                                                  strided, 1,
                                                  (fftw_complex *) NULL, 1);
          else
               plan = fftw2d_create_plan(n[0], n[1], dir, flags);
     } else if (use_alternate) {
          if (specific)
               plan = fftw3d_create_plan_specific(n[0], n[1], n[2], dir, flags,
                                                  strided, 1,
                                                  (fftw_complex *) NULL, 1);
          else
               plan = fftw3d_create_plan(n[0], n[1], n[2], dir, flags);
     } else {
          /* standard api */
          if (specific)
               plan = fftwnd_create_plan_specific(rank, n, dir, flags,
                                                  strided, 1,
                                                  (fftw_complex *) NULL, 1);
          else
               plan = fftwnd_create_plan(rank, n, dir, flags);
     }

     for (stride = 1; stride <= MAX_STRIDE; ++stride) {
          /* generate random inputs and replicate them `stride` times */
          for (k = 0; k < total; ++k) {
               int copy;

               c_re(reference_in[k]) = DRAND();
               c_im(reference_in[k]) = DRAND();
               for (copy = 0; copy < stride; ++copy) {
                    c_re(strided[k * stride + copy]) = c_re(reference_in[k]);
                    c_im(strided[k * stride + copy]) = c_im(reference_in[k]);
               }
          }

          /* for stride 1 a coin flip decides between the two entry points */
          if (stride == 1 && !coinflip())
               fftwnd_one(plan, strided, NULL);
          else
               fftwnd(plan, stride, strided, stride, 1,
                      (fftw_complex *) NULL, 1, 1);

          /* reference result from the validated plan */
          fftwnd(validated_plan, 1, reference_in, 1, 1, reference_out, 1, 1);

          for (k = 0; k < stride; ++k)
               CHECK(compute_error_complex(strided + k, stride, reference_out, 1, total) < TOLERANCE,
                     "testnd_in_place: wrong answer");
     }

     fftwnd_destroy_plan(plan);

     fftw_free(reference_out);
     fftw_free(reference_in);
     fftw_free(strided);
}
/*
 * Destroys `plan` by forwarding it unchanged to fftwnd_destroy_plan();
 * this function does no work of its own.
 */
void rfftwnd_destroy_plan(fftwnd_plan plan) { fftwnd_destroy_plan(plan); }
//------------------------------------------------------------------------------------ int fdct3d_inverse_angles(int N1,int N2,int N3,int b, double L1,double L2,double L3, int s,int nd, CpxCrvletPrtd& C, CpxNumTnsBlkd& W) { int mpirank; MPI_Comm_rank(MPI_COMM_WORLD, &mpirank); int mpisize; MPI_Comm_size(MPI_COMM_WORLD, &mpisize); vector< vector<int> >& Cowners = C.owners(); vector<int>& crvowners = Cowners[s]; //LEXING: the owner information for wedges in scale s int nf = 6; int wcnt = 0; int S1, S2, S3; int F1, F2, F3; double R1, R2, R3; fdct3d_rangecompute(L1, L2, L3, S1, S2, S3, F1, F2, F3, R1, R2, R3); DblOffVec big1(S1); fdct3d_lowpass(L1, big1); DblOffVec big2(S2); fdct3d_lowpass(L2, big2); DblOffVec big3(S3); fdct3d_lowpass(L3, big3); double Lh1 = L1/2; double Lh2 = L2/2; double Lh3 = L3/2; int Sh1, Sh2, Sh3; int Fh1, Fh2, Fh3; double Rh1, Rh2, Rh3; fdct3d_rangecompute(Lh1, Lh2, Lh3, Sh1, Sh2, Sh3, Fh1, Fh2, Fh3, Rh1, Rh2, Rh3); DblOffVec sma1(S1); fdct3d_lowpass(Lh1, sma1); DblOffVec sma2(S2); fdct3d_lowpass(Lh2, sma2); DblOffVec sma3(S3); fdct3d_lowpass(Lh3, sma3); double W1 = L1/nd; double W2 = L2/nd; double W3 = L3/nd; typedef pair<int,int> intpair; typedef pair<int, intpair> inttriple; map<inttriple, fftwnd_plan> planmap; //face 0: x,y,z for(int h=0; h<nd; h++) { //(y first z second) for(int g=0; g<nd; g++) { if(crvowners[wcnt]==mpirank) { double xs = R1/4-(W1/2)/4; double xe = R1; double ys = -R2 + (2*g-1)*W2/2; double ye = -R2 + (2*g+3)*W2/2; double zs = -R3 + (2*h-1)*W3/2; double ze = -R3 + (2*h+3)*W3/2; int xn = int(ceil(xe-xs)); int yn = int(ceil(ye-ys)); int zn = int(ceil(ze-zs)); double thts, thtm, thte; //y to x if(g==0) { thts = atan2(-1.0, 1.0-1.0/nd); thtm = atan2(-1.0+1.0/nd, 1.0); thte = atan2(-1.0+3.0/nd, 1.0); } else if(g==nd-1) { thts = atan2(-1.0+(2.0*g-1.0)/nd, 1.0); thtm = atan2(-1.0+(2.0*g+1.0)/nd, 1.0); thte = atan2(1.0, 1.0-1.0/nd); } else { thts = atan2(-1.0+(2.0*g-1.0)/nd, 1.0); thtm = atan2(-1.0+(2.0*g+1.0)/nd, 1.0); thte = 
atan2(-1.0+(2.0*g+3.0)/nd, 1.0); } double phis, phim, phie; //z to x if(h==0) { phis = atan2(-1.0, 1.0-1.0/nd); phim = atan2(-1.0+1.0/nd, 1.0); phie = atan2(-1.0+3.0/nd, 1.0); } else if(h==nd-1) { phis = atan2(-1.0+(2.0*h-1.0)/nd, 1.0); phim = atan2(-1.0+(2.0*h+1.0)/nd, 1.0); phie = atan2(1.0, 1.0-1.0/nd); } else { phis = atan2(-1.0+(2.0*h-1.0)/nd, 1.0); phim = atan2(-1.0+(2.0*h+1.0)/nd, 1.0); phie = atan2(-1.0+(2.0*h+3.0)/nd, 1.0); } int xh = xn/2; int yh = yn/2; int zh = zn/2; //half double R21 = R2/R1; double R31 = R3/R1; CpxNumTns tpdata(xn,yn,zn); CpxNumTns& Cblk = C.block(s,wcnt); tpdata = Cblk; //fft fftwnd_plan p = NULL; map<inttriple, fftwnd_plan>::iterator mit = planmap.find( inttriple(xn, intpair(yn,zn)) ); if(mit!=planmap.end()) { p = (*mit).second; } else { p = fftw3d_create_plan(zn, yn, xn, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE); planmap[ inttriple(xn, intpair(yn,zn)) ] = p; } fftwnd_one(p, (fftw_complex*)tpdata.data(), NULL); //cerr<<"wedge s"<<endl; double sqrtprod = sqrt(double(xn*yn*zn)); for(int i=0; i<xn; i++) for(int j=0; j<yn; j++) for(int k=0; k<zn; k++) tpdata(i,j,k) /= sqrtprod; CpxOffTns wpdata(xn,yn,zn); fdct3d_fftshift(xn,yn,zn,tpdata,wpdata); for(int xcur=(int)ceil(xs); xcur<xe; xcur++) { int yfm = (int)ceil( max(-R2, R21*xcur*tan(thts)) ); int yto = (int)floor( min(R2, R21*xcur*tan(thte)) ); int zfm = (int)ceil( max(-R3, R31*xcur*tan(phis)) ); int zto = (int)floor( min(R3, R31*xcur*tan(phie)) ); for(int ycur=yfm; ycur<=yto; ycur++) for(int zcur=zfm; zcur<=zto; zcur++) { int tmpx = xcur%xn; if(tmpx<-xh) tmpx+=xn; if(tmpx>=-xh+xn) tmpx-=xn; int tmpy = ycur%yn; if(tmpy<-yh) tmpy+=yn; if(tmpy>=-yh+yn) tmpy-=yn; int tmpz = zcur%zn; if(tmpz<-zh) tmpz+=zn; if(tmpz>=-zh+zn) tmpz-=zn; double thtcur = atan2(ycur/R2, xcur/R1); double phicur = atan2(zcur/R3, xcur/R1); double glbpou; fdct3d_globalpou(thtcur, phicur, M_PI/4-atan2(1.0-1.0/nd, 1.0), glbpou); double wtht; if(thtcur<thtm) { if(g==0) wtht = 1; else { double l,r; fdct3d_window( 
(thtcur-thts)/(thtm-thts), l, r); wtht = l; } } else { if(g==nd-1) wtht = 1; else { double l,r; fdct3d_window( (thtcur-thtm)/(thte-thtm), l, r); wtht = r; } } double wphi; if(phicur<phim) { if(h==0) wphi = 1; else { double l,r; fdct3d_window( (phicur-phis)/(phim-phis), l, r); wphi = l; } } else { if(h==nd-1) wphi = 1; else { double l,r; fdct3d_window( (phicur-phim)/(phie-phim), l, r); wphi = r; } } double pou = glbpou * wtht * wphi; wpdata(tmpx, tmpy, tmpz) *= pou; double ss = sma1(xcur)*sma2(ycur)*sma3(zcur); double bb = big1(xcur)*big2(ycur)*big3(zcur); int bi,bj,bk; int oi,oj,ok; fdct3d_position_aux(N1,N2,N3,b, xcur,ycur,zcur, bi,bj,bk,oi,oj,ok); CpxNumTns& Wblk = W.block(bi,bj,bk); Wblk(oi,oj,ok) += wpdata(tmpx,tmpy,tmpz) * bb * sqrt(1.0-ss*ss); } } //xcur } //if wcnt++; } } //end of face //face 1. y z x for(int f=0; f<nd; f++) { for(int h=0; h<nd; h++) { if(crvowners[wcnt]==mpirank) { double ys = R2/4-(W2/2)/4; double ye = R2; double zs = -R3 + (2*h-1)*W3/2; double ze = -R3 + (2*h+3)*W3/2; double xs = -R1 + (2*f-1)*W1/2; double xe = -R1 + (2*f+3)*W1/2; int xn = int(ceil(xe-xs)); int yn = int(ceil(ye-ys)); int zn = int(ceil(ze-zs)); double thts, thtm, thte; //z to y if(h==0) { thts = atan2(-1.0, 1.0-1.0/nd); thtm = atan2(-1.0+1.0/nd, 1.0); thte = atan2(-1.0+3.0/nd, 1.0); } else if(h==nd-1) { thts = atan2(-1.0+(2.0*h-1.0)/nd, 1.0); thtm = atan2(-1.0+(2.0*h+1.0)/nd, 1.0); thte = atan2(1.0, 1.0-1.0/nd); } else { thts = atan2(-1.0+(2.0*h-1.0)/nd, 1.0); thtm = atan2(-1.0+(2.0*h+1.0)/nd, 1.0); thte = atan2(-1.0+(2.0*h+3.0)/nd, 1.0); } double phis, phim, phie; //z to x if(f==0) { phis = atan2(-1.0, 1.0-1.0/nd); phim = atan2(-1.0+1.0/nd, 1.0); phie = atan2(-1.0+3.0/nd, 1.0); } else if(f==nd-1) { phis = atan2(-1.0+(2.0*f-1.0)/nd, 1.0); phim = atan2(-1.0+(2.0*f+1.0)/nd, 1.0); phie = atan2(1.0, 1.0-1.0/nd); } else { phis = atan2(-1.0+(2.0*f-1.0)/nd, 1.0); phim = atan2(-1.0+(2.0*f+1.0)/nd, 1.0); phie = atan2(-1.0+(2.0*f+3.0)/nd, 1.0); } int xh = xn/2; int yh = yn/2; int zh 
= zn/2; double R32 = R3/R2; double R12 = R1/R2; CpxNumTns tpdata(xn,yn,zn); CpxNumTns& Cblk = C.block(s,wcnt); tpdata = Cblk; //fft fftwnd_plan p = NULL; map<inttriple, fftwnd_plan>::iterator mit = planmap.find( inttriple(xn, intpair(yn,zn)) ); if(mit!=planmap.end()) { p = (*mit).second; } else { p = fftw3d_create_plan(zn, yn, xn, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE); planmap[ inttriple(xn, intpair(yn,zn)) ] = p; } fftwnd_one(p, (fftw_complex*)tpdata.data(), NULL); //cerr<<"wedge s"<<endl; double sqrtprod = sqrt(double(xn*yn*zn)); for(int i=0; i<xn; i++) for(int j=0; j<yn; j++) for(int k=0; k<zn; k++) tpdata(i,j,k) /= sqrtprod; CpxOffTns wpdata(xn,yn,zn); fdct3d_fftshift(xn,yn,zn,tpdata,wpdata); for(int ycur=(int)ceil(ys); ycur<ye; ycur++) { int zfm = (int)ceil( max(-R3, R32*ycur*tan(thts)) ); int zto = (int)floor( min(R3, R32*ycur*tan(thte)) ); int xfm = (int)ceil( max(-R1, R12*ycur*tan(phis)) ); int xto = (int)floor( min(R1, R12*ycur*tan(phie)) ); for(int zcur=zfm; zcur<=zto; zcur++) for(int xcur=xfm; xcur<=xto; xcur++) { int tmpx = xcur%xn; if(tmpx<-xh) tmpx+=xn; if(tmpx>=-xh+xn) tmpx-=xn; int tmpy = ycur%yn; if(tmpy<-yh) tmpy+=yn; if(tmpy>=-yh+yn) tmpy-=yn; int tmpz = zcur%zn; if(tmpz<-zh) tmpz+=zn; if(tmpz>=-zh+zn) tmpz-=zn; double thtcur = atan2(zcur/R3, ycur/R2); double phicur = atan2(xcur/R1, ycur/R2); double glbpou; fdct3d_globalpou(thtcur, phicur, M_PI/4-atan2(1.0-1.0/nd, 1.0), glbpou); //CHECK double wtht; if(thtcur<thtm) { if(h==0) wtht = 1; else { double l,r; fdct3d_window( (thtcur-thts)/(thtm-thts), l, r); wtht = l; } } else { if(h==nd-1) wtht = 1; else { double l,r; fdct3d_window( (thtcur-thtm)/(thte-thtm), l, r); wtht = r; } } double wphi; if(phicur<phim) { if(f==0) wphi = 1; else { double l,r; fdct3d_window( (phicur-phis)/(phim-phis), l, r); wphi = l; } } else { if(f==nd-1) wphi = 1; else { double l,r; fdct3d_window( (phicur-phim)/(phie-phim), l, r); wphi = r; } } double pou = glbpou * wtht * wphi; wpdata(tmpx, tmpy, tmpz) *= pou; double ss 
= sma1(xcur)*sma2(ycur)*sma3(zcur); double bb = big1(xcur)*big2(ycur)*big3(zcur); int bi,bj,bk; int oi,oj,ok; fdct3d_position_aux(N1,N2,N3,b, xcur,ycur,zcur, bi,bj,bk,oi,oj,ok); CpxNumTns& Wblk = W.block(bi,bj,bk); Wblk(oi,oj,ok) += wpdata(tmpx,tmpy,tmpz) * bb * sqrt(1.0-ss*ss); } } //ycur }//if wcnt++; } }//end of face //face 2. z x y for(int g=0; g<nd; g++) { for(int f=0; f<nd; f++) { if(crvowners[wcnt]==mpirank) { double zs = R3/4-(W3/2)/4; double ze = R3; double xs = -R1 + (2*f-1)*W1/2; double xe = -R1 + (2*f+3)*W1/2; double ys = -R2 + (2*g-1)*W2/2; double ye = -R2 + (2*g+3)*W2/2; int xn = int(ceil(xe-xs)); int yn = int(ceil(ye-ys)); int zn = int(ceil(ze-zs)); double thts, thtm, thte; //y to x if(f==0) { thts = atan2(-1.0, 1.0-1.0/nd); thtm = atan2(-1.0+1.0/nd, 1.0); thte = atan2(-1.0+3.0/nd, 1.0); } else if(f==nd-1) { thts = atan2(-1.0+(2.0*f-1.0)/nd, 1.0); thtm = atan2(-1.0+(2.0*f+1.0)/nd, 1.0); thte = atan2(1.0, 1.0-1.0/nd); } else { thts = atan2(-1.0+(2.0*f-1.0)/nd, 1.0); thtm = atan2(-1.0+(2.0*f+1.0)/nd, 1.0); thte = atan2(-1.0+(2.0*f+3.0)/nd, 1.0); } double phis, phim, phie; //z to x if(g==0) { phis = atan2(-1.0, 1.0-1.0/nd); phim = atan2(-1.0+1.0/nd, 1.0); phie = atan2(-1.0+3.0/nd, 1.0); } else if(g==nd-1) { phis = atan2(-1.0+(2.0*g-1.0)/nd, 1.0); phim = atan2(-1.0+(2.0*g+1.0)/nd, 1.0); phie = atan2(1.0, 1.0-1.0/nd); } else { phis = atan2(-1.0+(2.0*g-1.0)/nd, 1.0); phim = atan2(-1.0+(2.0*g+1.0)/nd, 1.0); phie = atan2(-1.0+(2.0*g+3.0)/nd, 1.0); } int xh = xn/2; int yh = yn/2; int zh = zn/2; double R13 = double(F1)/double(F3); double R23 = double(F2)/double(F3); CpxNumTns tpdata(xn,yn,zn); CpxNumTns& Cblk = C.block(s,wcnt); tpdata = Cblk; //fft fftwnd_plan p = NULL; map<inttriple, fftwnd_plan>::iterator mit = planmap.find( inttriple(xn, intpair(yn,zn)) ); if(mit!=planmap.end()) { p = (*mit).second; } else { p = fftw3d_create_plan(zn, yn, xn, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE); planmap[ inttriple(xn, intpair(yn,zn)) ] = p; } fftwnd_one(p, 
(fftw_complex*)tpdata.data(), NULL); //cerr<<"wedge s"<<endl; double sqrtprod = sqrt(double(xn*yn*zn)); for(int i=0; i<xn; i++) for(int j=0; j<yn; j++) for(int k=0; k<zn; k++) tpdata(i,j,k) /= sqrtprod; CpxOffTns wpdata(xn,yn,zn); fdct3d_fftshift(xn,yn,zn,tpdata,wpdata); for(int zcur=(int)ceil(zs); zcur<ze; zcur++) { int xfm = (int)ceil( max(-R1, R13*zcur*tan(thts)) ); int xto = (int)floor( min(R1, R13*zcur*tan(thte)) ); int yfm = (int)ceil( max(-R2, R23*zcur*tan(phis)) ); int yto = (int)floor( min(R2, R23*zcur*tan(phie)) ); for(int xcur=xfm; xcur<=xto; xcur++) for(int ycur=yfm; ycur<=yto; ycur++) { int tmpx = xcur%xn; if(tmpx<-xh) tmpx+=xn; if(tmpx>=-xh+xn) tmpx-=xn; int tmpy = ycur%yn; if(tmpy<-yh) tmpy+=yn; if(tmpy>=-yh+yn) tmpy-=yn; int tmpz = zcur%zn; if(tmpz<-zh) tmpz+=zn; if(tmpz>=-zh+zn) tmpz-=zn; double thtcur = atan2(xcur/R1, zcur/R3); double phicur = atan2(ycur/R2, zcur/R3); double glbpou; fdct3d_globalpou(thtcur, phicur, M_PI/4-atan2(1.0-1.0/nd, 1.0), glbpou); double wtht; if(thtcur<thtm) { if(f==0) wtht = 1; else { double l,r; fdct3d_window( (thtcur-thts)/(thtm-thts), l, r); wtht = l; } } else { if(f==nd-1) wtht = 1; else { double l,r; fdct3d_window( (thtcur-thtm)/(thte-thtm), l, r); wtht = r; } } double wphi; if(phicur<phim) { if(g==0) wphi = 1; else { double l,r; fdct3d_window( (phicur-phis)/(phim-phis), l, r); wphi = l; } } else { if(g==nd-1) wphi = 1; else { double l,r; fdct3d_window( (phicur-phim)/(phie-phim), l, r); wphi = r; } } double pou = glbpou * wtht * wphi; wpdata(tmpx, tmpy, tmpz) *= pou; double ss = sma1(xcur)*sma2(ycur)*sma3(zcur); double bb = big1(xcur)*big2(ycur)*big3(zcur); int bi,bj,bk; int oi,oj,ok; fdct3d_position_aux(N1,N2,N3,b, xcur,ycur,zcur, bi,bj,bk,oi,oj,ok); CpxNumTns& Wblk = W.block(bi,bj,bk); Wblk(oi,oj,ok) += wpdata(tmpx,tmpy,tmpz) * bb * sqrt(1.0-ss*ss); } }//zcur }//if wcnt++; } }//end of face //face 3: -x,-y,-z for(int h=nd-1; h>=0; h--) { for(int g=nd-1; g>=0; g--) { if(crvowners[wcnt]==mpirank) { double xs = -R1; 
double xe = -R1/4+(W1/2)/4; double ys = -R2 + (2*g-1)*W2/2; double ye = -R2 + (2*g+3)*W2/2; double zs = -R3 + (2*h-1)*W3/2; double ze = -R3 + (2*h+3)*W3/2; int xn = int(ceil(xe-xs)); int yn = int(ceil(ye-ys)); int zn = int(ceil(ze-zs)); double thts, thtm, thte; //y to x if(g==0) { thts = atan2(-1.0, 1.0-1.0/nd); thtm = atan2(-1.0+1.0/nd, 1.0); thte = atan2(-1.0+3.0/nd, 1.0); } else if(g==nd-1) { thts = atan2(-1.0+(2.0*g-1.0)/nd, 1.0); thtm = atan2(-1.0+(2.0*g+1.0)/nd, 1.0); thte = atan2(1.0, 1.0-1.0/nd); } else { thts = atan2(-1.0+(2.0*g-1.0)/nd, 1.0); thtm = atan2(-1.0+(2.0*g+1.0)/nd, 1.0); thte = atan2(-1.0+(2.0*g+3.0)/nd, 1.0); } double phis, phim, phie; //z to x if(h==0) { phis = atan2(-1.0, 1.0-1.0/nd); phim = atan2(-1.0+1.0/nd, 1.0); phie = atan2(-1.0+3.0/nd, 1.0); } else if(h==nd-1) { phis = atan2(-1.0+(2.0*h-1.0)/nd, 1.0); phim = atan2(-1.0+(2.0*h+1.0)/nd, 1.0); phie = atan2(1.0, 1.0-1.0/nd); } else { phis = atan2(-1.0+(2.0*h-1.0)/nd, 1.0); phim = atan2(-1.0+(2.0*h+1.0)/nd, 1.0); phie = atan2(-1.0+(2.0*h+3.0)/nd, 1.0); } int xh = xn/2; int yh = yn/2; int zh = zn/2; double R21 = R2/R1; double R31 = R3/R1; CpxNumTns tpdata(xn,yn,zn); CpxNumTns& Cblk = C.block(s,wcnt); tpdata = Cblk; //fft fftwnd_plan p = NULL; map<inttriple, fftwnd_plan>::iterator mit = planmap.find( inttriple(xn, intpair(yn,zn)) ); if(mit!=planmap.end()) { p = (*mit).second; } else { p = fftw3d_create_plan(zn, yn, xn, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE); planmap[ inttriple(xn, intpair(yn,zn)) ] = p; } fftwnd_one(p, (fftw_complex*)tpdata.data(), NULL); //cerr<<"wedge s"<<endl; double sqrtprod = sqrt(double(xn*yn*zn)); for(int i=0; i<xn; i++) for(int j=0; j<yn; j++) for(int k=0; k<zn; k++) tpdata(i,j,k) /= sqrtprod; CpxOffTns wpdata(xn,yn,zn); fdct3d_fftshift(xn,yn,zn,tpdata,wpdata); for(int xcur=(int)ceil(xs); xcur<xe; xcur++) { int yfm = (int)ceil( max(-R2, R21*(-xcur)*tan(thts)) ); int yto = (int)floor( min(R2, R21*(-xcur)*tan(thte)) ); int zfm = (int)ceil( max(-R3, 
R31*(-xcur)*tan(phis)) ); int zto = (int)floor( min(R3, R31*(-xcur)*tan(phie)) ); for(int ycur=yfm; ycur<=yto; ycur++) for(int zcur=zfm; zcur<=zto; zcur++) { int tmpx = xcur%xn; if(tmpx<-xh) tmpx+=xn; if(tmpx>=-xh+xn) tmpx-=xn; int tmpy = ycur%yn; if(tmpy<-yh) tmpy+=yn; if(tmpy>=-yh+yn) tmpy-=yn; int tmpz = zcur%zn; if(tmpz<-zh) tmpz+=zn; if(tmpz>=-zh+zn) tmpz-=zn; double thtcur = atan2(ycur/R2, (-xcur)/R1); double phicur = atan2(zcur/R3, (-xcur)/R1); double glbpou; fdct3d_globalpou(thtcur, phicur, M_PI/4-atan2(1.0-1.0/nd, 1.0), glbpou); double wtht; if(thtcur<thtm) { if(g==0) wtht = 1; else { double l,r; fdct3d_window( (thtcur-thts)/(thtm-thts), l, r); wtht = l; } } else { if(g==nd-1) wtht = 1; else { double l,r; fdct3d_window( (thtcur-thtm)/(thte-thtm), l, r); wtht = r; } } double wphi; if(phicur<phim) { if(h==0) wphi = 1; else { double l,r; fdct3d_window( (phicur-phis)/(phim-phis), l, r); wphi = l; } } else { if(h==nd-1) wphi = 1; else { double l,r; fdct3d_window( (phicur-phim)/(phie-phim), l, r); wphi = r; } } double pou = glbpou * wtht * wphi; wpdata(tmpx, tmpy, tmpz) *= pou; double ss = sma1(xcur)*sma2(ycur)*sma3(zcur); double bb = big1(xcur)*big2(ycur)*big3(zcur); int bi,bj,bk; int oi,oj,ok; fdct3d_position_aux(N1,N2,N3,b, xcur,ycur,zcur, bi,bj,bk,oi,oj,ok); CpxNumTns& Wblk = W.block(bi,bj,bk); Wblk(oi,oj,ok) += wpdata(tmpx,tmpy,tmpz) * bb * sqrt(1.0-ss*ss); } } //xcur } //if wcnt++; } } //end of face //face 4: -y,-z,-x for(int f=nd-1; f>=0; f--) { for(int h=nd-1; h>=0; h--) { if(crvowners[wcnt]==mpirank) { double ys = -R2; double ye = -R2/4+(W2/2)/4; double zs = -R3 + (2*h-1)*W3/2; double ze = -R3 + (2*h+3)*W3/2; double xs = -R1 + (2*f-1)*W1/2; double xe = -R1 + (2*f+3)*W1/2; int xn = int(ceil(xe-xs)); int yn = int(ceil(ye-ys)); int zn = int(ceil(ze-zs)); double thts, thtm, thte; //z to y if(h==0) { thts = atan2(-1.0, 1.0-1.0/nd); thtm = atan2(-1.0+1.0/nd, 1.0); thte = atan2(-1.0+3.0/nd, 1.0); } else if(h==nd-1) { thts = atan2(-1.0+(2.0*h-1.0)/nd, 1.0); 
thtm = atan2(-1.0+(2.0*h+1.0)/nd, 1.0); thte = atan2(1.0, 1.0-1.0/nd); } else { thts = atan2(-1.0+(2.0*h-1.0)/nd, 1.0); thtm = atan2(-1.0+(2.0*h+1.0)/nd, 1.0); thte = atan2(-1.0+(2.0*h+3.0)/nd, 1.0); } double phis, phim, phie; //z to x if(f==0) { phis = atan2(-1.0, 1.0-1.0/nd); phim = atan2(-1.0+1.0/nd, 1.0); phie = atan2(-1.0+3.0/nd, 1.0); } else if(f==nd-1) { phis = atan2(-1.0+(2.0*f-1.0)/nd, 1.0); phim = atan2(-1.0+(2.0*f+1.0)/nd, 1.0); phie = atan2(1.0, 1.0-1.0/nd); } else { phis = atan2(-1.0+(2.0*f-1.0)/nd, 1.0); phim = atan2(-1.0+(2.0*f+1.0)/nd, 1.0); phie = atan2(-1.0+(2.0*f+3.0)/nd, 1.0); } int xh = xn/2; int yh = yn/2; int zh = zn/2; double R32 = double(F3)/double(F2); double R12 = double(F1)/double(F2); CpxNumTns tpdata(xn,yn,zn); CpxNumTns& Cblk = C.block(s,wcnt); tpdata = Cblk; //fft fftwnd_plan p = NULL; map<inttriple, fftwnd_plan>::iterator mit = planmap.find( inttriple(xn, intpair(yn,zn)) ); if(mit!=planmap.end()) { p = (*mit).second; } else { p = fftw3d_create_plan(zn, yn, xn, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE); planmap[ inttriple(xn, intpair(yn,zn)) ] = p; } fftwnd_one(p, (fftw_complex*)tpdata.data(), NULL); //cerr<<"wedge s"<<endl; double sqrtprod = sqrt(double(xn*yn*zn)); for(int i=0; i<xn; i++) for(int j=0; j<yn; j++) for(int k=0; k<zn; k++) tpdata(i,j,k) /= sqrtprod; CpxOffTns wpdata(xn,yn,zn); fdct3d_fftshift(xn,yn,zn,tpdata,wpdata); for(int ycur=(int)ceil(ys); ycur<ye; ycur++) { int zfm = (int)ceil( max(-R3, R32*(-ycur)*tan(thts)) ); int zto = (int)floor( min(R3, R32*(-ycur)*tan(thte)) ); int xfm = (int)ceil( max(-R1, R12*(-ycur)*tan(phis)) ); int xto = (int)floor( min(R1, R12*(-ycur)*tan(phie)) ); for(int zcur=zfm; zcur<=zto; zcur++) for(int xcur=xfm; xcur<=xto; xcur++) { int tmpx = xcur%xn; if(tmpx<-xh) tmpx+=xn; if(tmpx>=-xh+xn) tmpx-=xn; int tmpy = ycur%yn; if(tmpy<-yh) tmpy+=yn; if(tmpy>=-yh+yn) tmpy-=yn; int tmpz = zcur%zn; if(tmpz<-zh) tmpz+=zn; if(tmpz>=-zh+zn) tmpz-=zn; double thtcur = atan2(zcur/R3, (-ycur)/R2); double 
phicur = atan2(xcur/R1, (-ycur)/R2); double glbpou; fdct3d_globalpou(thtcur, phicur, M_PI/4-atan2(1.0-1.0/nd, 1.0), glbpou); //CHECK double wtht; if(thtcur<thtm) { if(h==0) wtht = 1; else { double l,r; fdct3d_window( (thtcur-thts)/(thtm-thts), l, r); wtht = l; } } else { if(h==nd-1) wtht = 1; else { double l,r; fdct3d_window( (thtcur-thtm)/(thte-thtm), l, r); wtht = r; } } double wphi; if(phicur<phim) { if(f==0) wphi = 1; else { double l,r; fdct3d_window( (phicur-phis)/(phim-phis), l, r); wphi = l; } } else { if(f==nd-1) wphi = 1; else { double l,r; fdct3d_window( (phicur-phim)/(phie-phim), l, r); wphi = r; } } double pou = glbpou * wtht * wphi; wpdata(tmpx, tmpy, tmpz) *= pou; double ss = sma1(xcur)*sma2(ycur)*sma3(zcur); double bb = big1(xcur)*big2(ycur)*big3(zcur); int bi,bj,bk; int oi,oj,ok; fdct3d_position_aux(N1,N2,N3,b, xcur,ycur,zcur, bi,bj,bk,oi,oj,ok); CpxNumTns& Wblk = W.block(bi,bj,bk); Wblk(oi,oj,ok) += wpdata(tmpx,tmpy,tmpz) * bb * sqrt(1.0-ss*ss); } } //ycur }//if wcnt++; } }//end of face //face 5.-z,-x,-y for(int g=nd-1; g>=0; g--) { for(int f=nd-1; f>=0; f--) { if(crvowners[wcnt]==mpirank) { double zs = -R3; double ze = -R3/4+(W3/2)/4; double xs = -R1 + (2*f-1)*W1/2; double xe = -R1 + (2*f+3)*W1/2; double ys = -R2 + (2*g-1)*W2/2; double ye = -R2 + (2*g+3)*W2/2; int xn = int(ceil(xe-xs)); int yn = int(ceil(ye-ys)); int zn = int(ceil(ze-zs)); double thts, thtm, thte; //y to x if(f==0) { thts = atan2(-1.0, 1.0-1.0/nd); thtm = atan2(-1.0+1.0/nd, 1.0); thte = atan2(-1.0+3.0/nd, 1.0); } else if(f==nd-1) { thts = atan2(-1.0+(2.0*f-1.0)/nd, 1.0); thtm = atan2(-1.0+(2.0*f+1.0)/nd, 1.0); thte = atan2(1.0, 1.0-1.0/nd); } else { thts = atan2(-1.0+(2.0*f-1.0)/nd, 1.0); thtm = atan2(-1.0+(2.0*f+1.0)/nd, 1.0); thte = atan2(-1.0+(2.0*f+3.0)/nd, 1.0); } double phis, phim, phie; //z to x if(g==0) { phis = atan2(-1.0, 1.0-1.0/nd); phim = atan2(-1.0+1.0/nd, 1.0); phie = atan2(-1.0+3.0/nd, 1.0); } else if(g==nd-1) { phis = atan2(-1.0+(2.0*g-1.0)/nd, 1.0); phim = 
atan2(-1.0+(2.0*g+1.0)/nd, 1.0); phie = atan2(1.0, 1.0-1.0/nd); } else { phis = atan2(-1.0+(2.0*g-1.0)/nd, 1.0); phim = atan2(-1.0+(2.0*g+1.0)/nd, 1.0); phie = atan2(-1.0+(2.0*g+3.0)/nd, 1.0); } int xh = xn/2; int yh = yn/2; int zh = zn/2; double R13 = double(F1)/double(F3); double R23 = double(F2)/double(F3); CpxNumTns tpdata(xn,yn,zn); CpxNumTns& Cblk = C.block(s,wcnt); tpdata = Cblk; //fft fftwnd_plan p = NULL; map<inttriple, fftwnd_plan>::iterator mit = planmap.find( inttriple(xn, intpair(yn,zn)) ); if(mit!=planmap.end()) { p = (*mit).second; } else { p = fftw3d_create_plan(zn, yn, xn, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE); planmap[ inttriple(xn, intpair(yn,zn)) ] = p; } fftwnd_one(p, (fftw_complex*)tpdata.data(), NULL); //cerr<<"wedge s"<<endl; double sqrtprod = sqrt(double(xn*yn*zn)); for(int i=0; i<xn; i++) for(int j=0; j<yn; j++) for(int k=0; k<zn; k++) tpdata(i,j,k) /= sqrtprod; CpxOffTns wpdata(xn,yn,zn); fdct3d_fftshift(xn,yn,zn,tpdata,wpdata); for(int zcur=(int)ceil(zs); zcur<ze; zcur++) { int xfm = (int)ceil( max(-R1, R13*(-zcur)*tan(thts)) ); int xto = (int)floor( min(R1, R13*(-zcur)*tan(thte)) ); int yfm = (int)ceil( max(-R2, R23*(-zcur)*tan(phis)) ); int yto = (int)floor( min(R2, R23*(-zcur)*tan(phie)) ); for(int xcur=xfm; xcur<=xto; xcur++) for(int ycur=yfm; ycur<=yto; ycur++) { int tmpx = xcur%xn; if(tmpx<-xh) tmpx+=xn; if(tmpx>=-xh+xn) tmpx-=xn; int tmpy = ycur%yn; if(tmpy<-yh) tmpy+=yn; if(tmpy>=-yh+yn) tmpy-=yn; int tmpz = zcur%zn; if(tmpz<-zh) tmpz+=zn; if(tmpz>=-zh+zn) tmpz-=zn; double thtcur = atan2(xcur/R1, (-zcur)/R3); double phicur = atan2(ycur/R2, (-zcur)/R3); double glbpou; fdct3d_globalpou(thtcur, phicur, M_PI/4-atan2(1.0-1.0/nd, 1.0), glbpou); double wtht; if(thtcur<thtm) { if(f==0) wtht = 1; else { double l,r; fdct3d_window( (thtcur-thts)/(thtm-thts), l, r); wtht = l; } } else { if(f==nd-1) wtht = 1; else { double l,r; fdct3d_window( (thtcur-thtm)/(thte-thtm), l, r); wtht = r; } } double wphi; if(phicur<phim) { if(g==0) wphi = 
1; else { double l,r; fdct3d_window( (phicur-phis)/(phim-phis), l, r); wphi = l; } } else { if(g==nd-1) wphi = 1; else { double l,r; fdct3d_window( (phicur-phim)/(phie-phim), l, r); wphi = r; } } double pou = glbpou * wtht * wphi; wpdata(tmpx, tmpy, tmpz) *= pou; double ss = sma1(xcur)*sma2(ycur)*sma3(zcur); double bb = big1(xcur)*big2(ycur)*big3(zcur); int bi,bj,bk; int oi,oj,ok; fdct3d_position_aux(N1,N2,N3,b, xcur,ycur,zcur, bi,bj,bk,oi,oj,ok); CpxNumTns& Wblk = W.block(bi,bj,bk); Wblk(oi,oj,ok) += wpdata(tmpx,tmpy,tmpz) * bb * sqrt(1.0-ss*ss); } }//zcur }//if wcnt++; } }//end of face iA(wcnt==nd*nd*nf); //remove plans for(map<inttriple, fftwnd_plan>::iterator mit=planmap.begin(); mit!=planmap.end(); mit++) { fftwnd_plan p = (*mit).second; fftwnd_destroy_plan(p); } return 0; }
int main(int argc, char **argv) { const int n_c=3; const int n_s=4; const char outfile_prefix[] = "delta_pp_2pt_v3"; int c, i, icomp; int filename_set = 0; int append, status; int l_LX_at, l_LXstart_at; int ix, it, iix, x1,x2,x3; int ir, ir2, is; int VOL3; int do_gt=0; int dims[3]; double *connt=NULL; spinor_propagator_type *connq=NULL; int verbose = 0; int sx0, sx1, sx2, sx3; int write_ascii=0; int fermion_type = 1; // Wilson fermion type int num_threads=1; int pos; char filename[200], contype[200], gauge_field_filename[200]; double ratime, retime; //double plaq_m, plaq_r; double *work=NULL; fermion_propagator_type fp1=NULL, fp2=NULL, fp3=NULL, fp4=NULL, fpaux=NULL, uprop=NULL, dprop=NULL, *stochastic_fp=NULL; spinor_propagator_type sp1, sp2; double q[3], phase, *gauge_trafo=NULL; double *stochastic_source=NULL, *stochastic_prop=NULL; complex w, w1; size_t items, bytes; FILE *ofs; int timeslice; DML_Checksum ildg_gauge_field_checksum, *spinor_field_checksum=NULL, connq_checksum; uint32_t nersc_gauge_field_checksum; /***********************************************************/ int *qlatt_id=NULL, *qlatt_count=NULL, **qlatt_rep=NULL, **qlatt_map=NULL, qlatt_nclass=0; int use_lattice_momenta = 0; double **qlatt_list=NULL; /***********************************************************/ /***********************************************************/ int rel_momentum_filename_set = 0, rel_momentum_no=0; int **rel_momentum_list=NULL; char rel_momentum_filename[200]; /***********************************************************/ /***********************************************************/ int snk_momentum_no = 1; int **snk_momentum_list = NULL; int snk_momentum_filename_set = 0; char snk_momentum_filename[200]; /***********************************************************/ /******************************************************************* * Gamma components for the Delta: */ //const int num_component = 16; //int gamma_component[2][16] = { {0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3}, \ 
// {0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3}}; //double gamma_component_sign[16] = {1., 1.,-1., 1., 1., 1.,-1., 1.,-1.,-1., 1.,-1., 1., 1.,-1., 1.}; const int num_component = 4; int gamma_component[2][4] = { {0, 1, 2, 3}, {0, 1, 2, 3} }; double gamma_component_sign[4] = {+1.,+1.,+1.,+1.}; /* *******************************************************************/ fftw_complex *in=NULL; #ifdef MPI fftwnd_mpi_plan plan_p; #else fftwnd_plan plan_p; #endif #ifdef MPI MPI_Status status; #endif #ifdef MPI MPI_Init(&argc, &argv); #endif while ((c = getopt(argc, argv, "ah?vgf:t:F:p:P:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'a': write_ascii = 1; fprintf(stdout, "# [] will write in ascii format\n"); break; case 'F': if(strcmp(optarg, "Wilson") == 0) { fermion_type = _WILSON_FERMION; } else if(strcmp(optarg, "tm") == 0) { fermion_type = _TM_FERMION; } else { fprintf(stderr, "[] Error, unrecognized fermion type\n"); exit(145); } fprintf(stdout, "# [] will use fermion type %s ---> no. 
%d\n", optarg, fermion_type); break; case 't': num_threads = atoi(optarg); fprintf(stdout, "# [] number of threads set to %d\n", num_threads); break; case 's': use_lattice_momenta = 1; fprintf(stdout, "# [] will use lattice momenta\n"); break; case 'p': rel_momentum_filename_set = 1; strcpy(rel_momentum_filename, optarg); fprintf(stdout, "# [] will use current momentum file %s\n", rel_momentum_filename); break; case 'P': snk_momentum_filename_set = 1; strcpy(snk_momentum_filename, optarg); fprintf(stdout, "# [] will use nucleon momentum file %s\n", snk_momentum_filename); break; case 'g': do_gt = 1; fprintf(stdout, "# [] will perform gauge transform\n"); break; case 'h': case '?': default: usage(); break; } } #ifdef OPENMP omp_set_num_threads(num_threads); #endif /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } if(g_kappa == 0.) 
{ if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n"); usage(); } /* initialize MPI parameters */ mpi_init(argc, argv); #ifdef OPENMP status = fftw_threads_init(); if(status != 0) { fprintf(stderr, "\n[] Error from fftw_init_threads; status was %d\n", status); exit(120); } #endif /****************************************************** * ******************************************************/ VOL3 = LX*LY*LZ; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; fprintf(stdout, "# [%2d] parameters:\n"\ "# [%2d] l_LX_at = %3d\n"\ "# [%2d] l_LXstart_at = %3d\n"\ "# [%2d] FFTW_LOC_VOLUME = %3d\n", g_cart_id, g_cart_id, l_LX_at, g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME); if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); exit(1); } geometry(); if(N_Jacobi>0) { // alloc the gauge field alloc_gauge_field(&g_gauge_field, VOL3); switch(g_gauge_file_format) { case 0: sprintf(gauge_field_filename, "%s.%.4d", gaugefilename_prefix, Nconf); break; case 1: sprintf(gauge_field_filename, "%s.%.5d", gaugefilename_prefix, Nconf); break; } } else { g_gauge_field = NULL; } /********************************************************************* * gauge transformation *********************************************************************/ if(do_gt) { init_gauge_trafo(&gauge_trafo, 1.); } // determine the source location sx0 = g_source_location/(LX*LY*LZ)-Tstart; sx1 = (g_source_location%(LX*LY*LZ)) / (LY*LZ); sx2 = (g_source_location%(LY*LZ)) / LZ; sx3 = (g_source_location%LZ); // g_source_time_slice = sx0; fprintf(stdout, "# [] source location %d = (%d,%d,%d,%d)\n", g_source_location, sx0, sx1, sx2, sx3); source_timeslice = sx0; if(!use_lattice_momenta) { status = make_qcont_orbits_3d_parity_avg(&qlatt_id, &qlatt_count, &qlatt_list, &qlatt_nclass, &qlatt_rep, &qlatt_map); } else { status = make_qlatt_orbits_3d_parity_avg(&qlatt_id, &qlatt_count, &qlatt_list, &qlatt_nclass, &qlatt_rep, &qlatt_map); } if(status != 0) { fprintf(stderr, "\n[] 
Error while creating h4-lists\n"); exit(4); } fprintf(stdout, "# [] number of classes = %d\n", qlatt_nclass); /*************************************************************************** * read the relative momenta q to be used ***************************************************************************/ /* ofs = fopen(rel_momentum_filename, "r"); if(ofs == NULL) { fprintf(stderr, "[] Error, could not open file %s for reading\n", rel_momentum_filename); exit(6); } rel_momentum_no = 0; while( fgets(line, 199, ofs) != NULL) { if(line[0] != '#') { rel_momentum_no++; } } if(rel_momentum_no == 0) { fprintf(stderr, "[] Error, number of momenta is zero\n"); exit(7); } else { fprintf(stdout, "# [] number of current momenta = %d\n", rel_momentum_no); } rewind(ofs); rel_momentum_list = (int**)malloc(rel_momentum_no * sizeof(int*)); rel_momentum_list[0] = (int*)malloc(3*rel_momentum_no * sizeof(int)); for(i=1;i<rel_momentum_no;i++) { rel_momentum_list[i] = rel_momentum_list[i-1] + 3; } count=0; while( fgets(line, 199, ofs) != NULL) { if(line[0] != '#') { sscanf(line, "%d%d%d", rel_momentum_list[count], rel_momentum_list[count]+1, rel_momentum_list[count]+2); count++; } } fclose(ofs); fprintf(stdout, "# [] current momentum list:\n"); for(i=0;i<rel_momentum_no;i++) { fprintf(stdout, "\t%3d%3d%3d%3d\n", i, rel_momentum_list[i][0], rel_momentum_list[i][1], rel_momentum_list[i][2]); } */ /*************************************************************************** * read the nucleon final momenta to be used ***************************************************************************/ ofs = fopen(snk_momentum_filename, "r"); if(ofs == NULL) { fprintf(stderr, "[] Error, could not open file %s for reading\n", snk_momentum_filename); exit(6); } snk_momentum_no = 0; while( fgets(line, 199, ofs) != NULL) { if(line[0] != '#') { snk_momentum_no++; } } if(snk_momentum_no == 0) { fprintf(stderr, "[] Error, number of momenta is zero\n"); exit(7); } else { fprintf(stdout, "# [] number of nucleon 
final momenta = %d\n", snk_momentum_no); } rewind(ofs); snk_momentum_list = (int**)malloc(snk_momentum_no * sizeof(int*)); snk_momentum_list[0] = (int*)malloc(3*snk_momentum_no * sizeof(int)); for(i=1;i<snk_momentum_no;i++) { snk_momentum_list[i] = snk_momentum_list[i-1] + 3; } count=0; while( fgets(line, 199, ofs) != NULL) { if(line[0] != '#') { sscanf(line, "%d%d%d", snk_momentum_list[count], snk_momentum_list[count]+1, snk_momentum_list[count]+2); count++; } } fclose(ofs); fprintf(stdout, "# [] the nucleon final momentum list:\n"); for(i=0;i<snk_momentum_no;i++) { fprintf(stdout, "\t%3d%3d%3d%3d\n", i, snk_momentum_list[i][0], snk_momentum_list[i][1], snk_momentum_list[i][1], snk_momentum_list[i][2]); } /*********************************************************** * allocate memory for the spinor fields ***********************************************************/ g_spinor_field = NULL; if(fermion_type == _TM_FERMION) { no_fields = 2*n_s*n_c+3; } else { no_fields = n_s*n_c+3; } if(N_Jacobi>0) no_fields++; g_spinor_field = (double**)calloc(no_fields, sizeof(double*)); for(i=0; i<no_fields-2; i++) alloc_spinor_field(&g_spinor_field[i], VOL3); // work if(N_Jacobi>0) work = g_spinor_field[no_fields-4]; // stochastic_fv stochastic_fv = g_spinor_field[no_fields-3]; // stochastic source and propagator alloc_spinor_field(&g_spinor_field[no_fields-2], VOLUME); stochastic_source = g_spinor_field[no_fields-2]; alloc_spinor_field(&g_spinor_field[no_fields-1], VOLUME); stochastic_prop = g_spinor_field[no_fields-1]; spinor_field_checksum = (DML_Checksum*)malloc(no_fields * sizeof(DML_Checksum) ); if(spinor_field_checksum == NULL ) { fprintf(stderr, "[] Error, could not alloc checksums for spinor fields\n"); exit(73); } /************************************************* * allocate memory for the contractions *************************************************/ items = 4* num_component*T; bytes = sizeof(double); connt = (double*)malloc(items*bytes); if(connt == NULL) { 
fprintf(stderr, "\n[] Error, could not alloc connt\n"); exit(2); } for(ix=0; ix<items; ix++) connt[ix] = 0.; items = num_component * (size_t)VOL3; connq = create_sp_field( items ); if(connq == NULL) { fprintf(stderr, "\n[] Error, could not alloc connq\n"); exit(2); } items = (size_t)VOL3; stochastic_fp = create_sp_field( items ); if(stochastic_fp== NULL) { fprintf(stderr, "\n[] Error, could not alloc stochastic_fp\n"); exit(22); } /****************************************************** * initialize FFTW ******************************************************/ items = g_fv_dim * (size_t)VOL3; bytes = sizeof(fftw_complex); in = (fftw_complex*)malloc( items * bytes ); if(in == NULL) { fprintf(stderr, "[] Error, could not malloc in for FFTW\n"); exit(155); } dims[0]=LX; dims[1]=LY; dims[2]=LZ; //plan_p = fftwnd_create_plan(3, dims, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE); plan_p = fftwnd_create_plan_specific(3, dims, FFTW_FORWARD, FFTW_MEASURE, in, g_fv_dim, (fftw_complex*)( stochastic_fv ), g_fv_dim); // create the fermion propagator points create_fp(&uprop); create_fp(&dprop); create_fp(&fp1); create_fp(&fp2); create_fp(&fp3); create_fp(&stochastic_fp); create_sp(&sp1); create_sp(&sp2); // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! // !! implement twisting for _TM_FERMION // !! // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! #ifdef OPENMP #pragma omp parallel for private(ix) shared(stochastic_prop) #endif for(ix=0;ix<VOLUME;ix++) { _fv_eq_zero(stochastic_prop+_GSI(ix)); } for(sid=g_sourceid; sid<=g_sourceid2;sid+=g_sourceid_step) { switch(g_soruce_type) { case 2: // timeslice source sprintf(filename, "%s.%.4d.%.2d.%.5d.inverted", filename_prefix, Nconf, source_timeslice, sid); break; default: fprintf(stderr, "# [] source type %d not implented; exit\n", g_source_type); exit(100); } fprintf(stdout, "# [] trying to read sample up-prop. 
from file %s\n", filename); read_lime_spinor(stochastic_source, filename, 0); #ifdef OPENMP #pragma omp parallel for private(ix) shared(stochastic_prop, stochastic_source) #endif for(ix=0;ix<VOLUME;ix++) { _fv_pl_eq_fv(stochastic_prop+_GSI(ix), stochastic_source+_GSI(ix)); } } #ifdef OPENMP #pragma omp parallel for private(ix) shared(stochastic_prop, stochastic_source) #endif fnorm = 1. / ( (double)(g_sourceid2 - g_sourceid + 1) * g_prop_normsqr ); for(ix=0;ix<VOLUME;ix++) { _fv_ti_eq_re(stochastic_prop+_GSI(ix), fnorm); } // calculate the source if(fermion_type && g_propagator_bc_type == 1) { Q_Wilson_phi(stochastic_source, stochastic_prop); } else { Q_phi_tbc(stochastic_source, stochastic_prop); } /****************************************************** * prepare the stochastic fermion field ******************************************************/ // read timeslice of the gauge field if( N_Jacobi>0) { switch(g_gauge_file_format) { case 0: status = read_lime_gauge_field_doubleprec_timeslice(g_gauge_field, gauge_field_filename, source_timeslice, &ildg_gauge_field_checksum); break; case 1: status = read_nersc_gauge_field_timeslice(g_gauge_field, gauge_field_filename, source_timeslice, &nersc_gauge_field_checksum); break; } if(status != 0) { fprintf(stderr, "[] Error, could not read gauge field\n"); exit(21); } for(i=0; i<N_ape; i++) { #ifdef OPENMP status = APE_Smearing_Step_Timeslice_threads(g_gauge_field, alpha_ape); #else status = APE_Smearing_Step_Timeslice(g_gauge_field, alpha_ape); #endif } } // read timeslice of the 12 up-type propagators and smear them // // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! // !! implement twisting for _TM_FERMION // !! // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
for(is=0;is<n_s*n_c;is++) { if(fermion_type != _TM_FERMION) { sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.inverted", filename_prefix, Nconf, sx0, sx1, sx2, sx3, is); } else { sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.inverted", filename_prefix2, Nconf, sx0, sx1, sx2, sx3, is); } status = read_lime_spinor_timeslice(g_spinor_field[is], source_timeslice, filename, 0, spinor_field_checksum+is); if(status != 0) { fprintf(stderr, "[] Error, could not read propagator from file %s\n", filename); exit(102); } if(N_Jacobi > 0) { fprintf(stdout, "# [] Jacobi smearing propagator no. %d with paramters N_Jacobi=%d, kappa_Jacobi=%f\n", is, N_Jacobi, kappa_Jacobi); for(c=0; c<N_Jacobi; c++) { #ifdef OPENMP Jacobi_Smearing_Step_one_Timeslice_threads(g_gauge_field, g_spinor_field[is], work, kappa_Jacobi); #else Jacobi_Smearing_Step_one_Timeslice(g_gauge_field, g_spinor_field[is], work, kappa_Jacobi); #endif } } } for(is=0;is<g_fv_dim;is++) { for(ix=0;ix<VOL3;ix++) { iix = source_timeslice * VOL3 + ix; _fv_eq_gamma_ti_fv(spinor1, 5, g_spinor_field[is]+_GSI(iix)); _co_eq_fv_dagger_ti_fv(&w, stochastic_source+_GSI(ix), spinor1); stochastic_fv[_GSI(ix)+2*is ] = w.re; stochastic_fv[_GSI(ix)+2*is+1] = w.im; } } // Fourier transform items = g_fv_dim * (size_t)VOL3; bytes = sizeof(double); memcpy(in, stochastic_fv, items*bytes ); #ifdef OPENMP fftwnd_threads(num_threads, plan_p, g_fv_dim, in, g_fv_dim, 1, (fftw_complex*)(stochastic_fv), g_fv_dim, 1); #else fftwnd(plan_p, g_fv_dim, in, g_fv_dim, 1, (fftw_complex*)(stochastic_fv), g_fv_dim, 1); #endif /****************************************************** * loop on sink momenta (most likely only one: Q=(0,0,0)) ******************************************************/ for(imom_snk=0;imom_snk<snk_momentum_no; imom_snk++) { // create Phi_tilde _fv_eq_zero( spinor2 ); for(ix=0;ix<LX;ix++) { for(iy=0;iy<LY;iy++) { for(iz=0;iz<LZ;iz++) { iix = timeslice * VOL3 + ix; phase = -2.*M_PI*( (ix-sx1) * snk_momentum_list[imom_snk][0] / 
(double)LX + (iy-sx2) * snk_momentum_list[imom_snk][1] / (double)LY + (iz-sx3) * snk_momentum_list[imom_snk][2] / (double)LZ); w.re = cos(phase); w.im = sin(phase); _fv_eq_fv_ti_co(spinor1, stochastic_prop + _GSI(iix), &w); _fv_pl_eq_fv(spinor2, spinor); }}} // create Theta for(ir=0;ir<g_fv_dim;ir++) { for(is=0;is<g_fv_dim;is++) { _co_eq_co_ti_co( &(stochastic_fp[ix][ir][2*is]), &(spinor2[2*ir]), &(stochastic_fv[_GSI(ix)+2*is]) ); }} /****************************************************** * loop on timeslices ******************************************************/ for(timeslice=0; timeslice<T; timeslice++) { append = (int)( timeslice != 0 ); // read timeslice of the gauge field if( N_Jacobi>0) { switch(g_gauge_file_format) { case 0: status = read_lime_gauge_field_doubleprec_timeslice(g_gauge_field, gauge_field_filename, timeslice, &ildg_gauge_field_checksum); break; case 1: status = read_nersc_gauge_field_timeslice(g_gauge_field, gauge_field_filename, timeslice, &nersc_gauge_field_checksum); break; } if(status != 0) { fprintf(stderr, "[] Error, could not read gauge field\n"); exit(21); } for(i=0; i<N_ape; i++) { #ifdef OPENMP status = APE_Smearing_Step_Timeslice_threads(g_gauge_field, alpha_ape); #else status = APE_Smearing_Step_Timeslice(g_gauge_field, alpha_ape); #endif } } // read timeslice of the 12 up-type propagators and smear them for(is=0;is<n_s*n_c;is++) { sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.inverted", filename_prefix, Nconf, sx0, sx1, sx2, sx3, is); status = read_lime_spinor_timeslice(g_spinor_field[is], timeslice, filename, 0, spinor_field_checksum+is); if(status != 0) { fprintf(stderr, "[] Error, could not read propagator from file %s\n", filename); exit(102); } if(N_Jacobi > 0) { fprintf(stdout, "# [] Jacobi smearing propagator no. 
%d with paramters N_Jacobi=%d, kappa_Jacobi=%f\n", is, N_Jacobi, kappa_Jacobi); for(c=0; c<N_Jacobi; c++) { #ifdef OPENMP Jacobi_Smearing_Step_one_Timeslice_threads(g_gauge_field, g_spinor_field[is], work, kappa_Jacobi); #else Jacobi_Smearing_Step_one_Timeslice(g_gauge_field, g_spinor_field[is], work, kappa_Jacobi); #endif } } } if(fermion_type == _TM_FERMION) { // read timeslice of the 12 down-type propagators, smear them for(is=0;is<n_s*n_c;is++) { if(do_gt == 0) { sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.inverted", filename_prefix2, Nconf, sx0, sx1, sx2, sx3, is); status = read_lime_spinor_timeslice(g_spinor_field[n_s*n_c+is], timeslice, filename, 0, spinor_field_checksum+n_s*n_c+is); if(status != 0) { fprintf(stderr, "[] Error, could not read propagator from file %s\n", filename); exit(102); } if(N_Jacobi > 0) { fprintf(stdout, "# [] Jacobi smearing propagator no. %d with paramters N_Jacobi=%d, kappa_Jacobi=%f\n", is, N_Jacobi, kappa_Jacobi); for(c=0; c<N_Jacobi; c++) { #ifdef OPENMP Jacobi_Smearing_Step_one_Timeslice_threads(g_gauge_field, g_spinor_field[n_s*n_c+is], work, kappa_Jacobi); #else Jacobi_Smearing_Step_one_Timeslice(g_gauge_field, g_spinor_field[n_s*n_c+is], work, kappa_Jacobi); #endif } } } } /****************************************************** * contractions ******************************************************/ for(ix=0;ix<VOL3;ix++) //for(ix=0;ix<1;ix++) { // assign the propagators _assign_fp_point_from_field(uprop, g_spinor_field, ix); if(fermion_type==_TM_FERMION) { _assign_fp_point_from_field(dprop, g_spinor_field+n_s*n_c, ix); } else { _fp_eq_fp(dprop, uprop); } flavor rotation for twisted mass fermions if(fermion_type == _TM_FERMION) { _fp_eq_rot_ti_fp(fp1, uprop, +1, fermion_type, fp2); _fp_eq_fp_ti_rot(uprop, fp1, +1, fermion_type, fp2); // _fp_eq_rot_ti_fp(fp1, dprop, -1, fermion_type, fp2); // _fp_eq_fp_ti_rot(dprop, fp1, -1, fermion_type, fp2); } // test: print fermion propagator point //printf_fp(uprop, stdout); 
for(icomp=0; icomp<num_component; icomp++) { _sp_eq_zero( connq[ix*num_component+icomp]); /****************************************************** * first contribution ******************************************************/ _fp_eq_zero(fp1); _fp_eq_zero(fp2); _fp_eq_zero(fp3); // C Gamma_1 x S_u = g0 g2 Gamma_1 S_u _fp_eq_gamma_ti_fp(fp1, gamma_component[0][icomp], uprop); _fp_eq_gamma_ti_fp(fp3, 2, fp1); _fp_eq_gamma_ti_fp(fp1, 0, fp3); // S_u x C Gamma_2 = S_u x g0 g2 Gamma_2 _fp_eq_fp_ti_gamma(fp2, 0, uprop); _fp_eq_fp_ti_gamma(fp3, 2, fp2); _fp_eq_fp_ti_gamma(fp2, gamma_component[1][icomp], fp3); // first part // reduce _fp_eq_zero(fp3); _fp_eq_fp_eps_contract13_fp(fp3, fp1, uprop); // reduce to spin propagator _sp_eq_zero( sp1 ); _sp_eq_fp_del_contract23_fp(sp1, fp2, fp3); // second part // reduce to spin propagator _sp_eq_zero( sp2 ); _sp_eq_fp_del_contract24_fp(sp2, fp2, fp3); // add and assign _sp_pl_eq_sp(sp1, sp2); _sp_eq_sp_ti_re(sp2, sp1, -gamma_component_sign[icomp]); _sp_eq_sp( connq[ix*num_component+icomp], sp2); /****************************************************** * second contribution ******************************************************/ _fp_eq_zero(fp1); _fp_eq_zero(fp2); _fp_eq_zero(fp3); // first part // C Gamma_1 x S_u = g0 g2 Gamma_1 S_u _fp_eq_gamma_ti_fp(fp1, gamma_component[0][icomp], uprop); _fp_eq_gamma_ti_fp(fp3, 2, fp1); _fp_eq_gamma_ti_fp(fp1, 0, fp3); // S_u x C Gamma_2 = S_u g0 g2 Gamma_2 (same S_u as above) _fp_eq_fp_ti_gamma(fp2, 0, fp1); _fp_eq_fp_ti_gamma(fp3, 2, fp2); _fp_eq_fp_ti_gamma(fp1, gamma_component[1][icomp], fp3); // reduce _fp_eq_zero(fp3); _fp_eq_fp_eps_contract13_fp(fp3, fp1, uprop); // reduce to spin propagator _sp_eq_zero( sp1 ); _sp_eq_fp_del_contract23_fp(sp1, uprop, fp3); // second part // C Gamma_1 x S_u = g0 g2 Gamma_1 S_u _fp_eq_gamma_ti_fp(fp1, gamma_component[0][icomp], uprop); _fp_eq_gamma_ti_fp(fp3, 2, fp1); _fp_eq_gamma_ti_fp(fp1, 0, fp3); // S_u x C Gamma_2 = S_u g0 g2 Gamma_2 
_fp_eq_fp_ti_gamma(fp2, 0, uprop); _fp_eq_fp_ti_gamma(fp3, 2, fp2); _fp_eq_fp_ti_gamma(fp2, gamma_component[1][icomp], fp3); // reduce _fp_eq_zero(fp3); _fp_eq_fp_eps_contract13_fp(fp3, fp1, fp2); // reduce to spin propagator _sp_eq_zero( sp2 ); _sp_eq_fp_del_contract24_fp(sp2, uprop, fp3); // add and assign _sp_pl_eq_sp(sp1, sp2); _sp_eq_sp_ti_re(sp2, sp1, -gamma_component_sign[icomp]); _sp_pl_eq_sp( connq[ix*num_component+icomp], sp2); /****************************************************** * third contribution ******************************************************/ _fp_eq_zero(fp1); _fp_eq_zero(fp2); _fp_eq_zero(fp3); // first part // C Gamma_1 x S_u = g0 g2 Gamma_1 S_u _fp_eq_gamma_ti_fp(fp1, gamma_component[0][icomp], uprop); _fp_eq_gamma_ti_fp(fp3, 2, fp1); _fp_eq_gamma_ti_fp(fp1, 0, fp3); // S_u x C Gamma_2 = S_u g0 g2 Gamma_2 _fp_eq_fp_ti_gamma(fp2, 0, fp1); _fp_eq_fp_ti_gamma(fp3, 2, fp2); _fp_eq_fp_ti_gamma(fp1, gamma_component[1][icomp], fp3); // reduce _fp_eq_zero(fp3); _fp_eq_fp_eps_contract13_fp(fp3, fp1, uprop); // reduce to spin propagator _sp_eq_zero( sp1 ); _sp_eq_fp_del_contract34_fp(sp1, uprop, fp3); // second part // C Gamma_1 x S_u = g0 g2 Gamma_1 S_u _fp_eq_gamma_ti_fp(fp1, gamma_component[0][icomp], uprop); _fp_eq_gamma_ti_fp(fp3, 2, fp1); _fp_eq_gamma_ti_fp(fp1, 0, fp3); // S_u x C Gamma_2 = S_u g0 g2 Gamma_2 _fp_eq_fp_ti_gamma(fp2, 0, uprop); _fp_eq_fp_ti_gamma(fp3, 2, fp2); _fp_eq_fp_ti_gamma(fp2, gamma_component[1][icomp], fp3); // reduce _fp_eq_zero(fp3); _fp_eq_fp_eps_contract13_fp(fp3, fp1, fp2); // reduce to spin propagator _sp_eq_zero( sp2 ); _sp_eq_fp_del_contract34_fp(sp2, uprop, fp3); // add and assign _sp_pl_eq_sp(sp1, sp2); _sp_eq_sp_ti_re(sp2, sp1, -gamma_component_sign[icomp]); _sp_pl_eq_sp( connq[ix*num_component+icomp], sp2); } // of icomp } // of ix /*********************************************** * finish calculation of connq ***********************************************/ if(g_propagator_bc_type == 0) { // multiply with 
phase factor fprintf(stdout, "# [] multiplying timeslice %d with boundary phase factor\n", timeslice); ir = (timeslice - sx0 + T_global) % T_global; w1.re = cos( 3. * M_PI*(double)ir / (double)T_global ); w1.im = sin( 3. * M_PI*(double)ir / (double)T_global ); for(ix=0;ix<num_component*VOL3;ix++) { _sp_eq_sp(sp1, connq[ix] ); _sp_eq_sp_ti_co( connq[ix], sp1, w1); } } else if (g_propagator_bc_type == 1) { // multiply with step function if(timeslice < sx0) { fprintf(stdout, "# [] multiplying timeslice %d with boundary step function\n", timeslice); for(ix=0;ix<num_component*VOL3;ix++) { _sp_eq_sp(sp1, connq[ix] ); _sp_eq_sp_ti_re( connq[ix], sp1, -1.); } } } if(write_ascii) { sprintf(filename, "%s_x.%.4d.t%.2dx%.2dy%.2dz%.2d.ascii", outfile_prefix, Nconf, sx0, sx1, sx2, sx3); write_contraction2( connq[0][0], filename, num_component*g_sv_dim*g_sv_dim, VOL3, 1, append); } /****************************************************************** * Fourier transform ******************************************************************/ items = 2 * num_component * g_sv_dim * g_sv_dim * VOL3; bytes = sizeof(double); memcpy(in, connq[0][0], items * bytes); ir = num_component * g_sv_dim * g_sv_dim; #ifdef OPENMP fftwnd_threads(num_threads, plan_p, ir, in, ir, 1, (fftw_complex*)(connq[0][0]), ir, 1); #else fftwnd(plan_p, ir, in, ir, 1, (fftw_complex*)(connq[0][0]), ir, 1); #endif // add phase factor from the source location iix = 0; for(x1=0;x1<LX;x1++) { q[0] = (double)x1 / (double)LX; for(x2=0;x2<LY;x2++) { q[1] = (double)x2 / (double)LY; for(x3=0;x3<LZ;x3++) { q[2] = (double)x3 / (double)LZ; phase = 2. 
* M_PI * ( q[0]*sx1 + q[1]*sx2 + q[2]*sx3 ); w1.re = cos(phase); w1.im = sin(phase); for(icomp=0; icomp<num_component; icomp++) { _sp_eq_sp(sp1, connq[iix] ); _sp_eq_sp_ti_co( connq[iix], sp1, w1) ; iix++; } }}} // of x3, x2, x1 // write to file sprintf(filename, "%s_q.%.4d.t%.2dx%.2dy%.2dz%.2d.Qx%.2dQy%.2dQz%.2d.%.5d", outfile_prefix, Nconf, sx0, sx1, sx2, sx3, qlatt_rep[snk_momentum_list[imom_snk]][1],qlatt_rep[snk_momentum_list[imom_snk]][2],qlatt_rep[snk_momentum_list[imom_snk]][3], g_sourceid2-g_sourceid+1); sprintf(contype, "2-pt. function, (t,q_1,q_2,q_3)-dependent, source_timeslice = %d", sx0); write_lime_contraction_timeslice(connq[0][0], filename, 64, num_component*g_sv_dim*g_sv_dim, contype, Nconf, 0, &connq_checksum, timeslice); if(write_ascii) { strcat(filename, ".ascii"); write_contraction2(connq[0][0],filename, num_component*g_sv_dim*g_sv_dim, VOL3, 1, append); } /*********************************************** * calculate connt ***********************************************/ for(icomp=0;icomp<num_component; icomp++) { // fwd _sp_eq_sp(sp1, connq[icomp]); _sp_eq_gamma_ti_sp(sp2, 0, sp1); _sp_pl_eq_sp(sp1, sp2); _co_eq_tr_sp(&w, sp1); connt[2*(icomp*T + timeslice) ] = w.re * 0.25; connt[2*(icomp*T + timeslice)+1] = w.im * 0.25; // bwd _sp_eq_sp(sp1, connq[icomp]); _sp_eq_gamma_ti_sp(sp2, 0, sp1); _sp_mi_eq_sp(sp1, sp2); _co_eq_tr_sp(&w, sp1); connt[2*(icomp*T+timeslice + num_component*T) ] = w.re * 0.25; connt[2*(icomp*T+timeslice + num_component*T)+1] = w.im * 0.25; } } // of loop on timeslice // write connt sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.fw", outfile_prefix, Nconf, sx0, sx1, sx2, sx3); ofs = fopen(filename, "w"); if(ofs == NULL) { fprintf(stderr, "[] Error, could not open file %s for writing\n", filename); exit(3); } fprintf(ofs, "#%12.8f%3d%3d%3d%3d%8.4f%6d\n", g_kappa, T_global, LX, LY, LZ, g_mu, Nconf); for(icomp=0; icomp<num_component; icomp++) { ir = sx0; fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], 
gamma_component[1][icomp], 0, connt[2*(icomp*T+ir)], 0., Nconf); for(it=1;it<T/2;it++) { ir = ( it + sx0 ) % T_global; ir2 = ( (T_global - it) + sx0 ) % T_global; fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(icomp*T+ir)], connt[2*(icomp*T+ir2)], Nconf); } ir = ( it + sx0 ) % T_global; fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(icomp*T+ir)], 0., Nconf); } fclose(ofs); sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.bw", outfile_prefix, Nconf, sx0, sx1, sx2, sx3); ofs = fopen(filename, "w"); if(ofs == NULL) { fprintf(stderr, "[] Error, could not open file %s for writing\n", filename); exit(3); } fprintf(ofs, "#%12.8f%3d%3d%3d%3d%8.4f%6d\n", g_kappa, T_global, LX, LY, LZ, g_mu, Nconf); for(icomp=0; icomp<num_component; icomp++) { ir = sx0; fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], 0, connt[2*(num_component*T+icomp*T+ir)], 0., Nconf); for(it=1;it<T/2;it++) { ir = ( it + sx0 ) % T_global; ir2 = ( (T_global - it) + sx0 ) % T_global; fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(num_component*T+icomp*T+ir)], connt[2*(num_component*T+icomp*T+ir2)], Nconf); } ir = ( it + sx0 ) % T_global; fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(num_component*T+icomp*T+ir)], 0., Nconf); } fclose(ofs); } // of loop on sink momentum ( = Delta^++ momentum, Qvec) /*********************************************** * free the allocated memory, finalize ***********************************************/ free_geometry(); if(connt!= NULL) free(connt); if(connq!= NULL) free(connq); if(gauge_trafo != NULL) free(gauge_trafo); if(g_spinor_field!=NULL) { for(i=0; i<no_fields; i++) free(g_spinor_field[i]); free(g_spinor_field); g_spinor_field=(double**)NULL; } if(spinor_field_checksum !=NULL) 
free(spinor_field_checksum); if(g_gauge_field != NULL) free(g_gauge_field); if(snk_momemtum_list != NULL) { if(snk_momentum_list[0] != NULL) free(snk_momentum_list[0]); free(snk_momentum_list); } if(rel_momemtum_list != NULL) { if(rel_momentum_list[0] != NULL) free(rel_momentum_list[0]); free(rel_momentum_list); } // free the fermion propagator points free_fp( &uprop ); free_fp( &dprop ); free_fp( &fp1 ); free_fp( &fp2 ); free_fp( &fp3 ); free_sp( &sp1 ); free_sp( &sp2 ); free(in); fftwnd_destroy_plan(plan_p); g_the_time = time(NULL); fprintf(stdout, "# [] %s# [] end fo run\n", ctime(&g_the_time)); fflush(stdout); fprintf(stderr, "# [] %s# [] end fo run\n", ctime(&g_the_time)); fflush(stderr); #ifdef MPI MPI_Finalize(); #endif return(0); }
/*
 * Time the multi-dimensional MPI transform (and, where enabled, the
 * uniprocessor transform) for the array shape described by sz and print the
 * results.
 *
 *   sz        rank and per-dimension extents (sz.rank, sz.narray)
 *   dir       transform direction passed to the plan constructors
 *   flags     planner flags; FFTW_IN_PLACE is always OR-ed in
 *   specific  when non-zero nothing is benchmarked and the function returns
 *
 * Shapes with rank < 2 are skipped.  The function reads the file-level
 * settings io_okay, only_parallel, howmany_fields, ncpus, speed_flag,
 * wisdom_flag and no_vector_flag.
 */
void test_speed_nd_aux(struct size sz,
		       fftw_direction dir, int flags, int specific)
{
     int local_nx, local_x_start, local_ny_after_transpose, local_y_start_after_transpose, total_local_size;
     fftw_complex *in, *work;
     fftwnd_plan plan = 0;
     fftwnd_mpi_plan mpi_plan;
     double t, t0 = 0.0;
     int i, N;

     if (sz.rank < 2)
	  return;

     /* only bench in-place multi-dim transforms */
     flags |= FFTW_IN_PLACE;

     /* N = total number of points = product of all extents */
     N = 1;
     for (i = 0; i < sz.rank; ++i)
	  N *= (sz.narray[i]);

     if (specific) {
	  /* no "specific" plan variant is benchmarked here */
	  return;
     } else {
	  /* the uniprocessor reference plan is only built when this process
	     does I/O and serial timing has not been disabled */
	  if (io_okay && !only_parallel)
	       plan = fftwnd_create_plan(sz.rank, sz.narray, dir, speed_flag | flags | wisdom_flag | no_vector_flag);
	  mpi_plan = fftwnd_mpi_create_plan(MPI_COMM_WORLD, sz.rank, sz.narray, dir, speed_flag | flags | wisdom_flag | no_vector_flag);
     }
     CHECK(mpi_plan != NULL, "can't create plan");

     fftwnd_mpi_local_sizes(mpi_plan, &local_nx, &local_x_start, &local_ny_after_transpose, &local_y_start_after_transpose, &total_local_size);

     /* when the uniprocessor transform is also timed, "in" is sized for the
	whole array (N points); otherwise only total_local_size points are
	allocated.  NOTE(review): this presumes N >= total_local_size -- confirm */
     if (io_okay && !only_parallel)
	  in = (fftw_complex *) fftw_malloc(N * howmany_fields * sizeof(fftw_complex));
     else
	  in = (fftw_complex *) fftw_malloc(total_local_size * howmany_fields * sizeof(fftw_complex));
     work = (fftw_complex *) fftw_malloc(total_local_size * howmany_fields * sizeof(fftw_complex));

     /* uniprocessor reference timing -> t0; the serial plan is released
	immediately afterwards */
     if (io_okay && !only_parallel) {
	  FFTW_TIME_FFT(fftwnd(plan, howmany_fields, in, howmany_fields, 1, 0, 0, 0), in, N * howmany_fields, t0);
	  fftwnd_destroy_plan(plan);
	  WHEN_VERBOSE(1, printf("time for one fft (uniprocessor): %s\n", smart_sprint_time(t0)));
     }

     /* (1) MPI transform, normal output order, no work array */
     MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields, in, NULL, FFTW_NORMAL_ORDER), in, total_local_size * howmany_fields, t);
     if (io_okay) {
	  WHEN_VERBOSE(1, printf("NORMAL: time for one fft (%d cpus): %s", ncpus, smart_sprint_time(t)));
	  WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
	  WHEN_VERBOSE(1, printf("NORMAL: \"mflops\" = 5 (N log2 N) / " "(t in microseconds)" " = %f\n", howmany_fields * mflops(t, N)));
	  if (!only_parallel)
	       WHEN_VERBOSE(1, printf("NORMAL: parallel speedup: %f\n", t0 / t));
     }

     /* (2) MPI transform, transposed output order, no work array */
     MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields, in, NULL, FFTW_TRANSPOSED_ORDER), in, total_local_size * howmany_fields, t);
     if (io_okay) {
	  WHEN_VERBOSE(1, printf("TRANSP.: time for one fft (%d cpus): %s", ncpus, smart_sprint_time(t)));
	  WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
	  WHEN_VERBOSE(1, printf("TRANSP.: \"mflops\" = 5 (N log2 N) / " "(t in microseconds)" " = %f\n", howmany_fields * mflops(t, N)));
	  if (!only_parallel)
	       WHEN_VERBOSE(1, printf("TRANSP.: parallel speedup: %f\n", t0 / t));
     }

     /* (3) MPI transform, normal output order, with the extra work array */
     MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields, in, work, FFTW_NORMAL_ORDER), in, total_local_size * howmany_fields, t);
     if (io_okay) {
	  WHEN_VERBOSE(1, printf("NORMAL,w/WORK: time for one fft (%d cpus): %s", ncpus, smart_sprint_time(t)));
	  WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
	  WHEN_VERBOSE(1, printf("NORMAL,w/WORK: \"mflops\" = 5 (N log2 N) / " "(t in microseconds)" " = %f\n", howmany_fields * mflops(t, N)));
	  if (!only_parallel)
	       WHEN_VERBOSE(1, printf("NORMAL,w/WORK: parallel speedup: %f\n", t0 / t));
     }

     /* (4) MPI transform, transposed output order, with the extra work array */
     MPI_TIME_FFT(fftwnd_mpi(mpi_plan, howmany_fields, in, work, FFTW_TRANSPOSED_ORDER), in, total_local_size * howmany_fields, t);
     if (io_okay) {
	  WHEN_VERBOSE(1, printf("TRANSP.,w/WORK: time for one fft (%d cpus): %s", ncpus, smart_sprint_time(t)));
	  WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / N)));
	  WHEN_VERBOSE(1, printf("TRANSP.,w/WORK: \"mflops\" = 5 (N log2 N) / " "(t in microseconds)" " = %f\n", howmany_fields * mflops(t, N)));
	  if (!only_parallel)
	       WHEN_VERBOSE(1, printf("TRANSP.,w/WORK: parallel speedup: %f\n", t0 / t));
     }

     /* release the MPI plan and both buffers */
     fftwnd_mpi_destroy_plan(mpi_plan);

     fftw_free(in);
     fftw_free(work);

     WHEN_VERBOSE(1, my_printf("\n"));
}
/* Call fftw for a 1 band complex image.
 *
 * in is converted to a double-precision complex image (cmplx, a local image
 * attached to dummy), transformed in place with a forward 2D FFTW plan and
 * then written line by line to out, each sample divided by the number of
 * pixels (Xsize * Ysize).
 *
 * Returns 0 on success, -1 on error (im_error() is set for the failures
 * detected directly in this function).
 */
static int
cfwfft1( IMAGE *dummy, IMAGE *in, IMAGE *out )
{
	fftwnd_plan plan;
	double *buf, *q, *p;
	double size;
	int x, y;

	IMAGE *cmplx = im_open_local( dummy, "fwfft1:1", "t" );

	/* Make dp complex image.
	 */
	if( !cmplx || im_pincheck( in ) || im_outcheck( out ) )
		return( -1 );
	if( in->Coding != IM_CODING_NONE || in->Bands != 1 ) {
		im_error( "im_fwfft", _( "one band uncoded only" ) );
		return( -1 );
	}
	if( im_clip2dcm( in, cmplx ) )
		return( -1 );

	/* Make the plan for the transform. FFTW takes the slowest-varying
	 * dimension first, hence Ysize before Xsize.
	 */
	if( !(plan = fftw2d_create_plan( in->Ysize, in->Xsize,
		FFTW_FORWARD,
		FFTW_MEASURE | FFTW_USE_WISDOM | FFTW_IN_PLACE )) ) {
		im_error( "im_fwfft",
			_( "unable to create transform plan" ) );
		return( -1 );
	}

	fftwnd_one( plan, (fftw_complex *) cmplx->data, NULL );

	fftwnd_destroy_plan( plan );

	/* WIO to out.
	 */
	if( im_cp_desc( out, in ) )
		return( -1 );
	out->Bbits = IM_BBITS_DPCOMPLEX;
	out->BandFmt = IM_BANDFMT_DPCOMPLEX;
	if( im_setupout( out ) )
		return( -1 );
	if( !(buf = (double *) IM_ARRAY( dummy,
		IM_IMAGE_SIZEOF_LINE( out ), PEL )) )
		return( -1 );

	/* Normalisation factor: number of pixels. Computed once, and in
	 * double so that Xsize * Ysize cannot overflow int on large images.
	 */
	size = (double) out->Xsize * (double) out->Ysize;

	/* Copy to out, normalise. p walks the transformed buffer as
	 * interleaved (re, im) pairs; q fills one output scanline.
	 */
	for( p = (double *) cmplx->data, y = 0; y < out->Ysize; y++ ) {
		q = buf;

		for( x = 0; x < out->Xsize; x++ ) {
			q[0] = p[0] / size;
			q[1] = p[1] / size;
			p += 2;
			q += 2;
		}

		if( im_writeline( y, out, (PEL *) buf ) )
			return( -1 );
	}

	return( 0 );
}
int main(int argc, char **argv) { int c, i, mu, nu; int filename_set = 0; int dims[4] = {0,0,0,0}; int l_LX_at, l_LXstart_at; int x0, x1, x2, x3, ix, iix; int xx0, xx1, xx2, xx3; int y0min, y0max, y1min, y1max, y2min, y2max, y3min, y3max; int y0, y1, y2, y3, iy; int z0, z1, z2, z3, iz; int gid, status; int model_type = -1; double *disc = (double*)NULL; double *disc2 = (double*)NULL; double *work = (double*)NULL; double q[4], fnorm; char filename[100], contype[200]; double ratime, retime; double rmin2, rmax2, rsqr; complex w, w1; FILE *ofs; fftw_complex *in=(fftw_complex*)NULL; #ifdef MPI fftwnd_mpi_plan plan_p, plan_m; #else fftwnd_plan plan_p, plan_m; #endif #ifdef MPI MPI_Init(&argc, &argv); #endif while ((c = getopt(argc, argv, "h?f:t:")) != -1) { switch (c) { case 't': model_type = atoi(optarg); break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'h': case '?': default: usage(); break; } } /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# Reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } fprintf(stdout, "\n**************************************************\n"); fprintf(stdout, "* vp_disc_ft\n"); fprintf(stdout, "**************************************************\n\n"); #ifdef MPI if(g_cart_id==0) fprintf(stdout, "# Warning: MPI-version not yet available; exit\n"); exit(200); #endif /********************************* * initialize MPI parameters *********************************/ mpi_init(argc, argv); /* initialize fftw */ dims[0]=T_global; dims[1]=LX; dims[2]=LY; dims[3]=LZ; #ifdef MPI plan_p = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_BACKWARD, FFTW_MEASURE); plan_m = fftwnd_mpi_create_plan(g_cart_grid, 4, dims, FFTW_FORWARD, FFTW_MEASURE); fftwnd_mpi_local_sizes(plan_p, &T, &Tstart, &l_LX_at, 
&l_LXstart_at, &FFTW_LOC_VOLUME); #else plan_p = fftwnd_create_plan(4, dims, FFTW_BACKWARD, FFTW_MEASURE | FFTW_IN_PLACE); plan_m = fftwnd_create_plan(4, dims, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE); T = T_global; Tstart = 0; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; #endif fprintf(stdout, "# [%2d] fftw parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] l_LX_at = %3d\n"\ "# [%2d] l_LXstart_at = %3d\n"\ "# [%2d] FFTW_LOC_VOLUME = %3d\n", g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at, g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME); #ifdef MPI if(T==0) { fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id); MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); exit(2); } #endif if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(1); } geometry(); /**************************************** * allocate memory for the contractions ****************************************/ disc = (double*)calloc( 8*VOLUME, sizeof(double)); if( disc == (double*)NULL ) { fprintf(stderr, "could not allocate memory for disc\n"); # ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); # endif exit(3); } disc2 = (double*)calloc( 32*VOLUME, sizeof(double)); if( disc2 == (double*)NULL ) { fprintf(stderr, "could not allocate memory for disc2\n"); # ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); # endif exit(3); } for(ix=0; ix<32*VOLUME; ix++) disc2[ix] = 0.; work = (double*)calloc(32*VOLUME, sizeof(double)); if( work == (double*)NULL ) { fprintf(stderr, "could not allocate memory for work\n"); # ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); # endif exit(3); } /**************************************** * prepare Fourier transformation arrays ****************************************/ in = (fftw_complex*)malloc(FFTW_LOC_VOLUME*sizeof(fftw_complex)); if(in==(fftw_complex*)NULL) { #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); 
MPI_Finalize(); #endif exit(4); } /*************************************** * set model type function ***************************************/ switch (model_type) { case 0: model_type_function = pidisc_model; fprintf(stdout, "# function pointer set to type pidisc_model\n"); case 1: model_type_function = pidisc_model1; fprintf(stdout, "# function pointer set to type pidisc_model1\n"); break; case 2: model_type_function = pidisc_model2; fprintf(stdout, "# function pointer set to type pidisc_model2\n"); break; case 3: model_type_function = pidisc_model3; fprintf(stdout, "# function pointer set to type pidisc_model3\n"); break; default: model_type_function = NULL; fprintf(stdout, "# no model function selected; will add zero\n"); break; } /**************************************** * prepare the model for pidisc * - same for all gauge configurations ****************************************/ rmin2 = g_rmin * g_rmin; rmax2 = g_rmax * g_rmax; if(model_type > -1) { for(mu=0; mu<16; mu++) { model_type_function(model_mrho, model_dcoeff_re, model_dcoeff_im, work, plan_m, mu); for(x0=-(T-1); x0<T; x0++) { y0 = (x0 + T_global) % T_global; for(x1=-(LX-1); x1<LX; x1++) { y1 = (x1 + LX) % LX; for(x2=-(LY-1); x2<LY; x2++) { y2 = (x2 + LY) % LY; for(x3=-(LZ-1); x3<LZ; x3++) { y3 = (x3 + LZ) % LZ; iy = g_ipt[y0][y1][y2][y3]; rsqr = (double)(x1*x1) + (double)(x2*x2) + (double)(x3*x3); if(rmin2-rsqr<=_Q2EPS && rsqr-rmax2<=_Q2EPS) continue; /* radius in range for data usage, so continue */ disc2[_GWI(mu,iy,VOLUME) ] += work[2*iy ]; disc2[_GWI(mu,iy,VOLUME)+1] += work[2*iy+1]; }}}} memcpy((void*)in, (void*)(disc2+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double)); #ifdef MPI fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER); #else fftwnd_one(plan_p, in, NULL); #endif memcpy((void*)(disc2+_GWI(mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double)); } } else { for(ix=0; ix<32*VOLUME; ix++) disc2[ix] = 0.; } /*********************************************** * start loop on gauge id.s 
***********************************************/ for(gid=g_gaugeid; gid<=g_gaugeid2; gid+=g_gauge_step) { if(g_cart_id==0) fprintf(stdout, "# Start working on gauge id %d\n", gid); /* read the new contractions */ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif sprintf(filename, "%s.%.4d.%.4d", filename_prefix, gid, Nsave); if(g_cart_id==0) fprintf(stdout, "# Reading contraction data from file %s\n", filename); if(read_lime_contraction(disc, filename, 4, 0) == 106) { if(g_cart_id==0) fprintf(stderr, "Error, could not read from file %s, continue\n", filename); continue; } #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "# time to read contraction: %e seconds\n", retime-ratime); /************************************************ * prepare \Pi_\mu\nu (x,y) ************************************************/ # ifdef MPI ratime = MPI_Wtime(); # else ratime = (double)clock() / CLOCKS_PER_SEC; # endif for(x0=-T+1; x0<T; x0++) { y0min = x0<0 ? -x0 : 0; y0max = x0<0 ? T : T-x0; for(x1=-LX+1; x1<LX; x1++) { y1min = x1<0 ? -x1 : 0; y1max = x1<0 ? LX : LX-x1; for(x2=-LY+1; x2<LY; x2++) { y2min = x2<0 ? -x2 : 0; y2max = x2<0 ? LY : LY-x2; for(x3=-LZ+1; x3<LZ; x3++) { y3min = x3<0 ? -x3 : 0; y3max = x3<0 ? 
LZ : LZ-x3; xx0 = (x0+T ) % T; xx1 = (x1+LX) % LX; xx2 = (x2+LX) % LY; xx3 = (x3+LX) % LZ; ix = g_ipt[xx0][xx1][xx2][xx3]; rsqr = (double)(x1*x1) + (double)(x2*x2) + (double)(x3*x3); if(rmin2-rsqr>_Q2EPS || rsqr-rmax2>_Q2EPS) continue; for(y0=y0min; y0<y0max; y0++) { z0 = y0 + x0; for(y1=y1min; y1<y1max; y1++) { z1 = y1 + x1; for(y2=y2min; y2<y2max; y2++) { z2 = y2 + x2; for(y3=y3min; y3<y3max; y3++) { z3 = y3 + x3; iy = g_ipt[y0][y1][y2][y3]; iz = g_ipt[z0][z1][z2][z3]; i=0; for(mu=0; mu<4; mu++) { for(nu=0; nu<4; nu++) { iix = _GWI(i,ix,VOLUME); _co_eq_co_ti_co(&w, (complex*)(disc+_GWI(mu,iz,VOLUME)), (complex*)(disc+_GWI(nu,iy,VOLUME))); work[iix ] += w.re; work[iix+1] += w.im; i++; }} }}}} }}}} #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "# time to calculate \\Pi_\\mu\\nu in position space: %e seconds\n", retime-ratime); /*********************************************** * Fourier transform ***********************************************/ for(mu=0; mu<16; mu++) { memcpy((void*)in, (void*)(work+_GWI(mu,0,VOLUME)), 2*VOLUME*sizeof(double)); #ifdef MPI fftwnd_mpi(plan_p, 1, in, NULL, FFTW_NORMAL_ORDER); #else fftwnd_one(plan_p, in, NULL); #endif memcpy((void*)(work+_GWI(mu,0,VOLUME)), (void*)in, 2*VOLUME*sizeof(double)); } fnorm = 1. 
/ ((double)T_global * (double)(LX*LY*LZ)); if(g_cart_id==0) fprintf(stdout, "# P-fnorm = %16.5e\n", fnorm); for(x0=0; x0<T; x0++) { q[0] = (double)(x0+Tstart) / (double)T_global; for(x1=0; x1<LX; x1++) { q[1] = (double)x1 / (double)LX; for(x2=0; x2<LY; x2++) { q[2] = (double)x2 / (double)LY; for(x3=0; x3<LZ; x3++) { q[3] = (double)x3 / (double)LZ; ix = g_ipt[x0][x1][x2][x3]; i=0; for(mu=0; mu<4; mu++) { for(nu=0; nu<4; nu++) { iix = _GWI(i,ix,VOLUME); w.re = cos(M_PI * (q[mu] - q[nu])); w.im = sin(M_PI * (q[mu] - q[nu])); work[iix ] = work[iix ] * fnorm + disc2[iix ]; work[iix+1] = work[iix+1] * fnorm + disc2[iix+1]; _co_eq_co_ti_co(&w1, (complex*)(work+iix), &w); work[iix ] = w1.re; work[iix+1] = w1.im; i++; }} }}}} /*********************************************** * save results ***********************************************/ sprintf(filename, "%s.%.4d.%.4d", filename_prefix2, gid, Nsave); if(g_cart_id==0) fprintf(stdout, "# Saving results to file %s\n", filename); sprintf(contype, "cvc-disc-P"); write_lime_contraction(work, filename, 64, 16, contype, gid, Nsave); /* sprintf(filename, "%sascii.%.4d.%.4d", filename_prefix2, gid, Nsave); write_contraction(work, NULL, filename, 16, 2, 0); */ if(g_cart_id==0) fprintf(stdout, "# Finished working on gauge id %d\n", gid); } /* of loop on gid */ /*********************************************** * free the allocated memory, finalize ***********************************************/ free_geometry(); fftw_free(in); free(disc); free(disc2); free(work); #ifdef MPI fftwnd_mpi_destroy_plan(plan_p); fftwnd_mpi_destroy_plan(plan_m); MPI_Finalize(); #else fftwnd_destroy_plan(plan_p); fftwnd_destroy_plan(plan_m); #endif return(0); }
int main(int argc, char **argv) { const int n_c=3; const int n_s=4; const char outfile_prefix[] = "delta_pp_2pt_v4"; int c, i, icomp; int filename_set = 0; int append, status; int l_LX_at, l_LXstart_at; int ix, it, iix, x1,x2,x3; int ir, ir2, is; int VOL3; int do_gt=0; int dims[3]; double *connt=NULL; spinor_propagator_type *connq=NULL; int verbose = 0; int sx0, sx1, sx2, sx3; int write_ascii=0; int fermion_type = 1; // Wilson fermion type int pos; char filename[200], contype[200], gauge_field_filename[200]; double ratime, retime; //double plaq_m, plaq_r; double *work=NULL; fermion_propagator_type *fp1=NULL, *fp2=NULL, *fp3=NULL, *uprop=NULL, *dprop=NULL, *fpaux=NULL; spinor_propagator_type *sp1=NULL, *sp2=NULL; double q[3], phase, *gauge_trafo=NULL; complex w, w1; size_t items, bytes; FILE *ofs; int timeslice; DML_Checksum ildg_gauge_field_checksum, *spinor_field_checksum=NULL, connq_checksum; uint32_t nersc_gauge_field_checksum; int threadid, nthreads; /******************************************************************* * Gamma components for the Delta: * */ const int num_component = 4; int gamma_component[2][4] = { {0, 1, 2, 3}, {0, 1, 2, 3} }; double gamma_component_sign[4] = {+1.,+1.,-1.,+1.}; /* *******************************************************************/ fftw_complex *in=NULL; #ifdef MPI fftwnd_mpi_plan plan_p; #else fftwnd_plan plan_p; #endif #ifdef MPI MPI_Status status; #endif #ifdef MPI MPI_Init(&argc, &argv); #endif while ((c = getopt(argc, argv, "ah?vgf:F:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'a': write_ascii = 1; fprintf(stdout, "# [] will write in ascii format\n"); break; case 'F': if(strcmp(optarg, "Wilson") == 0) { fermion_type = _WILSON_FERMION; } else if(strcmp(optarg, "tm") == 0) { fermion_type = _TM_FERMION; } else { fprintf(stderr, "[] Error, unrecognized fermion type\n"); exit(145); } fprintf(stdout, "# [] will use fermion type %s ---> no. 
%d\n", optarg, fermion_type); break; case 'g': do_gt = 1; fprintf(stdout, "# [] will perform gauge transform\n"); break; case 'h': case '?': default: usage(); break; } } /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stdout, "T and L's must be set\n"); usage(); } if(g_kappa == 0.) { if(g_proc_id==0) fprintf(stdout, "kappa should be > 0.n"); usage(); } #ifdef OPENMP omp_set_num_threads(g_num_threads); #else fprintf(stdout, "[delta_pp_2pt_v4] Warning, resetting global thread number to 1\n"); g_num_threads = 1; #endif /* initialize MPI parameters */ mpi_init(argc, argv); #ifdef OPENMP status = fftw_threads_init(); if(status != 0) { fprintf(stderr, "\n[] Error from fftw_init_threads; status was %d\n", status); exit(120); } #endif /****************************************************** * ******************************************************/ VOL3 = LX*LY*LZ; l_LX_at = LX; l_LXstart_at = 0; FFTW_LOC_VOLUME = T*LX*LY*LZ; fprintf(stdout, "# [%2d] parameters:\n"\ "# [%2d] l_LX_at = %3d\n"\ "# [%2d] l_LXstart_at = %3d\n"\ "# [%2d] FFTW_LOC_VOLUME = %3d\n", g_cart_id, g_cart_id, l_LX_at, g_cart_id, l_LXstart_at, g_cart_id, FFTW_LOC_VOLUME); if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); exit(1); } geometry(); if(N_Jacobi>0) { // alloc the gauge field alloc_gauge_field(&g_gauge_field, VOL3); switch(g_gauge_file_format) { case 0: sprintf(gauge_field_filename, "%s.%.4d", gaugefilename_prefix, Nconf); break; case 1: sprintf(gauge_field_filename, "%s.%.5d", gaugefilename_prefix, Nconf); break; } } else { g_gauge_field = NULL; } /********************************************************************* * gauge transformation *********************************************************************/ if(do_gt) { 
init_gauge_trafo(&gauge_trafo, 1.); } // determine the source location sx0 = g_source_location/(LX*LY*LZ)-Tstart; sx1 = (g_source_location%(LX*LY*LZ)) / (LY*LZ); sx2 = (g_source_location%(LY*LZ)) / LZ; sx3 = (g_source_location%LZ); // g_source_time_slice = sx0; fprintf(stdout, "# [] source location %d = (%d,%d,%d,%d)\n", g_source_location, sx0, sx1, sx2, sx3); // allocate memory for the spinor fields g_spinor_field = NULL; no_fields = n_s*n_c; // if(fermion_type == _TM_FERMION) { // no_fields *= 2; // } if(N_Jacobi>0) no_fields++; g_spinor_field = (double**)calloc(no_fields, sizeof(double*)); for(i=0; i<no_fields-1; i++) alloc_spinor_field(&g_spinor_field[i], VOL3); alloc_spinor_field(&g_spinor_field[no_fields-1], VOL3); work = g_spinor_field[no_fields-1]; spinor_field_checksum = (DML_Checksum*)malloc(no_fields * sizeof(DML_Checksum) ); if(spinor_field_checksum == NULL ) { fprintf(stderr, "[] Error, could not alloc checksums for spinor fields\n"); exit(73); } // allocate memory for the contractions items = 4* num_component*T; bytes = sizeof(double); connt = (double*)malloc(items*bytes); if(connt == NULL) { fprintf(stderr, "\n[] Error, could not alloc connt\n"); exit(2); } for(ix=0; ix<items; ix++) connt[ix] = 0.; items = num_component * (size_t)VOL3; connq = create_sp_field( items ); if(connq == NULL) { fprintf(stderr, "\n[] Error, could not alloc connq\n"); exit(2); } /****************************************************** * initialize FFTW ******************************************************/ items = 2 * num_component * g_sv_dim * g_sv_dim * VOL3; bytes = sizeof(double); in = (fftw_complex*)malloc(num_component*g_sv_dim*g_sv_dim*VOL3*sizeof(fftw_complex)); if(in == NULL) { fprintf(stderr, "[] Error, could not malloc in for FFTW\n"); exit(155); } dims[0]=LX; dims[1]=LY; dims[2]=LZ; //plan_p = fftwnd_create_plan(3, dims, FFTW_FORWARD, FFTW_MEASURE | FFTW_IN_PLACE); plan_p = fftwnd_create_plan_specific(3, dims, FFTW_FORWARD, FFTW_MEASURE, in, 
num_component*g_sv_dim*g_sv_dim, (fftw_complex*)( connq[0][0] ), num_component*g_sv_dim*g_sv_dim); uprop = (fermion_propagator_type*)malloc(g_num_threads * sizeof(fermion_propagator_type) ); fp1 = (fermion_propagator_type*)malloc(g_num_threads * sizeof(fermion_propagator_type) ); fp2 = (fermion_propagator_type*)malloc(g_num_threads * sizeof(fermion_propagator_type) ); fp3 = (fermion_propagator_type*)malloc(g_num_threads * sizeof(fermion_propagator_type) ); fpaux = (fermion_propagator_type*)malloc(g_num_threads * sizeof(fermion_propagator_type) ); if(uprop==NULL || fp1==NULL || fp2==NULL || fp3==NULL || fpaux==NULL ) { fprintf(stderr, "[] Error, could not alloc fermion propagator points\n"); exit(57); } sp1 = (spinor_propagator_type*)malloc(g_num_threads * sizeof(spinor_propagator_type) ); sp2 = (spinor_propagator_type*)malloc(g_num_threads * sizeof(spinor_propagator_type) ); if(sp1==NULL || sp2==NULL) { fprintf(stderr, "[] Error, could not alloc spinor propagator points\n"); exit(59); } for(i=0;i<g_num_threads;i++) { create_fp(uprop+i); } for(i=0;i<g_num_threads;i++) { create_fp(fp1+i); } for(i=0;i<g_num_threads;i++) { create_fp(fp2+i); } for(i=0;i<g_num_threads;i++) { create_fp(fp3+i); } for(i=0;i<g_num_threads;i++) { create_fp(fpaux+i); } for(i=0;i<g_num_threads;i++) { create_sp(sp1+i); } for(i=0;i<g_num_threads;i++) { create_sp(sp2+i); } /****************************************************** * loop on timeslices ******************************************************/ for(timeslice=0; timeslice<T; timeslice++) { append = (int)( timeslice != 0 ); // read timeslice of the gauge field if( N_Jacobi>0) { switch(g_gauge_file_format) { case 0: status = read_lime_gauge_field_doubleprec_timeslice(g_gauge_field, gauge_field_filename, timeslice, &ildg_gauge_field_checksum); break; case 1: status = read_nersc_gauge_field_timeslice(g_gauge_field, gauge_field_filename, timeslice, &nersc_gauge_field_checksum); break; } if(status != 0) { fprintf(stderr, "[] Error, could not 
read gauge field\n"); exit(21); } #ifdef OPENMP status = APE_Smearing_Step_Timeslice_threads(g_gauge_field, N_ape, alpha_ape); #else for(i=0; i<N_ape; i++) { status = APE_Smearing_Step_Timeslice(g_gauge_field, alpha_ape); } #endif } // read timeslice of the 12 up-type propagators and smear them for(is=0;is<n_s*n_c;is++) { if(do_gt == 0) { sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.inverted", filename_prefix, Nconf, sx0, sx1, sx2, sx3, is); status = read_lime_spinor_timeslice(g_spinor_field[is], timeslice, filename, 0, spinor_field_checksum+is); if(status != 0) { fprintf(stderr, "[] Error, could not read propagator from file %s\n", filename); exit(102); } if(N_Jacobi > 0) { fprintf(stdout, "# [] Jacobi smearing propagator no. %d with paramters N_Jacobi=%d, kappa_Jacobi=%f\n", is, N_Jacobi, kappa_Jacobi); #ifdef OPENMP Jacobi_Smearing_Step_one_Timeslice_threads(g_gauge_field, g_spinor_field[is], work, N_Jacobi, kappa_Jacobi); #else for(c=0; c<N_Jacobi; c++) { Jacobi_Smearing_Step_one_Timeslice(g_gauge_field, g_spinor_field[is], work, kappa_Jacobi); } #endif } } else { // of if do_gt == 0 // apply gt apply_gt_prop(gauge_trafo, g_spinor_field[is], is/n_c, is%n_c, 4, filename_prefix, g_source_location); } // of if do_gt == 0 } /****************************************************** * contractions ******************************************************/ #ifdef OPENMP omp_set_num_threads(g_num_threads); #pragma omp parallel private (ix,icomp,threadid) \ firstprivate (fermion_type,gamma_component,num_component,connq,\ gamma_component_sign,VOL3,g_spinor_field,fp1,fp2,fp3,fpaux,uprop,sp1,sp2) { threadid = omp_get_thread_num(); #else threadid = 0; #endif for(ix=threadid; ix<VOL3; ix+=g_num_threads) { // assign the propagators _assign_fp_point_from_field(uprop[threadid], g_spinor_field, ix); if(fermion_type == _TM_FERMION) { _fp_eq_rot_ti_fp(fp1[threadid], uprop[threadid], +1, fermion_type, fp2[threadid]); _fp_eq_fp_ti_rot(uprop[threadid], fp1[threadid], +1, 
fermion_type, fp2[threadid]); } for(icomp=0; icomp<num_component; icomp++) { _sp_eq_zero( connq[ix*num_component+icomp]); /****************************************************** * prepare propagators ******************************************************/ // fp1[threadid] = C Gamma_1 x S_u = g0 g2 Gamma_1 S_u _fp_eq_zero(fp1[threadid]); _fp_eq_zero(fpaux[threadid]); _fp_eq_gamma_ti_fp(fp1[threadid], gamma_component[0][icomp], uprop[threadid]); _fp_eq_gamma_ti_fp(fpaux[threadid], 2, fp1[threadid]); _fp_eq_gamma_ti_fp(fp1[threadid], 0, fpaux[threadid]); // fp2[threadid] = C Gamma_1 x S_u x C Gamma_2 _fp_eq_zero(fp2[threadid]); _fp_eq_zero(fpaux[threadid]); _fp_eq_fp_ti_gamma(fp2[threadid], 0, fp1[threadid]); _fp_eq_fp_ti_gamma(fpaux[threadid], 2, fp2[threadid]); _fp_eq_fp_ti_gamma(fp2[threadid], gamma_component[1][icomp], fpaux[threadid]); // fp3[threadid] = S_u x C Gamma_2 = S_u g0 g2 Gamma_2 _fp_eq_zero(fp3[threadid]); _fp_eq_zero(fpaux[threadid]); _fp_eq_fp_ti_gamma(fp3[threadid], 0, uprop[threadid]); _fp_eq_fp_ti_gamma(fpaux[threadid], 2, fp3[threadid]); _fp_eq_fp_ti_gamma(fp3[threadid], gamma_component[1][icomp], fpaux[threadid]); /****************************************************** * contractions ******************************************************/ // (1) // reduce _fp_eq_zero(fpaux[threadid]); _fp_eq_fp_eps_contract13_fp(fpaux[threadid], fp1[threadid], uprop[threadid]); // reduce to spin propagator _sp_eq_zero( sp1[threadid] ); _sp_eq_fp_del_contract23_fp(sp1[threadid], fp3[threadid], fpaux[threadid]); // (2) // reduce to spin propagator _sp_eq_zero( sp2[threadid] ); _sp_eq_fp_del_contract24_fp(sp2[threadid], fp3[threadid], fpaux[threadid]); // add and assign _sp_pl_eq_sp(sp1[threadid], sp2[threadid]); _sp_eq_sp_ti_re(sp2[threadid], sp1[threadid], -gamma_component_sign[icomp]); _sp_eq_sp( connq[ix*num_component+icomp], sp2[threadid]); // (3) // reduce _fp_eq_zero(fpaux[threadid]); _fp_eq_fp_eps_contract13_fp(fpaux[threadid], fp2[threadid], 
uprop[threadid]); // reduce to spin propagator _sp_eq_zero( sp1[threadid] ); _sp_eq_fp_del_contract23_fp(sp1[threadid], uprop[threadid], fpaux[threadid]); // (4) // reduce _fp_eq_zero(fpaux[threadid]); _fp_eq_fp_eps_contract13_fp(fpaux[threadid], fp1[threadid], fp3[threadid]); // reduce to spin propagator _sp_eq_zero( sp2[threadid] ); _sp_eq_fp_del_contract24_fp(sp2[threadid], uprop[threadid], fpaux[threadid]); // add and assign _sp_pl_eq_sp(sp1[threadid], sp2[threadid]); _sp_eq_sp_ti_re(sp2[threadid], sp1[threadid], -gamma_component_sign[icomp]); _sp_pl_eq_sp( connq[ix*num_component+icomp], sp2[threadid]); // (5) // reduce _fp_eq_zero(fpaux[threadid]); _fp_eq_fp_eps_contract13_fp(fpaux[threadid], fp2[threadid], uprop[threadid]); // reduce to spin propagator _sp_eq_zero( sp1[threadid] ); _sp_eq_fp_del_contract34_fp(sp1[threadid], uprop[threadid], fpaux[threadid]); // (6) // reduce _fp_eq_zero(fpaux[threadid]); _fp_eq_fp_eps_contract13_fp(fpaux[threadid], fp1[threadid], fp3[threadid]); // reduce to spin propagator _sp_eq_zero( sp2[threadid] ); _sp_eq_fp_del_contract34_fp(sp2[threadid], uprop[threadid], fpaux[threadid]); // add and assign _sp_pl_eq_sp(sp1[threadid], sp2[threadid]); _sp_eq_sp_ti_re(sp2[threadid], sp1[threadid], -gamma_component_sign[icomp]); _sp_pl_eq_sp( connq[ix*num_component+icomp], sp2[threadid]); } // of icomp } // of ix #ifdef OPENMP } #endif /*********************************************** * finish calculation of connq ***********************************************/ if(g_propagator_bc_type == 0) { // multiply with phase factor fprintf(stdout, "# [] multiplying timeslice %d with boundary phase factor\n", timeslice); ir = (timeslice - sx0 + T_global) % T_global; w1.re = cos( 3. * M_PI*(double)ir / (double)T_global ); w1.im = sin( 3. 
* M_PI*(double)ir / (double)T_global ); for(ix=0;ix<num_component*VOL3;ix++) { _sp_eq_sp(sp1[0], connq[ix] ); _sp_eq_sp_ti_co( connq[ix], sp1[0], w1); } } else if (g_propagator_bc_type == 1) { // multiply with step function if(timeslice < sx0) { fprintf(stdout, "# [] multiplying timeslice %d with boundary step function\n", timeslice); for(ix=0;ix<num_component*VOL3;ix++) { _sp_eq_sp(sp1[0], connq[ix] ); _sp_eq_sp_ti_re( connq[ix], sp1[0], -1.); } } } if(write_ascii) { sprintf(filename, "%s_x.%.4d.t%.2dx%.2dy%.2dz%.2d.ascii", outfile_prefix, Nconf, sx0, sx1, sx2, sx3); write_contraction2( connq[0][0], filename, num_component*g_sv_dim*g_sv_dim, VOL3, 1, append); } /****************************************************************** * Fourier transform ******************************************************************/ items = 2 * num_component * g_sv_dim * g_sv_dim * VOL3; bytes = sizeof(double); memcpy(in, connq[0][0], items * bytes); ir = num_component * g_sv_dim * g_sv_dim; #ifdef OPENMP fftwnd_threads(g_num_threads, plan_p, ir, in, ir, 1, (fftw_complex*)(connq[0][0]), ir, 1); #else fftwnd(plan_p, ir, in, ir, 1, (fftw_complex*)(connq[0][0]), ir, 1); #endif // add phase factor from the source location iix = 0; for(x1=0;x1<LX;x1++) { q[0] = (double)x1 / (double)LX; for(x2=0;x2<LY;x2++) { q[1] = (double)x2 / (double)LY; for(x3=0;x3<LZ;x3++) { q[2] = (double)x3 / (double)LZ; phase = 2. * M_PI * ( q[0]*sx1 + q[1]*sx2 + q[2]*sx3 ); w1.re = cos(phase); w1.im = sin(phase); for(icomp=0; icomp<num_component; icomp++) { _sp_eq_sp(sp1[0], connq[iix] ); _sp_eq_sp_ti_co( connq[iix], sp1[0], w1) ; iix++; } }}} // of x3, x2, x1 // write to file sprintf(filename, "%s_q.%.4d.t%.2dx%.2dy%.2dz%.2d", outfile_prefix, Nconf, sx0, sx1, sx2, sx3); sprintf(contype, "2-pt. 
function, (t,q_1,q_2,q_3)-dependent, source_timeslice = %d", sx0); write_lime_contraction_timeslice(connq[0][0], filename, 64, num_component*g_sv_dim*g_sv_dim, contype, Nconf, 0, &connq_checksum, timeslice); if(write_ascii) { strcat(filename, ".ascii"); write_contraction2(connq[0][0],filename, num_component*g_sv_dim*g_sv_dim, VOL3, 1, append); } /*********************************************** * calculate connt ***********************************************/ for(icomp=0;icomp<num_component; icomp++) { // fwd _sp_eq_sp(sp1[0], connq[icomp]); _sp_eq_gamma_ti_sp(sp2[0], 0, sp1[0]); _sp_pl_eq_sp(sp1[0], sp2[0]); _co_eq_tr_sp(&w, sp1[0]); connt[2*(icomp*T + timeslice) ] = w.re * 0.25; connt[2*(icomp*T + timeslice)+1] = w.im * 0.25; // bwd _sp_eq_sp(sp1[0], connq[icomp]); _sp_eq_gamma_ti_sp(sp2[0], 0, sp1[0]); _sp_mi_eq_sp(sp1[0], sp2[0]); _co_eq_tr_sp(&w, sp1[0]); connt[2*(icomp*T+timeslice + num_component*T) ] = w.re * 0.25; connt[2*(icomp*T+timeslice + num_component*T)+1] = w.im * 0.25; } } // of loop on timeslice // write connt sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.fw", outfile_prefix, Nconf, sx0, sx1, sx2, sx3); ofs = fopen(filename, "w"); if(ofs == NULL) { fprintf(stderr, "[] Error, could not open file %s for writing\n", filename); exit(3); } fprintf(ofs, "#%12.8f%3d%3d%3d%3d%8.4f%6d\n", g_kappa, T_global, LX, LY, LZ, g_mu, Nconf); for(icomp=0; icomp<num_component; icomp++) { ir = sx0; fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], 0, connt[2*(icomp*T+ir)], 0., Nconf); for(it=1;it<T/2;it++) { ir = ( it + sx0 ) % T_global; ir2 = ( (T_global - it) + sx0 ) % T_global; fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(icomp*T+ir)], connt[2*(icomp*T+ir2)], Nconf); } ir = ( it + sx0 ) % T_global; fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(icomp*T+ir)], 0., Nconf); } fclose(ofs); 
sprintf(filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.bw", outfile_prefix, Nconf, sx0, sx1, sx2, sx3); ofs = fopen(filename, "w"); if(ofs == NULL) { fprintf(stderr, "[] Error, could not open file %s for writing\n", filename); exit(3); } fprintf(ofs, "#%12.8f%3d%3d%3d%3d%8.4f%6d\n", g_kappa, T_global, LX, LY, LZ, g_mu, Nconf); for(icomp=0; icomp<num_component; icomp++) { ir = sx0; fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], 0, connt[2*(num_component*T+icomp*T+ir)], 0., Nconf); for(it=1;it<T/2;it++) { ir = ( it + sx0 ) % T_global; ir2 = ( (T_global - it) + sx0 ) % T_global; fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(num_component*T+icomp*T+ir)], connt[2*(num_component*T+icomp*T+ir2)], Nconf); } ir = ( it + sx0 ) % T_global; fprintf(ofs, "%3d%3d%3d%16.7e%16.7e%6d\n", gamma_component[0][icomp], gamma_component[1][icomp], it, connt[2*(num_component*T+icomp*T+ir)], 0., Nconf); } fclose(ofs); /*********************************************** * free the allocated memory, finalize ***********************************************/ free_geometry(); if(connt!= NULL) free(connt); if(connq!= NULL) free(connq); if(gauge_trafo != NULL) free(gauge_trafo); if(g_spinor_field!=NULL) { for(i=0; i<no_fields; i++) free(g_spinor_field[i]); free(g_spinor_field); g_spinor_field=(double**)NULL; } if(spinor_field_checksum !=NULL) free(spinor_field_checksum); if(g_gauge_field != NULL) free(g_gauge_field); for(i=0;i<g_num_threads;i++) { free_fp(uprop+i); } for(i=0;i<g_num_threads;i++) { free_fp(fp1+i); } for(i=0;i<g_num_threads;i++) { free_fp(fp2+i); } for(i=0;i<g_num_threads;i++) { free_fp(fp3+i); } for(i=0;i<g_num_threads;i++) { free_fp(fpaux+i); } for(i=0;i<g_num_threads;i++) { free_sp(sp1+i); } for(i=0;i<g_num_threads;i++) { free_sp(sp2+i); } if(uprop!=NULL) free(uprop); if(fp1!=NULL) free(fp1); if(fp2!=NULL) free(fp2); if(fp3!=NULL) free(fp3); if(fpaux!=NULL) free(fpaux); 
if(sp1!=NULL) free(sp1); if(sp2!=NULL) free(sp2); free(in); fftwnd_destroy_plan(plan_p); g_the_time = time(NULL); fprintf(stdout, "# [] %s# [] end fo run\n", ctime(&g_the_time)); fflush(stdout); fprintf(stderr, "# [] %s# [] end fo run\n", ctime(&g_the_time)); fflush(stderr); #ifdef MPI MPI_Finalize(); #endif return(0); }
//------------------------------------------------------------------- int fdct_wrapping(int N1, int N2, int nbscales, int nbangles_coarse, int allcurvelets, CpxNumMat& x, vector< vector<CpxNumMat> >& c) { //--------------------------------------------- assert(N1==x.m() && N2==x.n()); int F1 = N1/2; int F2 = N2/2; // ifft original data CpxNumMat T(x); fftwnd_plan p = fftw2d_create_plan(N2, N1, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE); fftwnd_one(p, (fftw_complex*)T.data(), NULL); fftwnd_destroy_plan(p); double sqrtprod = sqrt(double(N1*N2)); for(int j=0; j<N2; j++) for(int i=0; i<N1; i++) T(i,j) /= sqrtprod; CpxOffMat O(N1, N2); fdct_wrapping_fftshift(T, O); //----------------------------------------------------------------------------- vector<CpxOffMat> Xhghs; Xhghs.resize(nbscales); CpxOffMat X; //unfold or not if(allcurvelets==1) { //-------------------------- double XL1 = 4.0*N1/3.0; double XL2 = 4.0*N2/3.0; //range int XS1, XS2; int XF1, XF2; double XR1, XR2; fdct_wrapping_rangecompute(XL1, XL2, XS1, XS2, XF1, XF2, XR1, XR2); IntOffVec t1(XS1); for(int i=-XF1; i<-XF1+XS1; i++) if( i<-N1/2) t1(i) = i+int(N1); else if(i>(N1-1)/2) t1(i) = i-int(N1); else t1(i) = i; IntOffVec t2(XS2); for(int i=-XF2; i<-XF2+XS2; i++) if( i<-N2/2) t2(i) = i+int(N2); else if(i>(N2-1)/2) t2(i) = i-int(N2); else t2(i) = i; X.resize(XS1, XS2); for(int j=-XF2; j<-XF2+XS2; j++) for(int i=-XF1; i<-XF1+XS1; i++) X(i,j) = O(t1(i), t2(j)); DblOffMat lowpass(XS1,XS2); fdct_wrapping_lowpasscompute(XL1, XL2, lowpass); //compute the low pass filter for(int j=-XF2; j<-XF2+XS2; j++) for(int i=-XF1; i<-XF1+XS1; i++) X(i,j) *= lowpass(i,j); } else { //-------------------------- X = O; } //separate double XL1 = 4.0*N1/3.0; double XL2 = 4.0*N2/3.0; //range for(int sc=nbscales-1; sc>0; sc--) { double XL1n = XL1/2; double XL2n = XL2/2; int XS1n, XS2n; int XF1n, XF2n; double XR1n, XR2n; fdct_wrapping_rangecompute(XL1n, XL2n, XS1n, XS2n, XF1n, XF2n, XR1n, XR2n); //computer filter DblOffMat 
lowpass(XS1n, XS2n); fdct_wrapping_lowpasscompute(XL1n, XL2n, lowpass); DblOffMat hghpass(XS1n, XS2n); for(int j=-XF2n; j<-XF2n+XS2n; j++) for(int i=-XF1n; i<-XF1n+XS1n; i++) hghpass(i,j) = sqrt(1-lowpass(i,j)*lowpass(i,j)); //separate CpxOffMat Xhgh(X); for(int j=-XF2n; j<-XF2n+XS2n; j++) for(int i=-XF1n; i<-XF1n+XS1n; i++) Xhgh(i,j) *= hghpass(i,j); CpxOffMat Xlow(XS1n, XS2n); for(int j=-XF2n; j<-XF2n+XS2n; j++) for(int i=-XF1n; i<-XF1n+XS1n; i++) Xlow(i,j) = X(i,j) * lowpass(i,j); //store and prepare for next level Xhghs[sc] = Xhgh; X = Xlow; XL1 = XL1/2; XL2 = XL2/2; } Xhghs[0] = X; //----------------------------------------------------------------------------- vector<int> nbangles(nbscales); if(allcurvelets==1) { //nbangles nbangles[0] = 1; for(int sc=1; sc<nbscales; sc++) nbangles[sc] = nbangles_coarse * pow2( int(ceil(double(sc-1)/2)) ); //c c.resize(nbscales); for(int sc=0; sc<nbscales; sc++) c[sc].resize( nbangles[sc] ); double XL1 = 4.0*N1/3.0; double XL2 = 4.0*N2/3.0; //range for(int sc=nbscales-1; sc>0; sc--) { fdct_wrapping_sepangle(XL1, XL2, nbangles[sc], Xhghs[sc], c[sc]); XL1 /= 2; XL2 /= 2; } fdct_wrapping_wavelet(Xhghs[0], c[0]); } else { //nbangles nbangles[0] = 1; for(int sc=1; sc<nbscales-1; sc++) nbangles[sc] = nbangles_coarse * pow2( int(ceil(double(sc-1)/2)) ); nbangles[nbscales-1] = 1; //c c.resize(nbscales); for(int sc=0; sc<nbscales; sc++) c[sc].resize( nbangles[sc] ); fdct_wrapping_wavelet(Xhghs[nbscales-1], c[nbscales-1]); double XL1 = 2.0*N1/3.0; double XL2 = 2.0*N2/3.0; //range for(int sc=nbscales-2; sc>0; sc--) { fdct_wrapping_sepangle(XL1, XL2, nbangles[sc], Xhghs[sc], c[sc]); XL1 /= 2; XL2 /= 2; } fdct_wrapping_wavelet(Xhghs[0], c[0]); } return 0; }