/*
 * Fortran-callable wrapper that creates a 3D FFTW plan.
 *
 * p      receives the newly created plan
 * nx, ny, nz
 *        transform sizes; they are handed to fftw3d_create_plan in
 *        reversed order (nz, ny, nx) -- presumably to account for
 *        Fortran's column-major array layout
 * idir   a negative value selects FFTW_FORWARD, anything else
 *        selects FFTW_BACKWARD
 * flags  passed through unchanged as the FFTW planner flags
 */
void F77_FUNC_(fftw3d_f77_create_plan,FFTW3D_F77_CREATE_PLAN)
(fftwnd_plan *p, int *nx, int *ny, int *nz, int *idir, int *flags)
{
     fftw_direction dir;

     if (*idir < 0)
          dir = FFTW_FORWARD;
     else
          dir = FFTW_BACKWARD;

     *p = fftw3d_create_plan(*nz, *ny, *nx, dir, *flags);
}
//------------------------------------------------------------------------------------ int fdct3d_inverse_center(int N1,int N2,int N3,int b, double L1,double L2,double L3, int s, CpxCrvletPrtd& C, CpxNumTnsBlkd& W) { int mpirank; MPI_Comm_rank(MPI_COMM_WORLD, &mpirank); vector< vector<int> >& Cowners = C.owners(); if(Cowners[0][0]==mpirank) { int S1, S2, S3; int F1, F2, F3; double R1, R2, R3; fdct3d_rangecompute(L1, L2, L3, S1, S2, S3, F1, F2, F3, R1, R2, R3); DblOffVec big1(S1); fdct3d_lowpass(L1, big1); DblOffVec big2(S2); fdct3d_lowpass(L2, big2); DblOffVec big3(S3); fdct3d_lowpass(L3, big3); CpxNumTns T(S1,S2,S3); CpxNumTns& Cblk = C.block(0,0); //center block T = Cblk; fftwnd_plan p = fftw3d_create_plan(S3,S2,S1, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE); fftwnd_one(p, (fftw_complex*)T.data(), NULL); fftwnd_destroy_plan(p); double sqrtprod = sqrt(double(S1*S2*S3)); for(int i=0; i<S1; i++) for(int j=0; j<S2; j++) for(int k=0; k<S3; k++) T(i,j,k) /= sqrtprod; CpxOffTns A(S1,S2,S3); fdct3d_fftshift(S1,S2,S3,T,A); for(int i=-S1/2; i<-S1/2+S1; i++) for(int j=-S2/2; j<-S2/2+S2; j++) for(int k=-S3/2; k<-S3/2+S3; k++) { int bi,bj,bk; int oi,oj,ok; fdct3d_position_aux(N1,N2,N3,b, i,j,k, bi,bj,bk,oi,oj,ok); CpxNumTns& Wblk = W.block(bi,bj,bk); Wblk(oi,oj,ok) += A(i,j,k) * (big1(i)*big2(j)*big3(k)); } //done } return 0; }
/*
 * Fortran-callable wrapper that creates an in-place 3D FFTW plan
 * (FFTW_ESTIMATE | FFTW_IN_PLACE).
 *
 * p     receives the plan returned by fftw3d_create_plan (NULL on failure)
 * n, m, l
 *       transform sizes; they are handed to fftw3d_create_plan in the
 *       order (l, m, n)
 * idir  a negative value selects FFTW_FORWARD, anything else selects
 *       FFTW_BACKWARD
 *
 * A NULL plan is reported on stderr but is not treated as an error:
 * the function always returns 0.
 */
int F77_FUNC_ (create_plan_3d, CREATE_PLAN_3D) (fftwnd_plan *p, int *n, int *m, int *l, int *idir)
{
   fftw_direction dir = ( (*idir < 0) ? FFTW_FORWARD : FFTW_BACKWARD );

   *p = fftw3d_create_plan(*l, *m, *n, dir, FFTW_ESTIMATE | FFTW_IN_PLACE);

   if( *p == NULL ) {
      fprintf(stderr," *** CREATE_PLAN_3D: warning empty plan ***\n");
      /* values are printed in the order announced by the label: n, m, l, dir */
      fprintf(stderr," *** input was (n,m,l,dir): %d %d %d %d ***\n", *n, *m, *l, *idir);
   }

   return 0;
}
/*
 * Set up an opaque FFT handle for 3D complex-to-complex transforms (FFTW2).
 *
 * pfft        output: receives the new handle on success; set to NULL as
 *             soon as the pointer itself has been validated
 * nx, ny, nz  transform sizes, passed to fftw3d_create_plan in that order
 * flags       not used by this routine (planning always uses FFTW_ESTIMATE)
 *
 * Four plans are created and stored in multi[i][j]:
 *   i = 0 out-of-place, i = 1 in-place;  j = 0 backward, j = 1 forward.
 *
 * Returns 0 on success, EINVAL if pfft is NULL, ENOMEM if the handle cannot
 * be allocated, and -1 if any FFTW plan could not be created.
 */
int
gmx_fft_init_3d(gmx_fft_t * pfft, int nx, int ny, int nz, enum gmx_fft_flag flags)
{
    int i,j;
    gmx_fft_t fft;
    int fftw_flags;

    /* FFTW2 is slow to measure, so we do not use it */
    /* If you change this, add an #ifndef for GMX_DISABLE_FFTW_MEASURE around it! */
    fftw_flags = FFTW_ESTIMATE;

    if(pfft==NULL)
    {
        gmx_fatal(FARGS,"Invalid opaque FFT datatype pointer.");
        return EINVAL;
    }
    *pfft = NULL;

    if( (fft = (gmx_fft_t)malloc(sizeof(struct gmx_fft))) == NULL)
    {
        return ENOMEM;
    }

    /* Fill in every non-plan field before any plan is created, so that the
     * error path below never hands gmx_fft_destroy() a struct with
     * indeterminate members.
     * No workspace needed for complex-to-complex FFTs.
     */
    fft->work = NULL;
    fft->nx   = nx;
    fft->ny   = ny;
    fft->nz   = nz;
    fft->ndim = 3;

    fft->single[0][0] = NULL;
    fft->single[0][1] = NULL;
    fft->single[1][0] = NULL;
    fft->single[1][1] = NULL;

    fft->multi[0][0] = fftw3d_create_plan(nx,ny,nz,FFTW_BACKWARD,FFTW_OUT_OF_PLACE|fftw_flags);
    fft->multi[0][1] = fftw3d_create_plan(nx,ny,nz,FFTW_FORWARD,FFTW_OUT_OF_PLACE|fftw_flags);
    fft->multi[1][0] = fftw3d_create_plan(nx,ny,nz,FFTW_BACKWARD,FFTW_IN_PLACE|fftw_flags);
    fft->multi[1][1] = fftw3d_create_plan(nx,ny,nz,FFTW_FORWARD,FFTW_IN_PLACE|fftw_flags);

    for(i=0;i<2;i++)
    {
        for(j=0;j<2;j++)
        {
            if(fft->multi[i][j] == NULL)
            {
                gmx_fatal(FARGS,"Error initializing FFTW2 plan.");
                gmx_fft_destroy(fft);
                return -1;
            }
        }
    }

    *pfft = fft;
    return 0;
}
/*
 * Check an in-place N-dimensional complex transform against a plan that is
 * already trusted.
 *
 * rank, n         number of dimensions and the size of each one
 * dir             transform direction used for the plan under test
 * validated_plan  reference plan; it is run out-of-place on the same data
 * alternate_api   when non-zero and rank is 2 or 3, the plan is created with
 *                 the fftw2d_ / fftw3d_ entry points instead of fftwnd_
 * specific        when non-zero, the *_create_plan_specific variants are used
 * force_buffered  when non-zero, FFTWND_FORCE_BUFFERED is added to the flags
 *
 * For every stride from 1 to MAX_STRIDE the routine fills a strided buffer
 * with `stride` interleaved copies of one random input, transforms it in
 * place, and compares each copy with the reference output via CHECK.
 */
void testnd_in_place(int rank, int *n, fftw_direction dir,
		     fftwnd_plan validated_plan,
		     int alternate_api, int specific, int force_buffered)
{
     fftw_complex *strided, *ref_in, *ref_out;
     fftwnd_plan plan;
     int total = 1;
     int d, k, stride;
     int use_alternate;
     int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;

     if (coinflip())
	  flags |= FFTW_THREADSAFE;
     if (force_buffered)
	  flags |= FFTWND_FORCE_BUFFERED;

     /* total number of elements in one transform */
     for (d = 0; d < rank; ++d)
	  total *= n[d];

     strided = (fftw_complex *) fftw_malloc(total * MAX_STRIDE * sizeof(fftw_complex));
     ref_in = (fftw_complex *) fftw_malloc(total * sizeof(fftw_complex));
     ref_out = (fftw_complex *) fftw_malloc(total * sizeof(fftw_complex));

     /* pick the plan constructor: 2d / 3d convenience API or the generic one */
     use_alternate = alternate_api && (rank == 2 || rank == 3);

     if (use_alternate && rank == 2) {
	  if (specific)
	       plan = fftw2d_create_plan_specific(n[0], n[1], dir, flags,
						  strided, 1,
						  (fftw_complex *) NULL, 1);
	  else
	       plan = fftw2d_create_plan(n[0], n[1], dir, flags);
     } else if (use_alternate) {
	  if (specific)
	       plan = fftw3d_create_plan_specific(n[0], n[1], n[2], dir, flags,
						  strided, 1,
						  (fftw_complex *) NULL, 1);
	  else
	       plan = fftw3d_create_plan(n[0], n[1], n[2], dir, flags);
     } else {
	  /* standard api */
	  if (specific)
	       plan = fftwnd_create_plan_specific(rank, n, dir, flags,
						  strided, 1,
						  (fftw_complex *) NULL, 1);
	  else
	       plan = fftwnd_create_plan(rank, n, dir, flags);
     }

     for (stride = 1; stride <= MAX_STRIDE; ++stride) {
	  /* random input, replicated `stride` times side by side */
	  for (k = 0; k < total; ++k) {
	       int rep;
	       fftw_complex *dst = strided + k * stride;

	       c_re(ref_in[k]) = DRAND();
	       c_im(ref_in[k]) = DRAND();
	       for (rep = 0; rep < stride; ++rep) {
		    c_re(dst[rep]) = c_re(ref_in[k]);
		    c_im(dst[rep]) = c_im(ref_in[k]);
	       }
	  }

	  /* the single-transform entry point is only eligible at unit stride,
	     and even then only on a coin flip */
	  if (stride == 1 && !coinflip())
	       fftwnd_one(plan, strided, NULL);
	  else
	       fftwnd(plan, stride, strided, stride, 1,
		      (fftw_complex *) NULL, 1, 1);

	  fftwnd(validated_plan, 1, ref_in, 1, 1, ref_out, 1, 1);

	  for (k = 0; k < stride; ++k)
	       CHECK(compute_error_complex(strided + k, stride, ref_out, 1, total) < TOLERANCE,
		     "testnd_in_place: wrong answer");
     }

     fftwnd_destroy_plan(plan);

     fftw_free(ref_out);
     fftw_free(ref_in);
     fftw_free(strided);
}
// fdct3d_inverse_angles
//
// Purpose: for scale s, adds the contribution of every angular wedge owned by
// the calling MPI rank to the blocked tensor W.
//
// Parameters (as used by the code below):
//   N1,N2,N3,b  forwarded unchanged to fdct3d_position_aux, which reports the
//               W block (bi,bj,bk) and in-block offset (oi,oj,ok) of a point
//   L1,L2,L3    passed to fdct3d_rangecompute and fdct3d_lowpass; L/2 is used
//               for the second ("sma") set of weights and L/nd is the wedge
//               width W1,W2,W3
//   s           scale index: selects C.owners()[s] and C.block(s,wcnt)
//   nd          number of wedges per direction on each face
//   C           source of the wedge data and of the ownership table
//   W           accumulated into; entries are updated with +=
// Returns 0 in all cases.
//
// Structure:
//   * Six faces (nf = 6) are processed in order; each face is an nd x nd
//     double loop.  Faces 0-2 iterate the wedge indices upward, faces 3-5
//     iterate them downward from nd-1.  wcnt is incremented for every wedge,
//     owned or not, and the final iA() checks that wcnt == nd*nd*nf.
//   * A wedge is only processed when crvowners[wcnt]==mpirank.
//   * For an owned wedge the code
//       1. computes the bounding box [xs,xe] x [ys,ye] x [zs,ze] and its
//          integer sizes xn,yn,zn (ceil of the extents),
//       2. computes three angles per direction (thts/thtm/thte and
//          phis/phim/phie); thtm and phim are the split points between the
//          "l" and "r" outputs of fdct3d_window,
//       3. copies C.block(s,wcnt) into tpdata, runs an in-place forward FFT
//          and divides by sqrt(xn*yn*zn),
//       4. applies fdct3d_fftshift into wpdata,
//       5. visits the integer points inside the wedge, wraps each coordinate
//          into [-xh,-xh+xn) (same for y and z), multiplies the wpdata entry
//          in place by pou = glbpou*wtht*wphi, and adds
//          wpdata * bb * sqrt(1-ss*ss) to the matching entry of W, where
//          bb and ss are products of the big*/sma* weights at that point.
//       At the first/last wedge index in a direction (index 0 or nd-1) the
//       corresponding one-sided window weight is fixed to 1.
//   * FFTW2 plans are cached in planmap, keyed by (xn,(yn,zn)), and are all
//     destroyed just before returning.
//
// NOTE(review): mpisize and the results of the second fdct3d_rangecompute call
//   (Sh*, Fh*, Rh*) do not appear to be used afterwards; sma1..3 are sized with
//   S1..S3, not Sh1..Sh3 -- confirm this is intended.
// NOTE(review): faces 0, 1 and 3 build their axis ratios from R1,R2,R3 while
//   faces 2, 4 and 5 use double(F*)/double(F*) -- confirm both give the same
//   values.
// NOTE(review): the plan returned by fftw3d_create_plan is not checked for
//   NULL before use.
// NOTE(review): several inline "//z to x" / "//y to x" comments are repeated
//   verbatim on faces where the angle is measured between other axes (see the
//   atan2 arguments of thtcur/phicur on each face).
//------------------------------------------------------------------------------------ int fdct3d_inverse_angles(int N1,int N2,int N3,int b, double L1,double L2,double L3, int s,int nd, CpxCrvletPrtd& C, CpxNumTnsBlkd& W) { int mpirank; MPI_Comm_rank(MPI_COMM_WORLD, &mpirank); int mpisize; MPI_Comm_size(MPI_COMM_WORLD, &mpisize); vector< vector<int> >& Cowners = C.owners(); vector<int>& crvowners = Cowners[s]; //LEXING: the owner information for wedges in scale s int nf = 6; int wcnt = 0; int S1, S2, S3; int F1, F2, F3; double R1, R2, R3; fdct3d_rangecompute(L1, L2, L3, S1, S2, S3, F1, F2, F3, R1, R2, R3); DblOffVec big1(S1); fdct3d_lowpass(L1, big1); DblOffVec big2(S2); fdct3d_lowpass(L2, big2); DblOffVec big3(S3); fdct3d_lowpass(L3, big3); double Lh1 = L1/2; double Lh2 = L2/2; double Lh3 = L3/2; int Sh1, Sh2, Sh3; int Fh1, Fh2, Fh3; double Rh1, Rh2, Rh3; fdct3d_rangecompute(Lh1, Lh2, Lh3, Sh1, Sh2, Sh3, Fh1, Fh2, Fh3, Rh1, Rh2, Rh3); DblOffVec sma1(S1); fdct3d_lowpass(Lh1, sma1); DblOffVec sma2(S2); fdct3d_lowpass(Lh2, sma2); DblOffVec sma3(S3); fdct3d_lowpass(Lh3, sma3); double W1 = L1/nd; double W2 = L2/nd; double W3 = L3/nd; typedef pair<int,int> intpair; typedef pair<int, intpair> inttriple; map<inttriple, fftwnd_plan> planmap; //face 0: x,y,z for(int h=0; h<nd; h++) { //(y first z second) for(int g=0; g<nd; g++) { if(crvowners[wcnt]==mpirank) { double xs = R1/4-(W1/2)/4; double xe = R1; double ys = -R2 + (2*g-1)*W2/2; double ye = -R2 + (2*g+3)*W2/2; double zs = -R3 + (2*h-1)*W3/2; double ze = -R3 + (2*h+3)*W3/2; int xn = int(ceil(xe-xs)); int yn = int(ceil(ye-ys)); int zn = int(ceil(ze-zs)); double thts, thtm, thte; //y to x if(g==0) { thts = atan2(-1.0, 1.0-1.0/nd); thtm = atan2(-1.0+1.0/nd, 1.0); thte = atan2(-1.0+3.0/nd, 1.0); } else if(g==nd-1) { thts = atan2(-1.0+(2.0*g-1.0)/nd, 1.0); thtm = atan2(-1.0+(2.0*g+1.0)/nd, 1.0); thte = atan2(1.0, 1.0-1.0/nd); } else { thts = atan2(-1.0+(2.0*g-1.0)/nd, 1.0); thtm = atan2(-1.0+(2.0*g+1.0)/nd, 1.0); thte = 
atan2(-1.0+(2.0*g+3.0)/nd, 1.0); } double phis, phim, phie; //z to x if(h==0) { phis = atan2(-1.0, 1.0-1.0/nd); phim = atan2(-1.0+1.0/nd, 1.0); phie = atan2(-1.0+3.0/nd, 1.0); } else if(h==nd-1) { phis = atan2(-1.0+(2.0*h-1.0)/nd, 1.0); phim = atan2(-1.0+(2.0*h+1.0)/nd, 1.0); phie = atan2(1.0, 1.0-1.0/nd); } else { phis = atan2(-1.0+(2.0*h-1.0)/nd, 1.0); phim = atan2(-1.0+(2.0*h+1.0)/nd, 1.0); phie = atan2(-1.0+(2.0*h+3.0)/nd, 1.0); } int xh = xn/2; int yh = yn/2; int zh = zn/2; //half double R21 = R2/R1; double R31 = R3/R1; CpxNumTns tpdata(xn,yn,zn); CpxNumTns& Cblk = C.block(s,wcnt); tpdata = Cblk; //fft fftwnd_plan p = NULL; map<inttriple, fftwnd_plan>::iterator mit = planmap.find( inttriple(xn, intpair(yn,zn)) ); if(mit!=planmap.end()) { p = (*mit).second; } else { p = fftw3d_create_plan(zn, yn, xn, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE); planmap[ inttriple(xn, intpair(yn,zn)) ] = p; } fftwnd_one(p, (fftw_complex*)tpdata.data(), NULL); //cerr<<"wedge s"<<endl; double sqrtprod = sqrt(double(xn*yn*zn)); for(int i=0; i<xn; i++) for(int j=0; j<yn; j++) for(int k=0; k<zn; k++) tpdata(i,j,k) /= sqrtprod; CpxOffTns wpdata(xn,yn,zn); fdct3d_fftshift(xn,yn,zn,tpdata,wpdata); for(int xcur=(int)ceil(xs); xcur<xe; xcur++) { int yfm = (int)ceil( max(-R2, R21*xcur*tan(thts)) ); int yto = (int)floor( min(R2, R21*xcur*tan(thte)) ); int zfm = (int)ceil( max(-R3, R31*xcur*tan(phis)) ); int zto = (int)floor( min(R3, R31*xcur*tan(phie)) ); for(int ycur=yfm; ycur<=yto; ycur++) for(int zcur=zfm; zcur<=zto; zcur++) { int tmpx = xcur%xn; if(tmpx<-xh) tmpx+=xn; if(tmpx>=-xh+xn) tmpx-=xn; int tmpy = ycur%yn; if(tmpy<-yh) tmpy+=yn; if(tmpy>=-yh+yn) tmpy-=yn; int tmpz = zcur%zn; if(tmpz<-zh) tmpz+=zn; if(tmpz>=-zh+zn) tmpz-=zn; double thtcur = atan2(ycur/R2, xcur/R1); double phicur = atan2(zcur/R3, xcur/R1); double glbpou; fdct3d_globalpou(thtcur, phicur, M_PI/4-atan2(1.0-1.0/nd, 1.0), glbpou); double wtht; if(thtcur<thtm) { if(g==0) wtht = 1; else { double l,r; fdct3d_window( 
(thtcur-thts)/(thtm-thts), l, r); wtht = l; } } else { if(g==nd-1) wtht = 1; else { double l,r; fdct3d_window( (thtcur-thtm)/(thte-thtm), l, r); wtht = r; } } double wphi; if(phicur<phim) { if(h==0) wphi = 1; else { double l,r; fdct3d_window( (phicur-phis)/(phim-phis), l, r); wphi = l; } } else { if(h==nd-1) wphi = 1; else { double l,r; fdct3d_window( (phicur-phim)/(phie-phim), l, r); wphi = r; } } double pou = glbpou * wtht * wphi; wpdata(tmpx, tmpy, tmpz) *= pou; double ss = sma1(xcur)*sma2(ycur)*sma3(zcur); double bb = big1(xcur)*big2(ycur)*big3(zcur); int bi,bj,bk; int oi,oj,ok; fdct3d_position_aux(N1,N2,N3,b, xcur,ycur,zcur, bi,bj,bk,oi,oj,ok); CpxNumTns& Wblk = W.block(bi,bj,bk); Wblk(oi,oj,ok) += wpdata(tmpx,tmpy,tmpz) * bb * sqrt(1.0-ss*ss); } } //xcur } //if wcnt++; } } //end of face //face 1. y z x for(int f=0; f<nd; f++) { for(int h=0; h<nd; h++) { if(crvowners[wcnt]==mpirank) { double ys = R2/4-(W2/2)/4; double ye = R2; double zs = -R3 + (2*h-1)*W3/2; double ze = -R3 + (2*h+3)*W3/2; double xs = -R1 + (2*f-1)*W1/2; double xe = -R1 + (2*f+3)*W1/2; int xn = int(ceil(xe-xs)); int yn = int(ceil(ye-ys)); int zn = int(ceil(ze-zs)); double thts, thtm, thte; //z to y if(h==0) { thts = atan2(-1.0, 1.0-1.0/nd); thtm = atan2(-1.0+1.0/nd, 1.0); thte = atan2(-1.0+3.0/nd, 1.0); } else if(h==nd-1) { thts = atan2(-1.0+(2.0*h-1.0)/nd, 1.0); thtm = atan2(-1.0+(2.0*h+1.0)/nd, 1.0); thte = atan2(1.0, 1.0-1.0/nd); } else { thts = atan2(-1.0+(2.0*h-1.0)/nd, 1.0); thtm = atan2(-1.0+(2.0*h+1.0)/nd, 1.0); thte = atan2(-1.0+(2.0*h+3.0)/nd, 1.0); } double phis, phim, phie; //z to x if(f==0) { phis = atan2(-1.0, 1.0-1.0/nd); phim = atan2(-1.0+1.0/nd, 1.0); phie = atan2(-1.0+3.0/nd, 1.0); } else if(f==nd-1) { phis = atan2(-1.0+(2.0*f-1.0)/nd, 1.0); phim = atan2(-1.0+(2.0*f+1.0)/nd, 1.0); phie = atan2(1.0, 1.0-1.0/nd); } else { phis = atan2(-1.0+(2.0*f-1.0)/nd, 1.0); phim = atan2(-1.0+(2.0*f+1.0)/nd, 1.0); phie = atan2(-1.0+(2.0*f+3.0)/nd, 1.0); } int xh = xn/2; int yh = yn/2; int zh 
= zn/2; double R32 = R3/R2; double R12 = R1/R2; CpxNumTns tpdata(xn,yn,zn); CpxNumTns& Cblk = C.block(s,wcnt); tpdata = Cblk; //fft fftwnd_plan p = NULL; map<inttriple, fftwnd_plan>::iterator mit = planmap.find( inttriple(xn, intpair(yn,zn)) ); if(mit!=planmap.end()) { p = (*mit).second; } else { p = fftw3d_create_plan(zn, yn, xn, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE); planmap[ inttriple(xn, intpair(yn,zn)) ] = p; } fftwnd_one(p, (fftw_complex*)tpdata.data(), NULL); //cerr<<"wedge s"<<endl; double sqrtprod = sqrt(double(xn*yn*zn)); for(int i=0; i<xn; i++) for(int j=0; j<yn; j++) for(int k=0; k<zn; k++) tpdata(i,j,k) /= sqrtprod; CpxOffTns wpdata(xn,yn,zn); fdct3d_fftshift(xn,yn,zn,tpdata,wpdata); for(int ycur=(int)ceil(ys); ycur<ye; ycur++) { int zfm = (int)ceil( max(-R3, R32*ycur*tan(thts)) ); int zto = (int)floor( min(R3, R32*ycur*tan(thte)) ); int xfm = (int)ceil( max(-R1, R12*ycur*tan(phis)) ); int xto = (int)floor( min(R1, R12*ycur*tan(phie)) ); for(int zcur=zfm; zcur<=zto; zcur++) for(int xcur=xfm; xcur<=xto; xcur++) { int tmpx = xcur%xn; if(tmpx<-xh) tmpx+=xn; if(tmpx>=-xh+xn) tmpx-=xn; int tmpy = ycur%yn; if(tmpy<-yh) tmpy+=yn; if(tmpy>=-yh+yn) tmpy-=yn; int tmpz = zcur%zn; if(tmpz<-zh) tmpz+=zn; if(tmpz>=-zh+zn) tmpz-=zn; double thtcur = atan2(zcur/R3, ycur/R2); double phicur = atan2(xcur/R1, ycur/R2); double glbpou; fdct3d_globalpou(thtcur, phicur, M_PI/4-atan2(1.0-1.0/nd, 1.0), glbpou); //CHECK double wtht; if(thtcur<thtm) { if(h==0) wtht = 1; else { double l,r; fdct3d_window( (thtcur-thts)/(thtm-thts), l, r); wtht = l; } } else { if(h==nd-1) wtht = 1; else { double l,r; fdct3d_window( (thtcur-thtm)/(thte-thtm), l, r); wtht = r; } } double wphi; if(phicur<phim) { if(f==0) wphi = 1; else { double l,r; fdct3d_window( (phicur-phis)/(phim-phis), l, r); wphi = l; } } else { if(f==nd-1) wphi = 1; else { double l,r; fdct3d_window( (phicur-phim)/(phie-phim), l, r); wphi = r; } } double pou = glbpou * wtht * wphi; wpdata(tmpx, tmpy, tmpz) *= pou; double ss 
= sma1(xcur)*sma2(ycur)*sma3(zcur); double bb = big1(xcur)*big2(ycur)*big3(zcur); int bi,bj,bk; int oi,oj,ok; fdct3d_position_aux(N1,N2,N3,b, xcur,ycur,zcur, bi,bj,bk,oi,oj,ok); CpxNumTns& Wblk = W.block(bi,bj,bk); Wblk(oi,oj,ok) += wpdata(tmpx,tmpy,tmpz) * bb * sqrt(1.0-ss*ss); } } //ycur }//if wcnt++; } }//end of face //face 2. z x y for(int g=0; g<nd; g++) { for(int f=0; f<nd; f++) { if(crvowners[wcnt]==mpirank) { double zs = R3/4-(W3/2)/4; double ze = R3; double xs = -R1 + (2*f-1)*W1/2; double xe = -R1 + (2*f+3)*W1/2; double ys = -R2 + (2*g-1)*W2/2; double ye = -R2 + (2*g+3)*W2/2; int xn = int(ceil(xe-xs)); int yn = int(ceil(ye-ys)); int zn = int(ceil(ze-zs)); double thts, thtm, thte; //y to x if(f==0) { thts = atan2(-1.0, 1.0-1.0/nd); thtm = atan2(-1.0+1.0/nd, 1.0); thte = atan2(-1.0+3.0/nd, 1.0); } else if(f==nd-1) { thts = atan2(-1.0+(2.0*f-1.0)/nd, 1.0); thtm = atan2(-1.0+(2.0*f+1.0)/nd, 1.0); thte = atan2(1.0, 1.0-1.0/nd); } else { thts = atan2(-1.0+(2.0*f-1.0)/nd, 1.0); thtm = atan2(-1.0+(2.0*f+1.0)/nd, 1.0); thte = atan2(-1.0+(2.0*f+3.0)/nd, 1.0); } double phis, phim, phie; //z to x if(g==0) { phis = atan2(-1.0, 1.0-1.0/nd); phim = atan2(-1.0+1.0/nd, 1.0); phie = atan2(-1.0+3.0/nd, 1.0); } else if(g==nd-1) { phis = atan2(-1.0+(2.0*g-1.0)/nd, 1.0); phim = atan2(-1.0+(2.0*g+1.0)/nd, 1.0); phie = atan2(1.0, 1.0-1.0/nd); } else { phis = atan2(-1.0+(2.0*g-1.0)/nd, 1.0); phim = atan2(-1.0+(2.0*g+1.0)/nd, 1.0); phie = atan2(-1.0+(2.0*g+3.0)/nd, 1.0); } int xh = xn/2; int yh = yn/2; int zh = zn/2; double R13 = double(F1)/double(F3); double R23 = double(F2)/double(F3); CpxNumTns tpdata(xn,yn,zn); CpxNumTns& Cblk = C.block(s,wcnt); tpdata = Cblk; //fft fftwnd_plan p = NULL; map<inttriple, fftwnd_plan>::iterator mit = planmap.find( inttriple(xn, intpair(yn,zn)) ); if(mit!=planmap.end()) { p = (*mit).second; } else { p = fftw3d_create_plan(zn, yn, xn, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE); planmap[ inttriple(xn, intpair(yn,zn)) ] = p; } fftwnd_one(p, 
(fftw_complex*)tpdata.data(), NULL); //cerr<<"wedge s"<<endl; double sqrtprod = sqrt(double(xn*yn*zn)); for(int i=0; i<xn; i++) for(int j=0; j<yn; j++) for(int k=0; k<zn; k++) tpdata(i,j,k) /= sqrtprod; CpxOffTns wpdata(xn,yn,zn); fdct3d_fftshift(xn,yn,zn,tpdata,wpdata); for(int zcur=(int)ceil(zs); zcur<ze; zcur++) { int xfm = (int)ceil( max(-R1, R13*zcur*tan(thts)) ); int xto = (int)floor( min(R1, R13*zcur*tan(thte)) ); int yfm = (int)ceil( max(-R2, R23*zcur*tan(phis)) ); int yto = (int)floor( min(R2, R23*zcur*tan(phie)) ); for(int xcur=xfm; xcur<=xto; xcur++) for(int ycur=yfm; ycur<=yto; ycur++) { int tmpx = xcur%xn; if(tmpx<-xh) tmpx+=xn; if(tmpx>=-xh+xn) tmpx-=xn; int tmpy = ycur%yn; if(tmpy<-yh) tmpy+=yn; if(tmpy>=-yh+yn) tmpy-=yn; int tmpz = zcur%zn; if(tmpz<-zh) tmpz+=zn; if(tmpz>=-zh+zn) tmpz-=zn; double thtcur = atan2(xcur/R1, zcur/R3); double phicur = atan2(ycur/R2, zcur/R3); double glbpou; fdct3d_globalpou(thtcur, phicur, M_PI/4-atan2(1.0-1.0/nd, 1.0), glbpou); double wtht; if(thtcur<thtm) { if(f==0) wtht = 1; else { double l,r; fdct3d_window( (thtcur-thts)/(thtm-thts), l, r); wtht = l; } } else { if(f==nd-1) wtht = 1; else { double l,r; fdct3d_window( (thtcur-thtm)/(thte-thtm), l, r); wtht = r; } } double wphi; if(phicur<phim) { if(g==0) wphi = 1; else { double l,r; fdct3d_window( (phicur-phis)/(phim-phis), l, r); wphi = l; } } else { if(g==nd-1) wphi = 1; else { double l,r; fdct3d_window( (phicur-phim)/(phie-phim), l, r); wphi = r; } } double pou = glbpou * wtht * wphi; wpdata(tmpx, tmpy, tmpz) *= pou; double ss = sma1(xcur)*sma2(ycur)*sma3(zcur); double bb = big1(xcur)*big2(ycur)*big3(zcur); int bi,bj,bk; int oi,oj,ok; fdct3d_position_aux(N1,N2,N3,b, xcur,ycur,zcur, bi,bj,bk,oi,oj,ok); CpxNumTns& Wblk = W.block(bi,bj,bk); Wblk(oi,oj,ok) += wpdata(tmpx,tmpy,tmpz) * bb * sqrt(1.0-ss*ss); } }//zcur }//if wcnt++; } }//end of face //face 3: -x,-y,-z for(int h=nd-1; h>=0; h--) { for(int g=nd-1; g>=0; g--) { if(crvowners[wcnt]==mpirank) { double xs = -R1; 
double xe = -R1/4+(W1/2)/4; double ys = -R2 + (2*g-1)*W2/2; double ye = -R2 + (2*g+3)*W2/2; double zs = -R3 + (2*h-1)*W3/2; double ze = -R3 + (2*h+3)*W3/2; int xn = int(ceil(xe-xs)); int yn = int(ceil(ye-ys)); int zn = int(ceil(ze-zs)); double thts, thtm, thte; //y to x if(g==0) { thts = atan2(-1.0, 1.0-1.0/nd); thtm = atan2(-1.0+1.0/nd, 1.0); thte = atan2(-1.0+3.0/nd, 1.0); } else if(g==nd-1) { thts = atan2(-1.0+(2.0*g-1.0)/nd, 1.0); thtm = atan2(-1.0+(2.0*g+1.0)/nd, 1.0); thte = atan2(1.0, 1.0-1.0/nd); } else { thts = atan2(-1.0+(2.0*g-1.0)/nd, 1.0); thtm = atan2(-1.0+(2.0*g+1.0)/nd, 1.0); thte = atan2(-1.0+(2.0*g+3.0)/nd, 1.0); } double phis, phim, phie; //z to x if(h==0) { phis = atan2(-1.0, 1.0-1.0/nd); phim = atan2(-1.0+1.0/nd, 1.0); phie = atan2(-1.0+3.0/nd, 1.0); } else if(h==nd-1) { phis = atan2(-1.0+(2.0*h-1.0)/nd, 1.0); phim = atan2(-1.0+(2.0*h+1.0)/nd, 1.0); phie = atan2(1.0, 1.0-1.0/nd); } else { phis = atan2(-1.0+(2.0*h-1.0)/nd, 1.0); phim = atan2(-1.0+(2.0*h+1.0)/nd, 1.0); phie = atan2(-1.0+(2.0*h+3.0)/nd, 1.0); } int xh = xn/2; int yh = yn/2; int zh = zn/2; double R21 = R2/R1; double R31 = R3/R1; CpxNumTns tpdata(xn,yn,zn); CpxNumTns& Cblk = C.block(s,wcnt); tpdata = Cblk; //fft fftwnd_plan p = NULL; map<inttriple, fftwnd_plan>::iterator mit = planmap.find( inttriple(xn, intpair(yn,zn)) ); if(mit!=planmap.end()) { p = (*mit).second; } else { p = fftw3d_create_plan(zn, yn, xn, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE); planmap[ inttriple(xn, intpair(yn,zn)) ] = p; } fftwnd_one(p, (fftw_complex*)tpdata.data(), NULL); //cerr<<"wedge s"<<endl; double sqrtprod = sqrt(double(xn*yn*zn)); for(int i=0; i<xn; i++) for(int j=0; j<yn; j++) for(int k=0; k<zn; k++) tpdata(i,j,k) /= sqrtprod; CpxOffTns wpdata(xn,yn,zn); fdct3d_fftshift(xn,yn,zn,tpdata,wpdata); for(int xcur=(int)ceil(xs); xcur<xe; xcur++) { int yfm = (int)ceil( max(-R2, R21*(-xcur)*tan(thts)) ); int yto = (int)floor( min(R2, R21*(-xcur)*tan(thte)) ); int zfm = (int)ceil( max(-R3, 
R31*(-xcur)*tan(phis)) ); int zto = (int)floor( min(R3, R31*(-xcur)*tan(phie)) ); for(int ycur=yfm; ycur<=yto; ycur++) for(int zcur=zfm; zcur<=zto; zcur++) { int tmpx = xcur%xn; if(tmpx<-xh) tmpx+=xn; if(tmpx>=-xh+xn) tmpx-=xn; int tmpy = ycur%yn; if(tmpy<-yh) tmpy+=yn; if(tmpy>=-yh+yn) tmpy-=yn; int tmpz = zcur%zn; if(tmpz<-zh) tmpz+=zn; if(tmpz>=-zh+zn) tmpz-=zn; double thtcur = atan2(ycur/R2, (-xcur)/R1); double phicur = atan2(zcur/R3, (-xcur)/R1); double glbpou; fdct3d_globalpou(thtcur, phicur, M_PI/4-atan2(1.0-1.0/nd, 1.0), glbpou); double wtht; if(thtcur<thtm) { if(g==0) wtht = 1; else { double l,r; fdct3d_window( (thtcur-thts)/(thtm-thts), l, r); wtht = l; } } else { if(g==nd-1) wtht = 1; else { double l,r; fdct3d_window( (thtcur-thtm)/(thte-thtm), l, r); wtht = r; } } double wphi; if(phicur<phim) { if(h==0) wphi = 1; else { double l,r; fdct3d_window( (phicur-phis)/(phim-phis), l, r); wphi = l; } } else { if(h==nd-1) wphi = 1; else { double l,r; fdct3d_window( (phicur-phim)/(phie-phim), l, r); wphi = r; } } double pou = glbpou * wtht * wphi; wpdata(tmpx, tmpy, tmpz) *= pou; double ss = sma1(xcur)*sma2(ycur)*sma3(zcur); double bb = big1(xcur)*big2(ycur)*big3(zcur); int bi,bj,bk; int oi,oj,ok; fdct3d_position_aux(N1,N2,N3,b, xcur,ycur,zcur, bi,bj,bk,oi,oj,ok); CpxNumTns& Wblk = W.block(bi,bj,bk); Wblk(oi,oj,ok) += wpdata(tmpx,tmpy,tmpz) * bb * sqrt(1.0-ss*ss); } } //xcur } //if wcnt++; } } //end of face //face 4: -y,-z,-x for(int f=nd-1; f>=0; f--) { for(int h=nd-1; h>=0; h--) { if(crvowners[wcnt]==mpirank) { double ys = -R2; double ye = -R2/4+(W2/2)/4; double zs = -R3 + (2*h-1)*W3/2; double ze = -R3 + (2*h+3)*W3/2; double xs = -R1 + (2*f-1)*W1/2; double xe = -R1 + (2*f+3)*W1/2; int xn = int(ceil(xe-xs)); int yn = int(ceil(ye-ys)); int zn = int(ceil(ze-zs)); double thts, thtm, thte; //z to y if(h==0) { thts = atan2(-1.0, 1.0-1.0/nd); thtm = atan2(-1.0+1.0/nd, 1.0); thte = atan2(-1.0+3.0/nd, 1.0); } else if(h==nd-1) { thts = atan2(-1.0+(2.0*h-1.0)/nd, 1.0); 
thtm = atan2(-1.0+(2.0*h+1.0)/nd, 1.0); thte = atan2(1.0, 1.0-1.0/nd); } else { thts = atan2(-1.0+(2.0*h-1.0)/nd, 1.0); thtm = atan2(-1.0+(2.0*h+1.0)/nd, 1.0); thte = atan2(-1.0+(2.0*h+3.0)/nd, 1.0); } double phis, phim, phie; //z to x if(f==0) { phis = atan2(-1.0, 1.0-1.0/nd); phim = atan2(-1.0+1.0/nd, 1.0); phie = atan2(-1.0+3.0/nd, 1.0); } else if(f==nd-1) { phis = atan2(-1.0+(2.0*f-1.0)/nd, 1.0); phim = atan2(-1.0+(2.0*f+1.0)/nd, 1.0); phie = atan2(1.0, 1.0-1.0/nd); } else { phis = atan2(-1.0+(2.0*f-1.0)/nd, 1.0); phim = atan2(-1.0+(2.0*f+1.0)/nd, 1.0); phie = atan2(-1.0+(2.0*f+3.0)/nd, 1.0); } int xh = xn/2; int yh = yn/2; int zh = zn/2; double R32 = double(F3)/double(F2); double R12 = double(F1)/double(F2); CpxNumTns tpdata(xn,yn,zn); CpxNumTns& Cblk = C.block(s,wcnt); tpdata = Cblk; //fft fftwnd_plan p = NULL; map<inttriple, fftwnd_plan>::iterator mit = planmap.find( inttriple(xn, intpair(yn,zn)) ); if(mit!=planmap.end()) { p = (*mit).second; } else { p = fftw3d_create_plan(zn, yn, xn, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE); planmap[ inttriple(xn, intpair(yn,zn)) ] = p; } fftwnd_one(p, (fftw_complex*)tpdata.data(), NULL); //cerr<<"wedge s"<<endl; double sqrtprod = sqrt(double(xn*yn*zn)); for(int i=0; i<xn; i++) for(int j=0; j<yn; j++) for(int k=0; k<zn; k++) tpdata(i,j,k) /= sqrtprod; CpxOffTns wpdata(xn,yn,zn); fdct3d_fftshift(xn,yn,zn,tpdata,wpdata); for(int ycur=(int)ceil(ys); ycur<ye; ycur++) { int zfm = (int)ceil( max(-R3, R32*(-ycur)*tan(thts)) ); int zto = (int)floor( min(R3, R32*(-ycur)*tan(thte)) ); int xfm = (int)ceil( max(-R1, R12*(-ycur)*tan(phis)) ); int xto = (int)floor( min(R1, R12*(-ycur)*tan(phie)) ); for(int zcur=zfm; zcur<=zto; zcur++) for(int xcur=xfm; xcur<=xto; xcur++) { int tmpx = xcur%xn; if(tmpx<-xh) tmpx+=xn; if(tmpx>=-xh+xn) tmpx-=xn; int tmpy = ycur%yn; if(tmpy<-yh) tmpy+=yn; if(tmpy>=-yh+yn) tmpy-=yn; int tmpz = zcur%zn; if(tmpz<-zh) tmpz+=zn; if(tmpz>=-zh+zn) tmpz-=zn; double thtcur = atan2(zcur/R3, (-ycur)/R2); double 
phicur = atan2(xcur/R1, (-ycur)/R2); double glbpou; fdct3d_globalpou(thtcur, phicur, M_PI/4-atan2(1.0-1.0/nd, 1.0), glbpou); //CHECK double wtht; if(thtcur<thtm) { if(h==0) wtht = 1; else { double l,r; fdct3d_window( (thtcur-thts)/(thtm-thts), l, r); wtht = l; } } else { if(h==nd-1) wtht = 1; else { double l,r; fdct3d_window( (thtcur-thtm)/(thte-thtm), l, r); wtht = r; } } double wphi; if(phicur<phim) { if(f==0) wphi = 1; else { double l,r; fdct3d_window( (phicur-phis)/(phim-phis), l, r); wphi = l; } } else { if(f==nd-1) wphi = 1; else { double l,r; fdct3d_window( (phicur-phim)/(phie-phim), l, r); wphi = r; } } double pou = glbpou * wtht * wphi; wpdata(tmpx, tmpy, tmpz) *= pou; double ss = sma1(xcur)*sma2(ycur)*sma3(zcur); double bb = big1(xcur)*big2(ycur)*big3(zcur); int bi,bj,bk; int oi,oj,ok; fdct3d_position_aux(N1,N2,N3,b, xcur,ycur,zcur, bi,bj,bk,oi,oj,ok); CpxNumTns& Wblk = W.block(bi,bj,bk); Wblk(oi,oj,ok) += wpdata(tmpx,tmpy,tmpz) * bb * sqrt(1.0-ss*ss); } } //ycur }//if wcnt++; } }//end of face //face 5.-z,-x,-y for(int g=nd-1; g>=0; g--) { for(int f=nd-1; f>=0; f--) { if(crvowners[wcnt]==mpirank) { double zs = -R3; double ze = -R3/4+(W3/2)/4; double xs = -R1 + (2*f-1)*W1/2; double xe = -R1 + (2*f+3)*W1/2; double ys = -R2 + (2*g-1)*W2/2; double ye = -R2 + (2*g+3)*W2/2; int xn = int(ceil(xe-xs)); int yn = int(ceil(ye-ys)); int zn = int(ceil(ze-zs)); double thts, thtm, thte; //y to x if(f==0) { thts = atan2(-1.0, 1.0-1.0/nd); thtm = atan2(-1.0+1.0/nd, 1.0); thte = atan2(-1.0+3.0/nd, 1.0); } else if(f==nd-1) { thts = atan2(-1.0+(2.0*f-1.0)/nd, 1.0); thtm = atan2(-1.0+(2.0*f+1.0)/nd, 1.0); thte = atan2(1.0, 1.0-1.0/nd); } else { thts = atan2(-1.0+(2.0*f-1.0)/nd, 1.0); thtm = atan2(-1.0+(2.0*f+1.0)/nd, 1.0); thte = atan2(-1.0+(2.0*f+3.0)/nd, 1.0); } double phis, phim, phie; //z to x if(g==0) { phis = atan2(-1.0, 1.0-1.0/nd); phim = atan2(-1.0+1.0/nd, 1.0); phie = atan2(-1.0+3.0/nd, 1.0); } else if(g==nd-1) { phis = atan2(-1.0+(2.0*g-1.0)/nd, 1.0); phim = 
atan2(-1.0+(2.0*g+1.0)/nd, 1.0); phie = atan2(1.0, 1.0-1.0/nd); } else { phis = atan2(-1.0+(2.0*g-1.0)/nd, 1.0); phim = atan2(-1.0+(2.0*g+1.0)/nd, 1.0); phie = atan2(-1.0+(2.0*g+3.0)/nd, 1.0); } int xh = xn/2; int yh = yn/2; int zh = zn/2; double R13 = double(F1)/double(F3); double R23 = double(F2)/double(F3); CpxNumTns tpdata(xn,yn,zn); CpxNumTns& Cblk = C.block(s,wcnt); tpdata = Cblk; //fft fftwnd_plan p = NULL; map<inttriple, fftwnd_plan>::iterator mit = planmap.find( inttriple(xn, intpair(yn,zn)) ); if(mit!=planmap.end()) { p = (*mit).second; } else { p = fftw3d_create_plan(zn, yn, xn, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE); planmap[ inttriple(xn, intpair(yn,zn)) ] = p; } fftwnd_one(p, (fftw_complex*)tpdata.data(), NULL); //cerr<<"wedge s"<<endl; double sqrtprod = sqrt(double(xn*yn*zn)); for(int i=0; i<xn; i++) for(int j=0; j<yn; j++) for(int k=0; k<zn; k++) tpdata(i,j,k) /= sqrtprod; CpxOffTns wpdata(xn,yn,zn); fdct3d_fftshift(xn,yn,zn,tpdata,wpdata); for(int zcur=(int)ceil(zs); zcur<ze; zcur++) { int xfm = (int)ceil( max(-R1, R13*(-zcur)*tan(thts)) ); int xto = (int)floor( min(R1, R13*(-zcur)*tan(thte)) ); int yfm = (int)ceil( max(-R2, R23*(-zcur)*tan(phis)) ); int yto = (int)floor( min(R2, R23*(-zcur)*tan(phie)) ); for(int xcur=xfm; xcur<=xto; xcur++) for(int ycur=yfm; ycur<=yto; ycur++) { int tmpx = xcur%xn; if(tmpx<-xh) tmpx+=xn; if(tmpx>=-xh+xn) tmpx-=xn; int tmpy = ycur%yn; if(tmpy<-yh) tmpy+=yn; if(tmpy>=-yh+yn) tmpy-=yn; int tmpz = zcur%zn; if(tmpz<-zh) tmpz+=zn; if(tmpz>=-zh+zn) tmpz-=zn; double thtcur = atan2(xcur/R1, (-zcur)/R3); double phicur = atan2(ycur/R2, (-zcur)/R3); double glbpou; fdct3d_globalpou(thtcur, phicur, M_PI/4-atan2(1.0-1.0/nd, 1.0), glbpou); double wtht; if(thtcur<thtm) { if(f==0) wtht = 1; else { double l,r; fdct3d_window( (thtcur-thts)/(thtm-thts), l, r); wtht = l; } } else { if(f==nd-1) wtht = 1; else { double l,r; fdct3d_window( (thtcur-thtm)/(thte-thtm), l, r); wtht = r; } } double wphi; if(phicur<phim) { if(g==0) wphi = 
1; else { double l,r; fdct3d_window( (phicur-phis)/(phim-phis), l, r); wphi = l; } } else { if(g==nd-1) wphi = 1; else { double l,r; fdct3d_window( (phicur-phim)/(phie-phim), l, r); wphi = r; } } double pou = glbpou * wtht * wphi; wpdata(tmpx, tmpy, tmpz) *= pou; double ss = sma1(xcur)*sma2(ycur)*sma3(zcur); double bb = big1(xcur)*big2(ycur)*big3(zcur); int bi,bj,bk; int oi,oj,ok; fdct3d_position_aux(N1,N2,N3,b, xcur,ycur,zcur, bi,bj,bk,oi,oj,ok); CpxNumTns& Wblk = W.block(bi,bj,bk); Wblk(oi,oj,ok) += wpdata(tmpx,tmpy,tmpz) * bb * sqrt(1.0-ss*ss); } }//zcur }//if wcnt++; } }//end of face iA(wcnt==nd*nd*nf); //remove plans for(map<inttriple, fftwnd_plan>::iterator mit=planmap.begin(); mit!=planmap.end(); mit++) { fftwnd_plan p = (*mit).second; fftwnd_destroy_plan(p); } return 0; }