//--------------------------------------------- int CpxCrvletPrtd::check() { vector< vector<int> >& c = _nx; for(int s=0; s<c.size(); s++) for(int w=0; w<c[s].size(); w++) if(_exists[s][w]==true) { CpxNumTns& tmp = _blocks[s][w]; iA( tmp.m()==_nx[s][w] && tmp.n()==_ny[s][w] && tmp.p()==_nz[s][w] ); } return 0; }
// Installs a new ownership table. Before overwriting _owners, asserts that
// every (scale, wedge) slot assigned to this rank by newowners is already
// flagged in _exists. No data is moved here. Always returns 0.
int CpxCrvletPrtd::shift(vector< vector<int> >& newowners)
{
  // _nx is used only for its (scale, wedge) index ranges.
  const vector< vector<int> >& dims = _nx;
  for(int s=0; s<dims.size(); s++) {
    for(int w=0; w<dims[s].size(); w++) {
      if(newowners[s][w]!=mpirank())
        continue; // slot goes to another rank
      iA(_exists[s][w]==true);
    }
  }
  _owners = newowners;
  return 0;
}
// Demo driver: builds an Array_RC<int> from ten fixed integers, prints a
// banner, and hands the array to try_array(). Returns 0.
// The return type is spelled out explicitly: C++ has no implicit int.
int main()
{
  static int ia[10] = { 12,7,14,9,128,17,6,3,27,5 };
  Array_RC<int> iA( ia,10 );

  cout << "class template instantiation Array_RC<int>" << endl;
  try_array( iA );

  return 0;
}
// ---------------------------------------------------------------------- int Dense3d::evaluate(const DblNumVec& srcDen, DblNumVec& trgVal) { //----------------------------------- iA(srcDen.m()==srcDOF()*(*_srcPos).n()); iA(trgVal.m()==trgDOF()*(*_trgPos).n()); int dim = this->dim(); int srcDOF = this->srcDOF(); int trgDOF = this->trgDOF(); /* Number of sources */ int numSrc = (*_srcPos).n(); /* Number of targets */ int numTrg = (*_trgPos).n(); DblNumMat inter(trgDOF, numSrc*srcDOF); for(int i=0; i<numTrg; i++) { DblNumMat onePosMat(dim, 1, false, (*_trgPos).clmdata(i)); DblNumVec oneValVec(trgDOF, false, trgVal.data()+trgDOF*i); iC( _knl.kernel((*_srcPos), (*_srcNor), onePosMat, inter) ); iC( dgemv(1.0, inter, srcDen, 0.0, oneValVec) ); } return 0; }
// Demo driver: builds an Array_Sort<int> and an Array_Sort<string> from fixed
// data, prints a banner for each, and hands each array to try_array().
// Returns 0. The return type is spelled out explicitly: C++ has no implicit int.
int main()
{
  static int ia[ 10 ] = { 12,7,14,9,128,17,6,3,27,5 };
  static string sa[ 7 ] = {
    "Eeyore", "Pooh", "Tigger", "Piglet", "Owl", "Gopher", "Heffalump"
  };

  Array_Sort<int> iA( ia,10 );
  Array_Sort<string> SA( sa,7 );

  cout << "class template instantiation Array_Sort<int>" << endl;
  try_array( iA );

  cout << "class template instantiation Array_Sort<string>" << endl;
  try_array( SA );

  return 0;
}
// ---------------------------------------------------------------------- int Dense3d::setup(map<string,string>&) { iA(_srcPos!=NULL && _srcNor!=NULL && _trgPos!=NULL); iA((*_srcPos).m()==dim() && (*_trgPos).m()==dim()); //nothing to do return 0; }
//------------------------------------------------------------------------------------
// Processes the angular wedges of scale s that are owned by the calling MPI rank
// and accumulates their contribution into the blocks of W.
//
// Parameters (as used in this function):
//   N1,N2,N3,b : forwarded unchanged to fdct3d_position_aux, which maps an integer
//                grid point (xcur,ycur,zcur) to a block index (bi,bj,bk) of W and an
//                offset (oi,oj,ok) inside that block.
//   L1,L2,L3   : passed to fdct3d_rangecompute / fdct3d_lowpass; L/2 is used for the
//                second ("sma") set of low-pass vectors; L/nd gives the widths W1..W3.
//   s          : scale index; selects C.owners()[s] and C.block(s, wcnt).
//   nd         : number of wedge subdivisions per direction on each face; the wedge
//                counter must end at nd*nd*6 (asserted at the end).
//   C          : source of the wedge blocks.
//   W          : destination; entries are updated with +=.
// Returns 0.
//
// The six "faces" below run the same sequence of steps with the roles of x, y, z
// permuted (faces 0-2) and with the sign of the leading axis flipped (faces 3-5).
// The wedge counter wcnt is advanced for every wedge, whether or not this rank owns
// it, so wedge numbering is identical on all ranks.
// Per owned wedge:
//   1. copy the block C.block(s,wcnt) into tpdata;
//   2. run an in-place FFTW_FORWARD 3-d transform on it (plans are cached in planmap,
//      keyed by (xn,(yn,zn)), and destroyed at the end of the function);
//   3. divide every entry by sqrt(xn*yn*zn);
//   4. pass it through fdct3d_fftshift into the offset tensor wpdata;
//   5. for every integer grid point inside the wedge, multiply the wrapped entry of
//      wpdata by the window pou = glbpou*wtht*wphi and add
//      wpdata * bb * sqrt(1-ss*ss) to the matching entry of W.
//
// NOTE(review): mpisize, Sh*, Fh* and Rh* are computed but never read afterwards.
// NOTE(review): faces 0, 1 and 3 build their slope ratios from R1..R3, whereas
// faces 2, 4 and 5 build them from double(F*)/double(F*). Whether this difference
// is intentional cannot be determined from this function alone -- confirm against
// the matching forward transform before changing either form.
int fdct3d_inverse_angles(int N1,int N2,int N3,int b, double L1,double L2,double L3, int s,int nd, CpxCrvletPrtd& C, CpxNumTnsBlkd& W)
{
  int mpirank;  MPI_Comm_rank(MPI_COMM_WORLD, &mpirank);
  int mpisize;  MPI_Comm_size(MPI_COMM_WORLD, &mpisize);
  vector< vector<int> >& Cowners = C.owners();
  vector<int>& crvowners = Cowners[s]; //LEXING: the owner information for wedges in scale s
  int nf = 6; // number of faces
  int wcnt = 0; // running wedge index within scale s
  int S1, S2, S3;
  int F1, F2, F3;
  double R1, R2, R3;
  fdct3d_rangecompute(L1, L2, L3, S1, S2, S3, F1, F2, F3, R1, R2, R3);
  // "big" low-pass vectors built from L1..L3
  DblOffVec big1(S1);  fdct3d_lowpass(L1, big1);
  DblOffVec big2(S2);  fdct3d_lowpass(L2, big2);
  DblOffVec big3(S3);  fdct3d_lowpass(L3, big3);
  double Lh1 = L1/2;  double Lh2 = L2/2;  double Lh3 = L3/2;
  int Sh1, Sh2, Sh3;
  int Fh1, Fh2, Fh3;
  double Rh1, Rh2, Rh3;
  fdct3d_rangecompute(Lh1, Lh2, Lh3, Sh1, Sh2, Sh3, Fh1, Fh2, Fh3, Rh1, Rh2, Rh3);
  // "sma" low-pass vectors built from the halved lengths; note they are sized with S1..S3, not Sh1..Sh3
  DblOffVec sma1(S1);  fdct3d_lowpass(Lh1, sma1);
  DblOffVec sma2(S2);  fdct3d_lowpass(Lh2, sma2);
  DblOffVec sma3(S3);  fdct3d_lowpass(Lh3, sma3);
  // wedge widths along each axis
  double W1 = L1/nd;  double W2 = L2/nd;  double W3 = L3/nd;
  typedef pair<int,int> intpair;
  typedef pair<int, intpair> inttriple;
  map<inttriple, fftwnd_plan> planmap; // FFTW plan cache keyed by (xn,(yn,zn))
  //face 0: x,y,z
  for(int h=0; h<nd; h++) { //(y first z second)
    for(int g=0; g<nd; g++) {
      if(crvowners[wcnt]==mpirank) {
        // bounding box of the wedge and its integer extents
        double xs = R1/4-(W1/2)/4;  double xe = R1;
        double ys = -R2 + (2*g-1)*W2/2;  double ye = -R2 + (2*g+3)*W2/2;
        double zs = -R3 + (2*h-1)*W3/2;  double ze = -R3 + (2*h+3)*W3/2;
        int xn = int(ceil(xe-xs));  int yn = int(ceil(ye-ys));  int zn = int(ceil(ze-zs));
        // start / middle / end angles of the wedge; the first and last wedge use a different outer angle
        double thts, thtm, thte; //y to x
        if(g==0) {
          thts = atan2(-1.0, 1.0-1.0/nd);
          thtm = atan2(-1.0+1.0/nd, 1.0);
          thte = atan2(-1.0+3.0/nd, 1.0);
        } else if(g==nd-1) {
          thts = atan2(-1.0+(2.0*g-1.0)/nd, 1.0);
          thtm = atan2(-1.0+(2.0*g+1.0)/nd, 1.0);
          thte = atan2(1.0, 1.0-1.0/nd);
        } else {
          thts = atan2(-1.0+(2.0*g-1.0)/nd, 1.0);
          thtm = atan2(-1.0+(2.0*g+1.0)/nd, 1.0);
          thte = atan2(-1.0+(2.0*g+3.0)/nd, 1.0);
        }
        double phis, phim, phie; //z to x
        if(h==0) {
          phis = atan2(-1.0, 1.0-1.0/nd);
          phim = atan2(-1.0+1.0/nd, 1.0);
          phie = atan2(-1.0+3.0/nd, 1.0);
        } else if(h==nd-1) {
          phis = atan2(-1.0+(2.0*h-1.0)/nd, 1.0);
          phim = atan2(-1.0+(2.0*h+1.0)/nd, 1.0);
          phie = atan2(1.0, 1.0-1.0/nd);
        } else {
          phis = atan2(-1.0+(2.0*h-1.0)/nd, 1.0);
          phim = atan2(-1.0+(2.0*h+1.0)/nd, 1.0);
          phie = atan2(-1.0+(2.0*h+3.0)/nd, 1.0);
        }
        int xh = xn/2;  int yh = yn/2;  int zh = zn/2; //half
        double R21 = R2/R1;  double R31 = R3/R1;
        // working copy of the wedge block
        CpxNumTns tpdata(xn,yn,zn);
        CpxNumTns& Cblk = C.block(s,wcnt);
        tpdata = Cblk;
        //fft: reuse a cached plan for this size if there is one, otherwise create and cache it
        fftwnd_plan p = NULL;
        map<inttriple, fftwnd_plan>::iterator mit = planmap.find( inttriple(xn, intpair(yn,zn)) );
        if(mit!=planmap.end()) {
          p = (*mit).second;
        } else {
          p = fftw3d_create_plan(zn, yn, xn, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE);
          planmap[ inttriple(xn, intpair(yn,zn)) ] = p;
        }
        fftwnd_one(p, (fftw_complex*)tpdata.data(), NULL); //cerr<<"wedge s"<<endl;
        // scale by 1/sqrt(number of entries)
        double sqrtprod = sqrt(double(xn*yn*zn));
        for(int i=0; i<xn; i++)
          for(int j=0; j<yn; j++)
            for(int k=0; k<zn; k++)
              tpdata(i,j,k) /= sqrtprod;
        CpxOffTns wpdata(xn,yn,zn);
        fdct3d_fftshift(xn,yn,zn,tpdata,wpdata);
        for(int xcur=(int)ceil(xs); xcur<xe; xcur++) {
          // y/z range of the wedge at this x, clipped to [-R2,R2] and [-R3,R3]
          int yfm = (int)ceil( max(-R2, R21*xcur*tan(thts)) );
          int yto = (int)floor( min(R2, R21*xcur*tan(thte)) );
          int zfm = (int)ceil( max(-R3, R31*xcur*tan(phis)) );
          int zto = (int)floor( min(R3, R31*xcur*tan(phie)) );
          for(int ycur=yfm; ycur<=yto; ycur++)
            for(int zcur=zfm; zcur<=zto; zcur++) {
              // wrap the grid point into the index range [-half, -half+n) of wpdata
              int tmpx = xcur%xn;  if(tmpx<-xh) tmpx+=xn;  if(tmpx>=-xh+xn) tmpx-=xn;
              int tmpy = ycur%yn;  if(tmpy<-yh) tmpy+=yn;  if(tmpy>=-yh+yn) tmpy-=yn;
              int tmpz = zcur%zn;  if(tmpz<-zh) tmpz+=zn;  if(tmpz>=-zh+zn) tmpz-=zn;
              double thtcur = atan2(ycur/R2, xcur/R1);
              double phicur = atan2(zcur/R3, xcur/R1);
              double glbpou;
              fdct3d_globalpou(thtcur, phicur, M_PI/4-atan2(1.0-1.0/nd, 1.0), glbpou);
              // angular window in theta: left half uses l, right half uses r; outermost wedges are not tapered on their outer side
              double wtht;
              if(thtcur<thtm) {
                if(g==0) wtht = 1;
                else { double l,r; fdct3d_window( (thtcur-thts)/(thtm-thts), l, r); wtht = l; }
              } else {
                if(g==nd-1) wtht = 1;
                else { double l,r; fdct3d_window( (thtcur-thtm)/(thte-thtm), l, r); wtht = r; }
              }
              // angular window in phi, same scheme
              double wphi;
              if(phicur<phim) {
                if(h==0) wphi = 1;
                else { double l,r; fdct3d_window( (phicur-phis)/(phim-phis), l, r); wphi = l; }
              } else {
                if(h==nd-1) wphi = 1;
                else { double l,r; fdct3d_window( (phicur-phim)/(phie-phim), l, r); wphi = r; }
              }
              double pou = glbpou * wtht * wphi;
              wpdata(tmpx, tmpy, tmpz) *= pou;
              double ss = sma1(xcur)*sma2(ycur)*sma3(zcur);
              double bb = big1(xcur)*big2(ycur)*big3(zcur);
              int bi,bj,bk;  int oi,oj,ok;
              fdct3d_position_aux(N1,N2,N3,b, xcur,ycur,zcur, bi,bj,bk,oi,oj,ok);
              CpxNumTns& Wblk = W.block(bi,bj,bk);
              Wblk(oi,oj,ok) += wpdata(tmpx,tmpy,tmpz) * bb * sqrt(1.0-ss*ss);
            }
        } //xcur
      } //if
      wcnt++;
    }
  } //end of face
  //face 1. y z x   (same steps as face 0 with y as the leading axis)
  for(int f=0; f<nd; f++) {
    for(int h=0; h<nd; h++) {
      if(crvowners[wcnt]==mpirank) {
        double ys = R2/4-(W2/2)/4;  double ye = R2;
        double zs = -R3 + (2*h-1)*W3/2;  double ze = -R3 + (2*h+3)*W3/2;
        double xs = -R1 + (2*f-1)*W1/2;  double xe = -R1 + (2*f+3)*W1/2;
        int xn = int(ceil(xe-xs));  int yn = int(ceil(ye-ys));  int zn = int(ceil(ze-zs));
        double thts, thtm, thte; //z to y
        if(h==0) {
          thts = atan2(-1.0, 1.0-1.0/nd);
          thtm = atan2(-1.0+1.0/nd, 1.0);
          thte = atan2(-1.0+3.0/nd, 1.0);
        } else if(h==nd-1) {
          thts = atan2(-1.0+(2.0*h-1.0)/nd, 1.0);
          thtm = atan2(-1.0+(2.0*h+1.0)/nd, 1.0);
          thte = atan2(1.0, 1.0-1.0/nd);
        } else {
          thts = atan2(-1.0+(2.0*h-1.0)/nd, 1.0);
          thtm = atan2(-1.0+(2.0*h+1.0)/nd, 1.0);
          thte = atan2(-1.0+(2.0*h+3.0)/nd, 1.0);
        }
        double phis, phim, phie; //x to y
        if(f==0) {
          phis = atan2(-1.0, 1.0-1.0/nd);
          phim = atan2(-1.0+1.0/nd, 1.0);
          phie = atan2(-1.0+3.0/nd, 1.0);
        } else if(f==nd-1) {
          phis = atan2(-1.0+(2.0*f-1.0)/nd, 1.0);
          phim = atan2(-1.0+(2.0*f+1.0)/nd, 1.0);
          phie = atan2(1.0, 1.0-1.0/nd);
        } else {
          phis = atan2(-1.0+(2.0*f-1.0)/nd, 1.0);
          phim = atan2(-1.0+(2.0*f+1.0)/nd, 1.0);
          phie = atan2(-1.0+(2.0*f+3.0)/nd, 1.0);
        }
        int xh = xn/2;  int yh = yn/2;  int zh = zn/2;
        double R32 = R3/R2;  double R12 = R1/R2;
        CpxNumTns tpdata(xn,yn,zn);
        CpxNumTns& Cblk = C.block(s,wcnt);
        tpdata = Cblk;
        //fft
        fftwnd_plan p = NULL;
        map<inttriple, fftwnd_plan>::iterator mit = planmap.find( inttriple(xn, intpair(yn,zn)) );
        if(mit!=planmap.end()) {
          p = (*mit).second;
        } else {
          p = fftw3d_create_plan(zn, yn, xn, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE);
          planmap[ inttriple(xn, intpair(yn,zn)) ] = p;
        }
        fftwnd_one(p, (fftw_complex*)tpdata.data(), NULL); //cerr<<"wedge s"<<endl;
        double sqrtprod = sqrt(double(xn*yn*zn));
        for(int i=0; i<xn; i++)
          for(int j=0; j<yn; j++)
            for(int k=0; k<zn; k++)
              tpdata(i,j,k) /= sqrtprod;
        CpxOffTns wpdata(xn,yn,zn);
        fdct3d_fftshift(xn,yn,zn,tpdata,wpdata);
        for(int ycur=(int)ceil(ys); ycur<ye; ycur++) {
          int zfm = (int)ceil( max(-R3, R32*ycur*tan(thts)) );
          int zto = (int)floor( min(R3, R32*ycur*tan(thte)) );
          int xfm = (int)ceil( max(-R1, R12*ycur*tan(phis)) );
          int xto = (int)floor( min(R1, R12*ycur*tan(phie)) );
          for(int zcur=zfm; zcur<=zto; zcur++)
            for(int xcur=xfm; xcur<=xto; xcur++) {
              int tmpx = xcur%xn;  if(tmpx<-xh) tmpx+=xn;  if(tmpx>=-xh+xn) tmpx-=xn;
              int tmpy = ycur%yn;  if(tmpy<-yh) tmpy+=yn;  if(tmpy>=-yh+yn) tmpy-=yn;
              int tmpz = zcur%zn;  if(tmpz<-zh) tmpz+=zn;  if(tmpz>=-zh+zn) tmpz-=zn;
              double thtcur = atan2(zcur/R3, ycur/R2);
              double phicur = atan2(xcur/R1, ycur/R2);
              double glbpou;
              fdct3d_globalpou(thtcur, phicur, M_PI/4-atan2(1.0-1.0/nd, 1.0), glbpou); //CHECK
              double wtht;
              if(thtcur<thtm) {
                if(h==0) wtht = 1;
                else { double l,r; fdct3d_window( (thtcur-thts)/(thtm-thts), l, r); wtht = l; }
              } else {
                if(h==nd-1) wtht = 1;
                else { double l,r; fdct3d_window( (thtcur-thtm)/(thte-thtm), l, r); wtht = r; }
              }
              double wphi;
              if(phicur<phim) {
                if(f==0) wphi = 1;
                else { double l,r; fdct3d_window( (phicur-phis)/(phim-phis), l, r); wphi = l; }
              } else {
                if(f==nd-1) wphi = 1;
                else { double l,r; fdct3d_window( (phicur-phim)/(phie-phim), l, r); wphi = r; }
              }
              double pou = glbpou * wtht * wphi;
              wpdata(tmpx, tmpy, tmpz) *= pou;
              double ss = sma1(xcur)*sma2(ycur)*sma3(zcur);
              double bb = big1(xcur)*big2(ycur)*big3(zcur);
              int bi,bj,bk;  int oi,oj,ok;
              fdct3d_position_aux(N1,N2,N3,b, xcur,ycur,zcur, bi,bj,bk,oi,oj,ok);
              CpxNumTns& Wblk = W.block(bi,bj,bk);
              Wblk(oi,oj,ok) += wpdata(tmpx,tmpy,tmpz) * bb * sqrt(1.0-ss*ss);
            }
        } //ycur
      }//if
      wcnt++;
    }
  }//end of face
  //face 2. z x y   (same steps with z as the leading axis)
  for(int g=0; g<nd; g++) {
    for(int f=0; f<nd; f++) {
      if(crvowners[wcnt]==mpirank) {
        double zs = R3/4-(W3/2)/4;  double ze = R3;
        double xs = -R1 + (2*f-1)*W1/2;  double xe = -R1 + (2*f+3)*W1/2;
        double ys = -R2 + (2*g-1)*W2/2;  double ye = -R2 + (2*g+3)*W2/2;
        int xn = int(ceil(xe-xs));  int yn = int(ceil(ye-ys));  int zn = int(ceil(ze-zs));
        double thts, thtm, thte; //x to z
        if(f==0) {
          thts = atan2(-1.0, 1.0-1.0/nd);
          thtm = atan2(-1.0+1.0/nd, 1.0);
          thte = atan2(-1.0+3.0/nd, 1.0);
        } else if(f==nd-1) {
          thts = atan2(-1.0+(2.0*f-1.0)/nd, 1.0);
          thtm = atan2(-1.0+(2.0*f+1.0)/nd, 1.0);
          thte = atan2(1.0, 1.0-1.0/nd);
        } else {
          thts = atan2(-1.0+(2.0*f-1.0)/nd, 1.0);
          thtm = atan2(-1.0+(2.0*f+1.0)/nd, 1.0);
          thte = atan2(-1.0+(2.0*f+3.0)/nd, 1.0);
        }
        double phis, phim, phie; //y to z
        if(g==0) {
          phis = atan2(-1.0, 1.0-1.0/nd);
          phim = atan2(-1.0+1.0/nd, 1.0);
          phie = atan2(-1.0+3.0/nd, 1.0);
        } else if(g==nd-1) {
          phis = atan2(-1.0+(2.0*g-1.0)/nd, 1.0);
          phim = atan2(-1.0+(2.0*g+1.0)/nd, 1.0);
          phie = atan2(1.0, 1.0-1.0/nd);
        } else {
          phis = atan2(-1.0+(2.0*g-1.0)/nd, 1.0);
          phim = atan2(-1.0+(2.0*g+1.0)/nd, 1.0);
          phie = atan2(-1.0+(2.0*g+3.0)/nd, 1.0);
        }
        int xh = xn/2;  int yh = yn/2;  int zh = zn/2;
        // NOTE(review): ratios built from F1..F3 here, unlike the R-based ratios of faces 0, 1 and 3
        double R13 = double(F1)/double(F3);  double R23 = double(F2)/double(F3);
        CpxNumTns tpdata(xn,yn,zn);
        CpxNumTns& Cblk = C.block(s,wcnt);
        tpdata = Cblk;
        //fft
        fftwnd_plan p = NULL;
        map<inttriple, fftwnd_plan>::iterator mit = planmap.find( inttriple(xn, intpair(yn,zn)) );
        if(mit!=planmap.end()) {
          p = (*mit).second;
        } else {
          p = fftw3d_create_plan(zn, yn, xn, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE);
          planmap[ inttriple(xn, intpair(yn,zn)) ] = p;
        }
        fftwnd_one(p, (fftw_complex*)tpdata.data(), NULL); //cerr<<"wedge s"<<endl;
        double sqrtprod = sqrt(double(xn*yn*zn));
        for(int i=0; i<xn; i++)
          for(int j=0; j<yn; j++)
            for(int k=0; k<zn; k++)
              tpdata(i,j,k) /= sqrtprod;
        CpxOffTns wpdata(xn,yn,zn);
        fdct3d_fftshift(xn,yn,zn,tpdata,wpdata);
        for(int zcur=(int)ceil(zs); zcur<ze; zcur++) {
          int xfm = (int)ceil( max(-R1, R13*zcur*tan(thts)) );
          int xto = (int)floor( min(R1, R13*zcur*tan(thte)) );
          int yfm = (int)ceil( max(-R2, R23*zcur*tan(phis)) );
          int yto = (int)floor( min(R2, R23*zcur*tan(phie)) );
          for(int xcur=xfm; xcur<=xto; xcur++)
            for(int ycur=yfm; ycur<=yto; ycur++) {
              int tmpx = xcur%xn;  if(tmpx<-xh) tmpx+=xn;  if(tmpx>=-xh+xn) tmpx-=xn;
              int tmpy = ycur%yn;  if(tmpy<-yh) tmpy+=yn;  if(tmpy>=-yh+yn) tmpy-=yn;
              int tmpz = zcur%zn;  if(tmpz<-zh) tmpz+=zn;  if(tmpz>=-zh+zn) tmpz-=zn;
              double thtcur = atan2(xcur/R1, zcur/R3);
              double phicur = atan2(ycur/R2, zcur/R3);
              double glbpou;
              fdct3d_globalpou(thtcur, phicur, M_PI/4-atan2(1.0-1.0/nd, 1.0), glbpou);
              double wtht;
              if(thtcur<thtm) {
                if(f==0) wtht = 1;
                else { double l,r; fdct3d_window( (thtcur-thts)/(thtm-thts), l, r); wtht = l; }
              } else {
                if(f==nd-1) wtht = 1;
                else { double l,r; fdct3d_window( (thtcur-thtm)/(thte-thtm), l, r); wtht = r; }
              }
              double wphi;
              if(phicur<phim) {
                if(g==0) wphi = 1;
                else { double l,r; fdct3d_window( (phicur-phis)/(phim-phis), l, r); wphi = l; }
              } else {
                if(g==nd-1) wphi = 1;
                else { double l,r; fdct3d_window( (phicur-phim)/(phie-phim), l, r); wphi = r; }
              }
              double pou = glbpou * wtht * wphi;
              wpdata(tmpx, tmpy, tmpz) *= pou;
              double ss = sma1(xcur)*sma2(ycur)*sma3(zcur);
              double bb = big1(xcur)*big2(ycur)*big3(zcur);
              int bi,bj,bk;  int oi,oj,ok;
              fdct3d_position_aux(N1,N2,N3,b, xcur,ycur,zcur, bi,bj,bk,oi,oj,ok);
              CpxNumTns& Wblk = W.block(bi,bj,bk);
              Wblk(oi,oj,ok) += wpdata(tmpx,tmpy,tmpz) * bb * sqrt(1.0-ss*ss);
            }
        }//zcur
      }//if
      wcnt++;
    }
  }//end of face
  //face 3: -x,-y,-z   (mirror of face 0: x runs over negative values, loops run in reverse, -xcur is used in the slopes and angles)
  for(int h=nd-1; h>=0; h--) {
    for(int g=nd-1; g>=0; g--) {
      if(crvowners[wcnt]==mpirank) {
        double xs = -R1;  double xe = -R1/4+(W1/2)/4;
        double ys = -R2 + (2*g-1)*W2/2;  double ye = -R2 + (2*g+3)*W2/2;
        double zs = -R3 + (2*h-1)*W3/2;  double ze = -R3 + (2*h+3)*W3/2;
        int xn = int(ceil(xe-xs));  int yn = int(ceil(ye-ys));  int zn = int(ceil(ze-zs));
        double thts, thtm, thte; //y to -x
        if(g==0) {
          thts = atan2(-1.0, 1.0-1.0/nd);
          thtm = atan2(-1.0+1.0/nd, 1.0);
          thte = atan2(-1.0+3.0/nd, 1.0);
        } else if(g==nd-1) {
          thts = atan2(-1.0+(2.0*g-1.0)/nd, 1.0);
          thtm = atan2(-1.0+(2.0*g+1.0)/nd, 1.0);
          thte = atan2(1.0, 1.0-1.0/nd);
        } else {
          thts = atan2(-1.0+(2.0*g-1.0)/nd, 1.0);
          thtm = atan2(-1.0+(2.0*g+1.0)/nd, 1.0);
          thte = atan2(-1.0+(2.0*g+3.0)/nd, 1.0);
        }
        double phis, phim, phie; //z to -x
        if(h==0) {
          phis = atan2(-1.0, 1.0-1.0/nd);
          phim = atan2(-1.0+1.0/nd, 1.0);
          phie = atan2(-1.0+3.0/nd, 1.0);
        } else if(h==nd-1) {
          phis = atan2(-1.0+(2.0*h-1.0)/nd, 1.0);
          phim = atan2(-1.0+(2.0*h+1.0)/nd, 1.0);
          phie = atan2(1.0, 1.0-1.0/nd);
        } else {
          phis = atan2(-1.0+(2.0*h-1.0)/nd, 1.0);
          phim = atan2(-1.0+(2.0*h+1.0)/nd, 1.0);
          phie = atan2(-1.0+(2.0*h+3.0)/nd, 1.0);
        }
        int xh = xn/2;  int yh = yn/2;  int zh = zn/2;
        double R21 = R2/R1;  double R31 = R3/R1;
        CpxNumTns tpdata(xn,yn,zn);
        CpxNumTns& Cblk = C.block(s,wcnt);
        tpdata = Cblk;
        //fft
        fftwnd_plan p = NULL;
        map<inttriple, fftwnd_plan>::iterator mit = planmap.find( inttriple(xn, intpair(yn,zn)) );
        if(mit!=planmap.end()) {
          p = (*mit).second;
        } else {
          p = fftw3d_create_plan(zn, yn, xn, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE);
          planmap[ inttriple(xn, intpair(yn,zn)) ] = p;
        }
        fftwnd_one(p, (fftw_complex*)tpdata.data(), NULL); //cerr<<"wedge s"<<endl;
        double sqrtprod = sqrt(double(xn*yn*zn));
        for(int i=0; i<xn; i++)
          for(int j=0; j<yn; j++)
            for(int k=0; k<zn; k++)
              tpdata(i,j,k) /= sqrtprod;
        CpxOffTns wpdata(xn,yn,zn);
        fdct3d_fftshift(xn,yn,zn,tpdata,wpdata);
        for(int xcur=(int)ceil(xs); xcur<xe; xcur++) {
          int yfm = (int)ceil( max(-R2, R21*(-xcur)*tan(thts)) );
          int yto = (int)floor( min(R2, R21*(-xcur)*tan(thte)) );
          int zfm = (int)ceil( max(-R3, R31*(-xcur)*tan(phis)) );
          int zto = (int)floor( min(R3, R31*(-xcur)*tan(phie)) );
          for(int ycur=yfm; ycur<=yto; ycur++)
            for(int zcur=zfm; zcur<=zto; zcur++) {
              int tmpx = xcur%xn;  if(tmpx<-xh) tmpx+=xn;  if(tmpx>=-xh+xn) tmpx-=xn;
              int tmpy = ycur%yn;  if(tmpy<-yh) tmpy+=yn;  if(tmpy>=-yh+yn) tmpy-=yn;
              int tmpz = zcur%zn;  if(tmpz<-zh) tmpz+=zn;  if(tmpz>=-zh+zn) tmpz-=zn;
              double thtcur = atan2(ycur/R2, (-xcur)/R1);
              double phicur = atan2(zcur/R3, (-xcur)/R1);
              double glbpou;
              fdct3d_globalpou(thtcur, phicur, M_PI/4-atan2(1.0-1.0/nd, 1.0), glbpou);
              double wtht;
              if(thtcur<thtm) {
                if(g==0) wtht = 1;
                else { double l,r; fdct3d_window( (thtcur-thts)/(thtm-thts), l, r); wtht = l; }
              } else {
                if(g==nd-1) wtht = 1;
                else { double l,r; fdct3d_window( (thtcur-thtm)/(thte-thtm), l, r); wtht = r; }
              }
              double wphi;
              if(phicur<phim) {
                if(h==0) wphi = 1;
                else { double l,r; fdct3d_window( (phicur-phis)/(phim-phis), l, r); wphi = l; }
              } else {
                if(h==nd-1) wphi = 1;
                else { double l,r; fdct3d_window( (phicur-phim)/(phie-phim), l, r); wphi = r; }
              }
              double pou = glbpou * wtht * wphi;
              wpdata(tmpx, tmpy, tmpz) *= pou;
              double ss = sma1(xcur)*sma2(ycur)*sma3(zcur);
              double bb = big1(xcur)*big2(ycur)*big3(zcur);
              int bi,bj,bk;  int oi,oj,ok;
              fdct3d_position_aux(N1,N2,N3,b, xcur,ycur,zcur, bi,bj,bk,oi,oj,ok);
              CpxNumTns& Wblk = W.block(bi,bj,bk);
              Wblk(oi,oj,ok) += wpdata(tmpx,tmpy,tmpz) * bb * sqrt(1.0-ss*ss);
            }
        } //xcur
      } //if
      wcnt++;
    }
  } //end of face
  //face 4: -y,-z,-x   (mirror of face 1)
  for(int f=nd-1; f>=0; f--) {
    for(int h=nd-1; h>=0; h--) {
      if(crvowners[wcnt]==mpirank) {
        double ys = -R2;  double ye = -R2/4+(W2/2)/4;
        double zs = -R3 + (2*h-1)*W3/2;  double ze = -R3 + (2*h+3)*W3/2;
        double xs = -R1 + (2*f-1)*W1/2;  double xe = -R1 + (2*f+3)*W1/2;
        int xn = int(ceil(xe-xs));  int yn = int(ceil(ye-ys));  int zn = int(ceil(ze-zs));
        double thts, thtm, thte; //z to -y
        if(h==0) {
          thts = atan2(-1.0, 1.0-1.0/nd);
          thtm = atan2(-1.0+1.0/nd, 1.0);
          thte = atan2(-1.0+3.0/nd, 1.0);
        } else if(h==nd-1) {
          thts = atan2(-1.0+(2.0*h-1.0)/nd, 1.0);
          thtm = atan2(-1.0+(2.0*h+1.0)/nd, 1.0);
          thte = atan2(1.0, 1.0-1.0/nd);
        } else {
          thts = atan2(-1.0+(2.0*h-1.0)/nd, 1.0);
          thtm = atan2(-1.0+(2.0*h+1.0)/nd, 1.0);
          thte = atan2(-1.0+(2.0*h+3.0)/nd, 1.0);
        }
        double phis, phim, phie; //x to -y
        if(f==0) {
          phis = atan2(-1.0, 1.0-1.0/nd);
          phim = atan2(-1.0+1.0/nd, 1.0);
          phie = atan2(-1.0+3.0/nd, 1.0);
        } else if(f==nd-1) {
          phis = atan2(-1.0+(2.0*f-1.0)/nd, 1.0);
          phim = atan2(-1.0+(2.0*f+1.0)/nd, 1.0);
          phie = atan2(1.0, 1.0-1.0/nd);
        } else {
          phis = atan2(-1.0+(2.0*f-1.0)/nd, 1.0);
          phim = atan2(-1.0+(2.0*f+1.0)/nd, 1.0);
          phie = atan2(-1.0+(2.0*f+3.0)/nd, 1.0);
        }
        int xh = xn/2;  int yh = yn/2;  int zh = zn/2;
        // NOTE(review): ratios built from F1..F3 here, whereas face 1 uses R3/R2 and R1/R2
        double R32 = double(F3)/double(F2);  double R12 = double(F1)/double(F2);
        CpxNumTns tpdata(xn,yn,zn);
        CpxNumTns& Cblk = C.block(s,wcnt);
        tpdata = Cblk;
        //fft
        fftwnd_plan p = NULL;
        map<inttriple, fftwnd_plan>::iterator mit = planmap.find( inttriple(xn, intpair(yn,zn)) );
        if(mit!=planmap.end()) {
          p = (*mit).second;
        } else {
          p = fftw3d_create_plan(zn, yn, xn, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE);
          planmap[ inttriple(xn, intpair(yn,zn)) ] = p;
        }
        fftwnd_one(p, (fftw_complex*)tpdata.data(), NULL); //cerr<<"wedge s"<<endl;
        double sqrtprod = sqrt(double(xn*yn*zn));
        for(int i=0; i<xn; i++)
          for(int j=0; j<yn; j++)
            for(int k=0; k<zn; k++)
              tpdata(i,j,k) /= sqrtprod;
        CpxOffTns wpdata(xn,yn,zn);
        fdct3d_fftshift(xn,yn,zn,tpdata,wpdata);
        for(int ycur=(int)ceil(ys); ycur<ye; ycur++) {
          int zfm = (int)ceil( max(-R3, R32*(-ycur)*tan(thts)) );
          int zto = (int)floor( min(R3, R32*(-ycur)*tan(thte)) );
          int xfm = (int)ceil( max(-R1, R12*(-ycur)*tan(phis)) );
          int xto = (int)floor( min(R1, R12*(-ycur)*tan(phie)) );
          for(int zcur=zfm; zcur<=zto; zcur++)
            for(int xcur=xfm; xcur<=xto; xcur++) {
              int tmpx = xcur%xn;  if(tmpx<-xh) tmpx+=xn;  if(tmpx>=-xh+xn) tmpx-=xn;
              int tmpy = ycur%yn;  if(tmpy<-yh) tmpy+=yn;  if(tmpy>=-yh+yn) tmpy-=yn;
              int tmpz = zcur%zn;  if(tmpz<-zh) tmpz+=zn;  if(tmpz>=-zh+zn) tmpz-=zn;
              double thtcur = atan2(zcur/R3, (-ycur)/R2);
              double phicur = atan2(xcur/R1, (-ycur)/R2);
              double glbpou;
              fdct3d_globalpou(thtcur, phicur, M_PI/4-atan2(1.0-1.0/nd, 1.0), glbpou); //CHECK
              double wtht;
              if(thtcur<thtm) {
                if(h==0) wtht = 1;
                else { double l,r; fdct3d_window( (thtcur-thts)/(thtm-thts), l, r); wtht = l; }
              } else {
                if(h==nd-1) wtht = 1;
                else { double l,r; fdct3d_window( (thtcur-thtm)/(thte-thtm), l, r); wtht = r; }
              }
              double wphi;
              if(phicur<phim) {
                if(f==0) wphi = 1;
                else { double l,r; fdct3d_window( (phicur-phis)/(phim-phis), l, r); wphi = l; }
              } else {
                if(f==nd-1) wphi = 1;
                else { double l,r; fdct3d_window( (phicur-phim)/(phie-phim), l, r); wphi = r; }
              }
              double pou = glbpou * wtht * wphi;
              wpdata(tmpx, tmpy, tmpz) *= pou;
              double ss = sma1(xcur)*sma2(ycur)*sma3(zcur);
              double bb = big1(xcur)*big2(ycur)*big3(zcur);
              int bi,bj,bk;  int oi,oj,ok;
              fdct3d_position_aux(N1,N2,N3,b, xcur,ycur,zcur, bi,bj,bk,oi,oj,ok);
              CpxNumTns& Wblk = W.block(bi,bj,bk);
              Wblk(oi,oj,ok) += wpdata(tmpx,tmpy,tmpz) * bb * sqrt(1.0-ss*ss);
            }
        } //ycur
      }//if
      wcnt++;
    }
  }//end of face
  //face 5.-z,-x,-y   (mirror of face 2)
  for(int g=nd-1; g>=0; g--) {
    for(int f=nd-1; f>=0; f--) {
      if(crvowners[wcnt]==mpirank) {
        double zs = -R3;  double ze = -R3/4+(W3/2)/4;
        double xs = -R1 + (2*f-1)*W1/2;  double xe = -R1 + (2*f+3)*W1/2;
        double ys = -R2 + (2*g-1)*W2/2;  double ye = -R2 + (2*g+3)*W2/2;
        int xn = int(ceil(xe-xs));  int yn = int(ceil(ye-ys));  int zn = int(ceil(ze-zs));
        double thts, thtm, thte; //x to -z
        if(f==0) {
          thts = atan2(-1.0, 1.0-1.0/nd);
          thtm = atan2(-1.0+1.0/nd, 1.0);
          thte = atan2(-1.0+3.0/nd, 1.0);
        } else if(f==nd-1) {
          thts = atan2(-1.0+(2.0*f-1.0)/nd, 1.0);
          thtm = atan2(-1.0+(2.0*f+1.0)/nd, 1.0);
          thte = atan2(1.0, 1.0-1.0/nd);
        } else {
          thts = atan2(-1.0+(2.0*f-1.0)/nd, 1.0);
          thtm = atan2(-1.0+(2.0*f+1.0)/nd, 1.0);
          thte = atan2(-1.0+(2.0*f+3.0)/nd, 1.0);
        }
        double phis, phim, phie; //y to -z
        if(g==0) {
          phis = atan2(-1.0, 1.0-1.0/nd);
          phim = atan2(-1.0+1.0/nd, 1.0);
          phie = atan2(-1.0+3.0/nd, 1.0);
        } else if(g==nd-1) {
          phis = atan2(-1.0+(2.0*g-1.0)/nd, 1.0);
          phim = atan2(-1.0+(2.0*g+1.0)/nd, 1.0);
          phie = atan2(1.0, 1.0-1.0/nd);
        } else {
          phis = atan2(-1.0+(2.0*g-1.0)/nd, 1.0);
          phim = atan2(-1.0+(2.0*g+1.0)/nd, 1.0);
          phie = atan2(-1.0+(2.0*g+3.0)/nd, 1.0);
        }
        int xh = xn/2;  int yh = yn/2;  int zh = zn/2;
        double R13 = double(F1)/double(F3);  double R23 = double(F2)/double(F3);
        CpxNumTns tpdata(xn,yn,zn);
        CpxNumTns& Cblk = C.block(s,wcnt);
        tpdata = Cblk;
        //fft
        fftwnd_plan p = NULL;
        map<inttriple, fftwnd_plan>::iterator mit = planmap.find( inttriple(xn, intpair(yn,zn)) );
        if(mit!=planmap.end()) {
          p = (*mit).second;
        } else {
          p = fftw3d_create_plan(zn, yn, xn, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE);
          planmap[ inttriple(xn, intpair(yn,zn)) ] = p;
        }
        fftwnd_one(p, (fftw_complex*)tpdata.data(), NULL); //cerr<<"wedge s"<<endl;
        double sqrtprod = sqrt(double(xn*yn*zn));
        for(int i=0; i<xn; i++)
          for(int j=0; j<yn; j++)
            for(int k=0; k<zn; k++)
              tpdata(i,j,k) /= sqrtprod;
        CpxOffTns wpdata(xn,yn,zn);
        fdct3d_fftshift(xn,yn,zn,tpdata,wpdata);
        for(int zcur=(int)ceil(zs); zcur<ze; zcur++) {
          int xfm = (int)ceil( max(-R1, R13*(-zcur)*tan(thts)) );
          int xto = (int)floor( min(R1, R13*(-zcur)*tan(thte)) );
          int yfm = (int)ceil( max(-R2, R23*(-zcur)*tan(phis)) );
          int yto = (int)floor( min(R2, R23*(-zcur)*tan(phie)) );
          for(int xcur=xfm; xcur<=xto; xcur++)
            for(int ycur=yfm; ycur<=yto; ycur++) {
              int tmpx = xcur%xn;  if(tmpx<-xh) tmpx+=xn;  if(tmpx>=-xh+xn) tmpx-=xn;
              int tmpy = ycur%yn;  if(tmpy<-yh) tmpy+=yn;  if(tmpy>=-yh+yn) tmpy-=yn;
              int tmpz = zcur%zn;  if(tmpz<-zh) tmpz+=zn;  if(tmpz>=-zh+zn) tmpz-=zn;
              double thtcur = atan2(xcur/R1, (-zcur)/R3);
              double phicur = atan2(ycur/R2, (-zcur)/R3);
              double glbpou;
              fdct3d_globalpou(thtcur, phicur, M_PI/4-atan2(1.0-1.0/nd, 1.0), glbpou);
              double wtht;
              if(thtcur<thtm) {
                if(f==0) wtht = 1;
                else { double l,r; fdct3d_window( (thtcur-thts)/(thtm-thts), l, r); wtht = l; }
              } else {
                if(f==nd-1) wtht = 1;
                else { double l,r; fdct3d_window( (thtcur-thtm)/(thte-thtm), l, r); wtht = r; }
              }
              double wphi;
              if(phicur<phim) {
                if(g==0) wphi = 1;
                else { double l,r; fdct3d_window( (phicur-phis)/(phim-phis), l, r); wphi = l; }
              } else {
                if(g==nd-1) wphi = 1;
                else { double l,r; fdct3d_window( (phicur-phim)/(phie-phim), l, r); wphi = r; }
              }
              double pou = glbpou * wtht * wphi;
              wpdata(tmpx, tmpy, tmpz) *= pou;
              double ss = sma1(xcur)*sma2(ycur)*sma3(zcur);
              double bb = big1(xcur)*big2(ycur)*big3(zcur);
              int bi,bj,bk;  int oi,oj,ok;
              fdct3d_position_aux(N1,N2,N3,b, xcur,ycur,zcur, bi,bj,bk,oi,oj,ok);
              CpxNumTns& Wblk = W.block(bi,bj,bk);
              Wblk(oi,oj,ok) += wpdata(tmpx,tmpy,tmpz) * bb * sqrt(1.0-ss*ss);
            }
        }//zcur
      }//if
      wcnt++;
    }
  }//end of face
  // every wedge of the six faces must have been visited exactly once
  iA(wcnt==nd*nd*nf);
  //remove plans
  for(map<inttriple, fftwnd_plan>::iterator mit=planmap.begin(); mit!=planmap.end(); mit++) {
    fftwnd_plan p = (*mit).second;
    fftwnd_destroy_plan(p);
  }
  return 0;
}
int CpxCrvletPrtd::scatter(vector< vector<bool> >& newexists) { //LEXING: usually only called once vector< vector<int> >& c = _nx; //1. the global vector vector<int> glblszs(mpisize(), 0); int glbnum = 0; for(int s=0; s<c.size(); s++) for(int w=0; w<c[s].size(); w++) { int pi = _owners[s][w]; glblszs[pi] += _sizes[s][w]; glbnum += _sizes[s][w]; } vector<int> glbaccs(mpisize(), 0); int tmp = 0; for(int pi=0; pi<mpisize(); pi++) { glbaccs[pi] = tmp; tmp += glblszs[pi]; } vector< vector<int> > glbstts(c); //not cleared, but okay for(int s=0; s<c.size(); s++) for(int w=0; w<c[s].size(); w++) { int pi = _owners[s][w]; glbstts[s][w] = glbaccs[pi]; glbaccs[pi] += _sizes[s][w]; } int lclsum = 0; vector<int> l2gmap; for(int s=0; s<c.size(); s++) for(int w=0; w<c[s].size(); w++) { if(newexists[s][w]==true && _exists[s][w]==false) { lclsum += _sizes[s][w]; for(int g=0; g<_sizes[s][w]; g++) l2gmap.push_back( glbstts[s][w] + g ); } } iA(l2gmap.size()==lclsum); IS lclis; iC( ISCreateStride(PETSC_COMM_SELF, l2gmap.size(), 0, 1, &lclis) ); IS glbis; iC( ISCreateGeneral(PETSC_COMM_WORLD, l2gmap.size(), &(l2gmap[0]), &glbis) ); l2gmap.clear(); //SAVE SPACE //2. allocate a global vector, and copy data Vec glbvec; iC( VecCreateMPI(PETSC_COMM_WORLD, glblszs[mpirank()], PETSC_DETERMINE, &glbvec) ); double* glbarr; iC( VecGetArray(glbvec, &glbarr) ); double* glbptr = glbarr; for(int s=0; s<c.size(); s++) for(int w=0; w<c[s].size(); w++) { int pi = _owners[s][w]; if(pi==mpirank()) { double* tmpptr = (double*)(_blocks[s][w].data()); for(int g=0; g<_sizes[s][w]; g++) { *glbptr = tmpptr[g]; glbptr++; } } } iC( VecRestoreArray(glbvec, &glbarr) ); Vec lclvec; iC( VecCreateSeq(PETSC_COMM_SELF, lclsum, &lclvec) ); //3. 
vec scatter VecScatter sc; iC( VecScatterCreate(glbvec, glbis, lclvec, lclis, &sc) ); iC( ISDestroy(lclis) ); iC( ISDestroy(glbis) ); //SAVE SPACE iC( VecScatterBegin(glbvec, lclvec, INSERT_VALUES, SCATTER_FORWARD, sc) ); iC( VecScatterEnd( glbvec, lclvec, INSERT_VALUES, SCATTER_FORWARD, sc) ); iC( VecScatterDestroy(sc) ); //SAVE SPACE iC( VecDestroy(glbvec) ); //4. store double* lclarr; iC( VecGetArray(lclvec, &lclarr) ); double* lclptr = lclarr; for(int s=0; s<c.size(); s++) for(int w=0; w<c[s].size(); w++) { if(newexists[s][w]==true && _exists[s][w]==false) { _blocks[s][w].resize(_nx[s][w], _ny[s][w], _nz[s][w]); double* tmpptr = (double*)(_blocks[s][w].data()); for(int g=0; g<_sizes[s][w]; g++) { tmpptr[g] = *lclptr; lclptr++; } _exists[s][w] = true; //VERY IMPORTANT } } iC( VecRestoreArray(lclvec, &lclarr) ); iC( VecDestroy(lclvec) ); return 0; }
int CpxCrvletPrtd::combine() { //LEXING: usually only called once vector< vector<int> >& c = _nx; //1. the global vector vector<int> glblszs(mpisize(), 0); int glbnum = 0; for(int s=0; s<c.size(); s++) for(int w=0; w<c[s].size(); w++) { int pi = _owners[s][w]; glblszs[pi] += _sizes[s][w]; glbnum += _sizes[s][w]; } vector<int> glbaccs(mpisize(), 0); int tmp = 0; for(int pi=0; pi<mpisize(); pi++) { glbaccs[pi] = tmp; tmp += glblszs[pi]; } vector< vector<int> > glbstts(c); for(int s=0; s<c.size(); s++) for(int w=0; w<c[s].size(); w++) { int pi = _owners[s][w]; glbstts[s][w] = glbaccs[pi]; glbaccs[pi] += _sizes[s][w]; } int lclsum = 0; vector<int> l2gmap; for(int s=0; s<c.size(); s++) for(int w=0; w<c[s].size(); w++) { if(_exists[s][w]==true && _owners[s][w]!=mpirank()) { lclsum += _sizes[s][w]; for(int g=0; g<_sizes[s][w]; g++) l2gmap.push_back( glbstts[s][w] + g ); } } iA(l2gmap.size()==lclsum); IS lclis; iC( ISCreateStride(PETSC_COMM_SELF, l2gmap.size(), 0, 1, &lclis) ); IS glbis; iC( ISCreateGeneral(PETSC_COMM_WORLD, l2gmap.size(), &(l2gmap[0]), &glbis) ); l2gmap.clear(); //SAVE SPACE //2. allocate a global vector and a local vector, put data in local Vec glbvec; iC( VecCreateMPI(PETSC_COMM_WORLD, glblszs[mpirank()], PETSC_DETERMINE, &glbvec) ); Vec lclvec; iC( VecCreateSeq(PETSC_COMM_SELF, lclsum, &lclvec) ); double* lclarr; iC( VecGetArray(lclvec, &lclarr) ); double* lclptr = lclarr; for(int s=0; s<c.size(); s++) for(int w=0; w<c[s].size(); w++) { if(_exists[s][w]==true && _owners[s][w]!=mpirank()) { double* tmpptr = (double*)(_blocks[s][w].data()); for(int g=0; g<_sizes[s][w]; g++) { *lclptr = tmpptr[g]; lclptr++; } } } iC( VecRestoreArray(lclvec, &lclarr) ); //3. 
vec scatter VecScatter sc; iC( VecScatterCreate(glbvec, glbis, lclvec, lclis, &sc) ); iC( ISDestroy(lclis) ); iC( ISDestroy(glbis) ); //SAVE SPACE iC( VecScatterBegin(glbvec, lclvec, ADD_VALUES, SCATTER_REVERSE, sc) ); iC( VecScatterEnd( glbvec, lclvec, ADD_VALUES, SCATTER_REVERSE, sc) ); iC( VecScatterDestroy(sc) ); //SAVE SPACE iC( VecDestroy(lclvec) ); //4. store double* glbarr; iC( VecGetArray(glbvec, &glbarr) ); double* glbptr = glbarr; for(int s=0; s<c.size(); s++) for(int w=0; w<c[s].size(); w++) { int pi = _owners[s][w]; if(pi==mpirank()) { double* tmpptr = (double*)(_blocks[s][w].data()); for(int g=0; g<_sizes[s][w]; g++) { tmpptr[g] += *glbptr; glbptr++; //LEXING: += is very important } } } iC( VecRestoreArray(glbvec, &glbarr) ); iC( VecDestroy(glbvec) ); //IMPORTANT for(int s=0; s<c.size(); s++) for(int w=0; w<c[s].size(); w++) { if(_owners[s][w]!=mpirank()) { _blocks[s][w].resize(0,0,0); _exists[s][w] = false; } } return 0; }