Ejemplo n.º 1
0
//---------------------------------------------
int CpxCrvletPrtd::check()
{
  vector< vector<int> >& c = _nx;
  for(int s=0; s<c.size(); s++)
	 for(int w=0; w<c[s].size(); w++)
		if(_exists[s][w]==true) {
		  CpxNumTns& tmp = _blocks[s][w];
		  iA( tmp.m()==_nx[s][w] && tmp.n()==_ny[s][w] && tmp.p()==_nz[s][w] );
		}
  return 0;
}
Ejemplo n.º 2
0
int CpxCrvletPrtd::shift(vector< vector<int> >& newowners)
{
  vector< vector<int> >& c = _nx;
  for(int s=0; s<c.size(); s++)	 for(int w=0; w<c[s].size(); w++) {
	 if(newowners[s][w]==mpirank()) {
		iA(_exists[s][w]==true);
	 }
  }
  _owners = newowners;
  return 0;
}
Ejemplo n.º 3
0
main()
{
    static int ia[10] = { 12,7,14,9,128,17,6,3,27,5 };

    Array_RC<int> iA( ia,10 );

    cout << "class template instantiation Array_RC<int>"
 	 << endl;

    try_array( iA );

    return 0;
}
Ejemplo n.º 4
0
// ---------------------------------------------------------------------- 
int Dense3d::evaluate(const DblNumVec& srcDen, DblNumVec& trgVal) 
{
  //-----------------------------------
  iA(srcDen.m()==srcDOF()*(*_srcPos).n());  iA(trgVal.m()==trgDOF()*(*_trgPos).n());

  int dim  = this->dim();
  int srcDOF = this->srcDOF();
  int trgDOF = this->trgDOF();
  /* Number of sources */
  int numSrc = (*_srcPos).n();
  /* Number of targets */
  int numTrg = (*_trgPos).n();

  DblNumMat inter(trgDOF, numSrc*srcDOF);
  for(int i=0; i<numTrg; i++) {
    DblNumMat onePosMat(dim, 1, false, (*_trgPos).clmdata(i));
    DblNumVec oneValVec(trgDOF, false, trgVal.data()+trgDOF*i);
    iC( _knl.kernel((*_srcPos), (*_srcNor), onePosMat, inter) );
    iC( dgemv(1.0, inter, srcDen, 0.0, oneValVec) );
  }

  return 0;
}
Ejemplo n.º 5
0
main()
{
    static int ia[ 10 ] = { 12,7,14,9,128,17,6,3,27,5 };
    static string sa[ 7 ] = {
  	"Eeyore", "Pooh", "Tigger",
  	"Piglet", "Owl", "Gopher", "Heffalump"
    };

    Array_Sort<int> iA( ia,10 );
    Array_Sort<string> SA( sa,7 );

    cout << "class template instantiation Array_Sort<int>" << endl;
    try_array( iA );

    cout << "class template instantiation Array_Sort<string>" << endl;
    try_array( SA );

    return 0;
}
Ejemplo n.º 6
0
// ----------------------------------------------------------------------
int Dense3d::setup(map<string,string>&)
{
  iA(_srcPos!=NULL && _srcNor!=NULL && _trgPos!=NULL);
  iA((*_srcPos).m()==dim() && (*_trgPos).m()==dim());  //nothing to do
  return 0;
}
Ejemplo n.º 7
0
//------------------------------------------------------------------------------------
int fdct3d_inverse_angles(int N1,int N2,int N3,int b, double L1,double L2,double L3, int s,int nd,
							  CpxCrvletPrtd& C, CpxNumTnsBlkd& W)
{
  int mpirank;  MPI_Comm_rank(MPI_COMM_WORLD, &mpirank);
  int mpisize;  MPI_Comm_size(MPI_COMM_WORLD, &mpisize);
  vector< vector<int> >& Cowners = C.owners();
  vector<int>& crvowners = Cowners[s]; //LEXING: the owner information for wedges in scale s
  
  int nf = 6;
  int wcnt = 0;
  int S1, S2, S3;	 int F1, F2, F3;	 double R1, R2, R3;	 fdct3d_rangecompute(L1, L2, L3, S1, S2, S3, F1, F2, F3, R1, R2, R3);
  DblOffVec big1(S1);  fdct3d_lowpass(L1, big1);
  DblOffVec big2(S2);  fdct3d_lowpass(L2, big2);
  DblOffVec big3(S3);  fdct3d_lowpass(L3, big3);
  
  double Lh1 = L1/2;  double Lh2 = L2/2;  double Lh3 = L3/2;
  int Sh1, Sh2, Sh3;	 int Fh1, Fh2, Fh3;	 double Rh1, Rh2, Rh3;	 fdct3d_rangecompute(Lh1, Lh2, Lh3, Sh1, Sh2, Sh3, Fh1, Fh2, Fh3, Rh1, Rh2, Rh3);
  DblOffVec sma1(S1);  fdct3d_lowpass(Lh1, sma1);
  DblOffVec sma2(S2);  fdct3d_lowpass(Lh2, sma2);
  DblOffVec sma3(S3);  fdct3d_lowpass(Lh3, sma3);
  
  double W1 = L1/nd;  double W2 = L2/nd;  double W3 = L3/nd;
  
  typedef pair<int,int> intpair;  typedef pair<int, intpair> inttriple;
  map<inttriple, fftwnd_plan> planmap;

  //face 0: x,y,z
  for(int h=0; h<nd; h++) { //(y first z second)
	 for(int g=0; g<nd; g++) {
		if(crvowners[wcnt]==mpirank) {
		  double xs = R1/4-(W1/2)/4;		double xe = R1;
		  double ys = -R2 + (2*g-1)*W2/2;		double ye = -R2 + (2*g+3)*W2/2;
		  double zs = -R3 + (2*h-1)*W3/2;		double ze = -R3 + (2*h+3)*W3/2;
		  int xn = int(ceil(xe-xs));		  int yn = int(ceil(ye-ys));		  int zn = int(ceil(ze-zs));
		  double thts, thtm, thte; //y to x
		  if(g==0) {
			 thts = atan2(-1.0, 1.0-1.0/nd);			 thtm = atan2(-1.0+1.0/nd, 1.0);			 thte = atan2(-1.0+3.0/nd, 1.0);
		  } else if(g==nd-1) {
			 thts = atan2(-1.0+(2.0*g-1.0)/nd, 1.0);			 thtm = atan2(-1.0+(2.0*g+1.0)/nd, 1.0);			 thte = atan2(1.0, 1.0-1.0/nd);
		  } else {
			 thts = atan2(-1.0+(2.0*g-1.0)/nd, 1.0);			 thtm = atan2(-1.0+(2.0*g+1.0)/nd, 1.0);			 thte = atan2(-1.0+(2.0*g+3.0)/nd, 1.0);
		  }
		  double phis, phim, phie; //z to x
		  if(h==0) {
			 phis = atan2(-1.0, 1.0-1.0/nd);			 phim = atan2(-1.0+1.0/nd, 1.0);			 phie = atan2(-1.0+3.0/nd, 1.0);
		  } else if(h==nd-1) {
			 phis = atan2(-1.0+(2.0*h-1.0)/nd, 1.0);			 phim = atan2(-1.0+(2.0*h+1.0)/nd, 1.0);			 phie = atan2(1.0, 1.0-1.0/nd);
		  } else {
			 phis = atan2(-1.0+(2.0*h-1.0)/nd, 1.0);			 phim = atan2(-1.0+(2.0*h+1.0)/nd, 1.0);			 phie = atan2(-1.0+(2.0*h+3.0)/nd, 1.0);
		  }
		  int xh = xn/2;		  int yh = yn/2;		  int zh = zn/2; //half
		  double R21 = R2/R1;		  double R31 = R3/R1;
		  
		  CpxNumTns tpdata(xn,yn,zn);
		  CpxNumTns& Cblk = C.block(s,wcnt);
		  tpdata = Cblk;
		  //fft
		  fftwnd_plan p = NULL;
		  map<inttriple, fftwnd_plan>::iterator mit = planmap.find( inttriple(xn, intpair(yn,zn)) );
		  if(mit!=planmap.end()) {			 p = (*mit).second;
		  } else {
			 p = fftw3d_create_plan(zn, yn, xn, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE);
			 planmap[ inttriple(xn, intpair(yn,zn)) ] = p;
		  }
		  fftwnd_one(p, (fftw_complex*)tpdata.data(), NULL);		  //cerr<<"wedge s"<<endl;
		  double sqrtprod = sqrt(double(xn*yn*zn));
		  for(int i=0; i<xn; i++)			 for(int j=0; j<yn; j++)				for(int k=0; k<zn; k++)				  tpdata(i,j,k) /= sqrtprod;
		  CpxOffTns wpdata(xn,yn,zn);
		  fdct3d_fftshift(xn,yn,zn,tpdata,wpdata);
		  
		  for(int xcur=(int)ceil(xs); xcur<xe; xcur++) {
			 int yfm = (int)ceil( max(-R2, R21*xcur*tan(thts)) );
			 int yto = (int)floor( min(R2, R21*xcur*tan(thte)) );
			 int zfm = (int)ceil( max(-R3, R31*xcur*tan(phis)) );
			 int zto = (int)floor( min(R3, R31*xcur*tan(phie)) );
			 for(int ycur=yfm; ycur<=yto; ycur++)
				for(int zcur=zfm; zcur<=zto; zcur++) {
				  int tmpx = xcur%xn;				  if(tmpx<-xh) tmpx+=xn;				  if(tmpx>=-xh+xn) tmpx-=xn;
				  int tmpy = ycur%yn;				  if(tmpy<-yh) tmpy+=yn;				  if(tmpy>=-yh+yn) tmpy-=yn;
				  int tmpz = zcur%zn;				  if(tmpz<-zh) tmpz+=zn;				  if(tmpz>=-zh+zn) tmpz-=zn;
				  
				  double thtcur = atan2(ycur/R2, xcur/R1);
				  double phicur = atan2(zcur/R3, xcur/R1);
				  double glbpou;					 fdct3d_globalpou(thtcur, phicur, M_PI/4-atan2(1.0-1.0/nd, 1.0), glbpou);
				  double wtht;
				  if(thtcur<thtm) {
					 if(g==0)						wtht = 1;
					 else {						double l,r;						fdct3d_window( (thtcur-thts)/(thtm-thts), l, r);						wtht = l;					 }
				  } else {
					 if(g==nd-1)						wtht = 1;
					 else {						double l,r;						  fdct3d_window( (thtcur-thtm)/(thte-thtm), l, r);						wtht = r;					 }
				  }
				  double wphi;
				  if(phicur<phim) {
					 if(h==0)						wphi = 1;
					 else {						double l,r;						  fdct3d_window( (phicur-phis)/(phim-phis), l, r);						wphi = l;					 }
				  } else {
					 if(h==nd-1)						wphi = 1;
					 else {						double l,r;						  fdct3d_window( (phicur-phim)/(phie-phim), l, r);						wphi = r;					 }
				  }
				  double pou = glbpou * wtht * wphi;
				  wpdata(tmpx, tmpy, tmpz) *= pou;
				  
				  double ss = sma1(xcur)*sma2(ycur)*sma3(zcur);				  double bb = big1(xcur)*big2(ycur)*big3(zcur);
				  int bi,bj,bk;			 int oi,oj,ok;			 fdct3d_position_aux(N1,N2,N3,b, xcur,ycur,zcur, bi,bj,bk,oi,oj,ok);
				  CpxNumTns& Wblk = W.block(bi,bj,bk);
				  Wblk(oi,oj,ok) += wpdata(tmpx,tmpy,tmpz)  * bb * sqrt(1.0-ss*ss);
				}
		  } //xcur
		} //if
		wcnt++;
	 }
  } //end of face
  //face 1. y z x
  for(int f=0; f<nd; f++) {
	 for(int h=0; h<nd; h++) {
		if(crvowners[wcnt]==mpirank) {
		  double ys = R2/4-(W2/2)/4;		  double ye = R2;
		  double zs = -R3 + (2*h-1)*W3/2;		  double ze = -R3 + (2*h+3)*W3/2;
		  double xs = -R1 + (2*f-1)*W1/2;		  double xe = -R1 + (2*f+3)*W1/2;
		  int xn = int(ceil(xe-xs));		  int yn = int(ceil(ye-ys));		  int zn = int(ceil(ze-zs));
		  double thts, thtm, thte; //z to y
		  if(h==0) {
			 thts = atan2(-1.0, 1.0-1.0/nd);			 thtm = atan2(-1.0+1.0/nd, 1.0);			 thte = atan2(-1.0+3.0/nd, 1.0);
		  } else if(h==nd-1) {
			 thts = atan2(-1.0+(2.0*h-1.0)/nd, 1.0);			 thtm = atan2(-1.0+(2.0*h+1.0)/nd, 1.0);			 thte = atan2(1.0, 1.0-1.0/nd);
		  } else {
			 thts = atan2(-1.0+(2.0*h-1.0)/nd, 1.0);			 thtm = atan2(-1.0+(2.0*h+1.0)/nd, 1.0);			 thte = atan2(-1.0+(2.0*h+3.0)/nd, 1.0);
		  }
		  double phis, phim, phie; //z to x
		  if(f==0) {
			 phis = atan2(-1.0, 1.0-1.0/nd);			 phim = atan2(-1.0+1.0/nd, 1.0);			 phie = atan2(-1.0+3.0/nd, 1.0);
		  } else if(f==nd-1) {
			 phis = atan2(-1.0+(2.0*f-1.0)/nd, 1.0);			 phim = atan2(-1.0+(2.0*f+1.0)/nd, 1.0);			 phie = atan2(1.0, 1.0-1.0/nd);
		  } else {
			 phis = atan2(-1.0+(2.0*f-1.0)/nd, 1.0);			 phim = atan2(-1.0+(2.0*f+1.0)/nd, 1.0);			 phie = atan2(-1.0+(2.0*f+3.0)/nd, 1.0);
		  }
		  int xh = xn/2;		  int yh = yn/2;		  int zh = zn/2;
		  double R32 = R3/R2;		  double R12 = R1/R2;
		  
		  CpxNumTns tpdata(xn,yn,zn);
		  CpxNumTns& Cblk = C.block(s,wcnt);
		  tpdata = Cblk;
		  //fft
		  fftwnd_plan p = NULL;
		  map<inttriple, fftwnd_plan>::iterator mit = planmap.find( inttriple(xn, intpair(yn,zn)) );
		  if(mit!=planmap.end()) {			 p = (*mit).second;
		  } else {
			 p = fftw3d_create_plan(zn, yn, xn, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE);
			 planmap[ inttriple(xn, intpair(yn,zn)) ] = p;
		  }
		  fftwnd_one(p, (fftw_complex*)tpdata.data(), NULL);		  //cerr<<"wedge s"<<endl;
		  double sqrtprod = sqrt(double(xn*yn*zn));
		  for(int i=0; i<xn; i++)			 for(int j=0; j<yn; j++)				for(int k=0; k<zn; k++)				  tpdata(i,j,k) /= sqrtprod;
		  CpxOffTns wpdata(xn,yn,zn);
		  fdct3d_fftshift(xn,yn,zn,tpdata,wpdata);

		  for(int ycur=(int)ceil(ys); ycur<ye; ycur++) {
			 int zfm = (int)ceil( max(-R3, R32*ycur*tan(thts)) );
			 int zto = (int)floor( min(R3, R32*ycur*tan(thte)) );
			 int xfm = (int)ceil( max(-R1, R12*ycur*tan(phis)) );
			 int xto = (int)floor( min(R1, R12*ycur*tan(phie)) );
			 for(int zcur=zfm; zcur<=zto; zcur++)
				for(int xcur=xfm; xcur<=xto; xcur++) {
				  int tmpx = xcur%xn;				  if(tmpx<-xh) tmpx+=xn;				  if(tmpx>=-xh+xn) tmpx-=xn;
				  int tmpy = ycur%yn;				  if(tmpy<-yh) tmpy+=yn;				  if(tmpy>=-yh+yn) tmpy-=yn;
				  int tmpz = zcur%zn;				  if(tmpz<-zh) tmpz+=zn;				  if(tmpz>=-zh+zn) tmpz-=zn;
				  
				  double thtcur = atan2(zcur/R3, ycur/R2);
				  double phicur = atan2(xcur/R1, ycur/R2);
				  double glbpou;					 fdct3d_globalpou(thtcur, phicur, M_PI/4-atan2(1.0-1.0/nd, 1.0), glbpou); //CHECK
				  double wtht;
				  if(thtcur<thtm) {
					 if(h==0)						wtht = 1;
					 else {						double l,r;						fdct3d_window( (thtcur-thts)/(thtm-thts), l, r);						wtht = l;					 }
				  } else {
					 if(h==nd-1)						wtht = 1;
					 else {						double l,r;						  fdct3d_window( (thtcur-thtm)/(thte-thtm), l, r);						wtht = r;					 }
				  }
				  double wphi;
				  if(phicur<phim) {
					 if(f==0)						wphi = 1;
					 else {						double l,r;						  fdct3d_window( (phicur-phis)/(phim-phis), l, r);						wphi = l;					 }
				  } else {
					 if(f==nd-1)						wphi = 1;
					 else {						double l,r;						  fdct3d_window( (phicur-phim)/(phie-phim), l, r);						wphi = r;					 }
				  }
				  double pou = glbpou * wtht * wphi;
				  wpdata(tmpx, tmpy, tmpz) *= pou;
				  
				  double ss = sma1(xcur)*sma2(ycur)*sma3(zcur);				  double bb = big1(xcur)*big2(ycur)*big3(zcur);
				  int bi,bj,bk;			 int oi,oj,ok;			 fdct3d_position_aux(N1,N2,N3,b, xcur,ycur,zcur, bi,bj,bk,oi,oj,ok);
				  CpxNumTns& Wblk = W.block(bi,bj,bk);
				  Wblk(oi,oj,ok) += wpdata(tmpx,tmpy,tmpz)  * bb * sqrt(1.0-ss*ss);
				}
		  } //ycur
		}//if
		wcnt++;
	 }
  }//end of face
  //face 2. z x y
  for(int g=0; g<nd; g++) {
	 for(int f=0; f<nd; f++) {
		if(crvowners[wcnt]==mpirank) {
		  double zs = R3/4-(W3/2)/4;		double ze = R3;
		  double xs = -R1 + (2*f-1)*W1/2;		double xe = -R1 + (2*f+3)*W1/2;
		  double ys = -R2 + (2*g-1)*W2/2;		double ye = -R2 + (2*g+3)*W2/2;
		  int xn = int(ceil(xe-xs));		  int yn = int(ceil(ye-ys));		  int zn = int(ceil(ze-zs));
		  double thts, thtm, thte; //y to x
		  if(f==0) {
			 thts = atan2(-1.0, 1.0-1.0/nd);			 thtm = atan2(-1.0+1.0/nd, 1.0);			 thte = atan2(-1.0+3.0/nd, 1.0);
		  } else if(f==nd-1) {
			 thts = atan2(-1.0+(2.0*f-1.0)/nd, 1.0);			 thtm = atan2(-1.0+(2.0*f+1.0)/nd, 1.0);			 thte = atan2(1.0, 1.0-1.0/nd);
		  } else {
			 thts = atan2(-1.0+(2.0*f-1.0)/nd, 1.0);			 thtm = atan2(-1.0+(2.0*f+1.0)/nd, 1.0);			 thte = atan2(-1.0+(2.0*f+3.0)/nd, 1.0);
		  }
		  double phis, phim, phie; //z to x
		  if(g==0) {
			 phis = atan2(-1.0, 1.0-1.0/nd);			 phim = atan2(-1.0+1.0/nd, 1.0);			 phie = atan2(-1.0+3.0/nd, 1.0);
		  } else if(g==nd-1) {
			 phis = atan2(-1.0+(2.0*g-1.0)/nd, 1.0);			 phim = atan2(-1.0+(2.0*g+1.0)/nd, 1.0);			 phie = atan2(1.0, 1.0-1.0/nd);
		  } else {
			 phis = atan2(-1.0+(2.0*g-1.0)/nd, 1.0);			 phim = atan2(-1.0+(2.0*g+1.0)/nd, 1.0);			 phie = atan2(-1.0+(2.0*g+3.0)/nd, 1.0);
		  }
		  int xh = xn/2;		  int yh = yn/2;		  int zh = zn/2;
		  double R13 = double(F1)/double(F3);		  double R23 = double(F2)/double(F3);

		  CpxNumTns tpdata(xn,yn,zn);
		  CpxNumTns& Cblk = C.block(s,wcnt);
		  tpdata = Cblk;
		  //fft
		  fftwnd_plan p = NULL;
		  map<inttriple, fftwnd_plan>::iterator mit = planmap.find( inttriple(xn, intpair(yn,zn)) );
		  if(mit!=planmap.end()) {			 p = (*mit).second;
		  } else {
			 p = fftw3d_create_plan(zn, yn, xn, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE);
			 planmap[ inttriple(xn, intpair(yn,zn)) ] = p;
		  }
		  fftwnd_one(p, (fftw_complex*)tpdata.data(), NULL);		  //cerr<<"wedge s"<<endl;
		  double sqrtprod = sqrt(double(xn*yn*zn));
		  for(int i=0; i<xn; i++)			 for(int j=0; j<yn; j++)				for(int k=0; k<zn; k++)				  tpdata(i,j,k) /= sqrtprod;
		  CpxOffTns wpdata(xn,yn,zn);
		  fdct3d_fftshift(xn,yn,zn,tpdata,wpdata);
		  
		  for(int zcur=(int)ceil(zs); zcur<ze; zcur++) {
			 int xfm = (int)ceil( max(-R1, R13*zcur*tan(thts)) );
			 int xto = (int)floor( min(R1, R13*zcur*tan(thte)) );
			 int yfm = (int)ceil( max(-R2, R23*zcur*tan(phis)) );
			 int yto = (int)floor( min(R2, R23*zcur*tan(phie)) );
			 for(int xcur=xfm; xcur<=xto; xcur++)
				for(int ycur=yfm; ycur<=yto; ycur++) {
				  int tmpx = xcur%xn;				  if(tmpx<-xh) tmpx+=xn;				  if(tmpx>=-xh+xn) tmpx-=xn;
				  int tmpy = ycur%yn;				  if(tmpy<-yh) tmpy+=yn;				  if(tmpy>=-yh+yn) tmpy-=yn;
				  int tmpz = zcur%zn;				  if(tmpz<-zh) tmpz+=zn;				  if(tmpz>=-zh+zn) tmpz-=zn;
				  
				  double thtcur = atan2(xcur/R1, zcur/R3);
				  double phicur = atan2(ycur/R2, zcur/R3);
				  double glbpou;					 fdct3d_globalpou(thtcur, phicur, M_PI/4-atan2(1.0-1.0/nd, 1.0), glbpou);
				  double wtht;
				  if(thtcur<thtm) {
					 if(f==0)						wtht = 1;
					 else {						double l,r;						fdct3d_window( (thtcur-thts)/(thtm-thts), l, r);						wtht = l;					 }
				  } else {
					 if(f==nd-1)						wtht = 1;
					 else {						double l,r;						  fdct3d_window( (thtcur-thtm)/(thte-thtm), l, r);						wtht = r;					 }
				  }
				  double wphi;
				  if(phicur<phim) {
					 if(g==0)						wphi = 1;
					 else {						double l,r;						  fdct3d_window( (phicur-phis)/(phim-phis), l, r);						wphi = l;					 }
				  } else {
					 if(g==nd-1)						wphi = 1;
					 else {						double l,r;						  fdct3d_window( (phicur-phim)/(phie-phim), l, r);						wphi = r;					 }
				  }
				  double pou = glbpou * wtht * wphi;
				  wpdata(tmpx, tmpy, tmpz) *= pou;
				  
				  double ss = sma1(xcur)*sma2(ycur)*sma3(zcur);				  double bb = big1(xcur)*big2(ycur)*big3(zcur);
				  int bi,bj,bk;			 int oi,oj,ok;			 fdct3d_position_aux(N1,N2,N3,b, xcur,ycur,zcur, bi,bj,bk,oi,oj,ok);
				  CpxNumTns& Wblk = W.block(bi,bj,bk);
				  Wblk(oi,oj,ok) += wpdata(tmpx,tmpy,tmpz)  * bb * sqrt(1.0-ss*ss);
				}
		  }//zcur
		}//if
		wcnt++;
	 }
  }//end of face
  //face 3: -x,-y,-z
  for(int h=nd-1; h>=0; h--) {
	 for(int g=nd-1; g>=0; g--) {
		if(crvowners[wcnt]==mpirank) {
		  double xs = -R1;		  double xe = -R1/4+(W1/2)/4;
		  double ys = -R2 + (2*g-1)*W2/2;		double ye = -R2 + (2*g+3)*W2/2;
		  double zs = -R3 + (2*h-1)*W3/2;		double ze = -R3 + (2*h+3)*W3/2;
		  int xn = int(ceil(xe-xs));		  int yn = int(ceil(ye-ys));		  int zn = int(ceil(ze-zs));
		  double thts, thtm, thte; //y to x
		  if(g==0) {
			 thts = atan2(-1.0, 1.0-1.0/nd);			 thtm = atan2(-1.0+1.0/nd, 1.0);			 thte = atan2(-1.0+3.0/nd, 1.0);
		  } else if(g==nd-1) {
			 thts = atan2(-1.0+(2.0*g-1.0)/nd, 1.0);			 thtm = atan2(-1.0+(2.0*g+1.0)/nd, 1.0);			 thte = atan2(1.0, 1.0-1.0/nd);
		  } else {
			 thts = atan2(-1.0+(2.0*g-1.0)/nd, 1.0);			 thtm = atan2(-1.0+(2.0*g+1.0)/nd, 1.0);			 thte = atan2(-1.0+(2.0*g+3.0)/nd, 1.0);
		  }
		  double phis, phim, phie; //z to x
		  if(h==0) {
			 phis = atan2(-1.0, 1.0-1.0/nd);			 phim = atan2(-1.0+1.0/nd, 1.0);			 phie = atan2(-1.0+3.0/nd, 1.0);
		  } else if(h==nd-1) {
			 phis = atan2(-1.0+(2.0*h-1.0)/nd, 1.0);			 phim = atan2(-1.0+(2.0*h+1.0)/nd, 1.0);			 phie = atan2(1.0, 1.0-1.0/nd);
		  } else {
			 phis = atan2(-1.0+(2.0*h-1.0)/nd, 1.0);			 phim = atan2(-1.0+(2.0*h+1.0)/nd, 1.0);			 phie = atan2(-1.0+(2.0*h+3.0)/nd, 1.0);
		  }
		  int xh = xn/2;		  int yh = yn/2;		  int zh = zn/2;
		  double R21 = R2/R1;		  double R31 = R3/R1;
		  
		  CpxNumTns tpdata(xn,yn,zn);
		  CpxNumTns& Cblk = C.block(s,wcnt);
		  tpdata = Cblk;
		  //fft
		  fftwnd_plan p = NULL;
		  map<inttriple, fftwnd_plan>::iterator mit = planmap.find( inttriple(xn, intpair(yn,zn)) );
		  if(mit!=planmap.end()) {			 p = (*mit).second;
		  } else {
			 p = fftw3d_create_plan(zn, yn, xn, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE);
			 planmap[ inttriple(xn, intpair(yn,zn)) ] = p;
		  }
		  fftwnd_one(p, (fftw_complex*)tpdata.data(), NULL);		  //cerr<<"wedge s"<<endl;
		  double sqrtprod = sqrt(double(xn*yn*zn));
		  for(int i=0; i<xn; i++)			 for(int j=0; j<yn; j++)				for(int k=0; k<zn; k++)				  tpdata(i,j,k) /= sqrtprod;
		  CpxOffTns wpdata(xn,yn,zn);
		  fdct3d_fftshift(xn,yn,zn,tpdata,wpdata);
		  
		  for(int xcur=(int)ceil(xs); xcur<xe; xcur++) {
			 int yfm = (int)ceil( max(-R2, R21*(-xcur)*tan(thts)) );
			 int yto = (int)floor( min(R2, R21*(-xcur)*tan(thte)) );
			 int zfm = (int)ceil( max(-R3, R31*(-xcur)*tan(phis)) );
			 int zto = (int)floor( min(R3, R31*(-xcur)*tan(phie)) );
			 for(int ycur=yfm; ycur<=yto; ycur++)
				for(int zcur=zfm; zcur<=zto; zcur++) {
				  int tmpx = xcur%xn;				  if(tmpx<-xh) tmpx+=xn;				  if(tmpx>=-xh+xn) tmpx-=xn;
				  int tmpy = ycur%yn;				  if(tmpy<-yh) tmpy+=yn;				  if(tmpy>=-yh+yn) tmpy-=yn;
				  int tmpz = zcur%zn;				  if(tmpz<-zh) tmpz+=zn;				  if(tmpz>=-zh+zn) tmpz-=zn;
				  
				  double thtcur = atan2(ycur/R2, (-xcur)/R1);
				  double phicur = atan2(zcur/R3, (-xcur)/R1);
				  double glbpou;					 fdct3d_globalpou(thtcur, phicur, M_PI/4-atan2(1.0-1.0/nd, 1.0), glbpou);
				  double wtht;
				  if(thtcur<thtm) {
					 if(g==0)						wtht = 1;
					 else {						double l,r;						fdct3d_window( (thtcur-thts)/(thtm-thts), l, r);						wtht = l;					 }
				  } else {
					 if(g==nd-1)						wtht = 1;
					 else {						double l,r;						  fdct3d_window( (thtcur-thtm)/(thte-thtm), l, r);						wtht = r;					 }
				  }
				  double wphi;
				  if(phicur<phim) {
					 if(h==0)						wphi = 1;
					 else {						double l,r;						  fdct3d_window( (phicur-phis)/(phim-phis), l, r);						wphi = l;					 }
				  } else {
					 if(h==nd-1)						wphi = 1;
					 else {						double l,r;						  fdct3d_window( (phicur-phim)/(phie-phim), l, r);						wphi = r;					 }
				  }
				  double pou = glbpou * wtht * wphi;
				  wpdata(tmpx, tmpy, tmpz) *= pou;
				  
				  double ss = sma1(xcur)*sma2(ycur)*sma3(zcur);				  double bb = big1(xcur)*big2(ycur)*big3(zcur);
				  int bi,bj,bk;			 int oi,oj,ok;			 fdct3d_position_aux(N1,N2,N3,b, xcur,ycur,zcur, bi,bj,bk,oi,oj,ok);
				  CpxNumTns& Wblk = W.block(bi,bj,bk);
				  Wblk(oi,oj,ok) += wpdata(tmpx,tmpy,tmpz)  * bb * sqrt(1.0-ss*ss);
				}
		  } //xcur
		} //if
		wcnt++;
	 }
  } //end of face
  //face 4: -y,-z,-x
  for(int f=nd-1; f>=0; f--) {
	 for(int h=nd-1; h>=0; h--) {
		if(crvowners[wcnt]==mpirank) {
		  double ys = -R2;		  double ye = -R2/4+(W2/2)/4;
		  double zs = -R3 + (2*h-1)*W3/2;		  double ze = -R3 + (2*h+3)*W3/2;
		  double xs = -R1 + (2*f-1)*W1/2;		  double xe = -R1 + (2*f+3)*W1/2;
		  int xn = int(ceil(xe-xs));		  int yn = int(ceil(ye-ys));		  int zn = int(ceil(ze-zs));
		  double thts, thtm, thte; //z to y
		  if(h==0) {
			 thts = atan2(-1.0, 1.0-1.0/nd);			 thtm = atan2(-1.0+1.0/nd, 1.0);			 thte = atan2(-1.0+3.0/nd, 1.0);
		  } else if(h==nd-1) {
			 thts = atan2(-1.0+(2.0*h-1.0)/nd, 1.0);			 thtm = atan2(-1.0+(2.0*h+1.0)/nd, 1.0);			 thte = atan2(1.0, 1.0-1.0/nd);
		  } else {
			 thts = atan2(-1.0+(2.0*h-1.0)/nd, 1.0);			 thtm = atan2(-1.0+(2.0*h+1.0)/nd, 1.0);			 thte = atan2(-1.0+(2.0*h+3.0)/nd, 1.0);
		  }
		  double phis, phim, phie; //z to x
		  if(f==0) {
			 phis = atan2(-1.0, 1.0-1.0/nd);			 phim = atan2(-1.0+1.0/nd, 1.0);			 phie = atan2(-1.0+3.0/nd, 1.0);
		  } else if(f==nd-1) {
			 phis = atan2(-1.0+(2.0*f-1.0)/nd, 1.0);			 phim = atan2(-1.0+(2.0*f+1.0)/nd, 1.0);			 phie = atan2(1.0, 1.0-1.0/nd);
		  } else {
			 phis = atan2(-1.0+(2.0*f-1.0)/nd, 1.0);			 phim = atan2(-1.0+(2.0*f+1.0)/nd, 1.0);			 phie = atan2(-1.0+(2.0*f+3.0)/nd, 1.0);
		  }
		  int xh = xn/2;		  int yh = yn/2;		  int zh = zn/2;
		  double R32 = double(F3)/double(F2);		  double R12 = double(F1)/double(F2);

		  CpxNumTns tpdata(xn,yn,zn);
		  CpxNumTns& Cblk = C.block(s,wcnt);
		  tpdata = Cblk;
		  //fft
		  fftwnd_plan p = NULL;
		  map<inttriple, fftwnd_plan>::iterator mit = planmap.find( inttriple(xn, intpair(yn,zn)) );
		  if(mit!=planmap.end()) {			 p = (*mit).second;
		  } else {
			 p = fftw3d_create_plan(zn, yn, xn, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE);
			 planmap[ inttriple(xn, intpair(yn,zn)) ] = p;
		  }
		  fftwnd_one(p, (fftw_complex*)tpdata.data(), NULL);		  //cerr<<"wedge s"<<endl;
		  double sqrtprod = sqrt(double(xn*yn*zn));
		  for(int i=0; i<xn; i++)			 for(int j=0; j<yn; j++)				for(int k=0; k<zn; k++)				  tpdata(i,j,k) /= sqrtprod;
		  CpxOffTns wpdata(xn,yn,zn);
		  fdct3d_fftshift(xn,yn,zn,tpdata,wpdata);
		  
		  for(int ycur=(int)ceil(ys); ycur<ye; ycur++) {
			 int zfm = (int)ceil( max(-R3, R32*(-ycur)*tan(thts)) );
			 int zto = (int)floor( min(R3, R32*(-ycur)*tan(thte)) );
			 int xfm = (int)ceil( max(-R1, R12*(-ycur)*tan(phis)) );
			 int xto = (int)floor( min(R1, R12*(-ycur)*tan(phie)) );
			 for(int zcur=zfm; zcur<=zto; zcur++)
				for(int xcur=xfm; xcur<=xto; xcur++) {
				  int tmpx = xcur%xn;				  if(tmpx<-xh) tmpx+=xn;				  if(tmpx>=-xh+xn) tmpx-=xn;
				  int tmpy = ycur%yn;				  if(tmpy<-yh) tmpy+=yn;				  if(tmpy>=-yh+yn) tmpy-=yn;
				  int tmpz = zcur%zn;				  if(tmpz<-zh) tmpz+=zn;				  if(tmpz>=-zh+zn) tmpz-=zn;
				  
				  double thtcur = atan2(zcur/R3, (-ycur)/R2);
				  double phicur = atan2(xcur/R1, (-ycur)/R2);
				  double glbpou;					 fdct3d_globalpou(thtcur, phicur, M_PI/4-atan2(1.0-1.0/nd, 1.0), glbpou); //CHECK
				  double wtht;
				  if(thtcur<thtm) {
					 if(h==0)						wtht = 1;
					 else {						double l,r;						fdct3d_window( (thtcur-thts)/(thtm-thts), l, r);						wtht = l;					 }
				  } else {
					 if(h==nd-1)						wtht = 1;
					 else {						double l,r;						  fdct3d_window( (thtcur-thtm)/(thte-thtm), l, r);						wtht = r;					 }
				  }
				  double wphi;
				  if(phicur<phim) {
					 if(f==0)						wphi = 1;
					 else {						double l,r;						  fdct3d_window( (phicur-phis)/(phim-phis), l, r);						wphi = l;					 }
				  } else {
					 if(f==nd-1)						wphi = 1;
					 else {						double l,r;						  fdct3d_window( (phicur-phim)/(phie-phim), l, r);						wphi = r;					 }
				  }
				  double pou = glbpou * wtht * wphi;
				  wpdata(tmpx, tmpy, tmpz) *= pou;
				  
				  double ss = sma1(xcur)*sma2(ycur)*sma3(zcur);				  double bb = big1(xcur)*big2(ycur)*big3(zcur);
				  int bi,bj,bk;			 int oi,oj,ok;			 fdct3d_position_aux(N1,N2,N3,b, xcur,ycur,zcur, bi,bj,bk,oi,oj,ok);
				  CpxNumTns& Wblk = W.block(bi,bj,bk);
				  Wblk(oi,oj,ok) += wpdata(tmpx,tmpy,tmpz)  * bb * sqrt(1.0-ss*ss);
				}
		  } //ycur
		}//if
		wcnt++;
	 }
  }//end of face
  //face 5.-z,-x,-y
  for(int g=nd-1; g>=0; g--) {
	 for(int f=nd-1; f>=0; f--) {
		if(crvowners[wcnt]==mpirank) {
		  double zs = -R3;		  double ze = -R3/4+(W3/2)/4;
		  double xs = -R1 + (2*f-1)*W1/2;		double xe = -R1 + (2*f+3)*W1/2;
		  double ys = -R2 + (2*g-1)*W2/2;		double ye = -R2 + (2*g+3)*W2/2;
		  int xn = int(ceil(xe-xs));		  int yn = int(ceil(ye-ys));		  int zn = int(ceil(ze-zs));
		  double thts, thtm, thte; //y to x
		  if(f==0) {
			 thts = atan2(-1.0, 1.0-1.0/nd);			 thtm = atan2(-1.0+1.0/nd, 1.0);			 thte = atan2(-1.0+3.0/nd, 1.0);
		  } else if(f==nd-1) {
			 thts = atan2(-1.0+(2.0*f-1.0)/nd, 1.0);			 thtm = atan2(-1.0+(2.0*f+1.0)/nd, 1.0);			 thte = atan2(1.0, 1.0-1.0/nd);
		  } else {
			 thts = atan2(-1.0+(2.0*f-1.0)/nd, 1.0);			 thtm = atan2(-1.0+(2.0*f+1.0)/nd, 1.0);			 thte = atan2(-1.0+(2.0*f+3.0)/nd, 1.0);
		  }
		  double phis, phim, phie; //z to x
		  if(g==0) {
			 phis = atan2(-1.0, 1.0-1.0/nd);			 phim = atan2(-1.0+1.0/nd, 1.0);			 phie = atan2(-1.0+3.0/nd, 1.0);
		  } else if(g==nd-1) {
			 phis = atan2(-1.0+(2.0*g-1.0)/nd, 1.0);			 phim = atan2(-1.0+(2.0*g+1.0)/nd, 1.0);			 phie = atan2(1.0, 1.0-1.0/nd);
		  } else {
			 phis = atan2(-1.0+(2.0*g-1.0)/nd, 1.0);			 phim = atan2(-1.0+(2.0*g+1.0)/nd, 1.0);			 phie = atan2(-1.0+(2.0*g+3.0)/nd, 1.0);
		  }
		  int xh = xn/2;		  int yh = yn/2;		  int zh = zn/2;
		  double R13 = double(F1)/double(F3);		  double R23 = double(F2)/double(F3);
		  
		  CpxNumTns tpdata(xn,yn,zn);
		  CpxNumTns& Cblk = C.block(s,wcnt);
		  tpdata = Cblk;
		  //fft
		  fftwnd_plan p = NULL;
		  map<inttriple, fftwnd_plan>::iterator mit = planmap.find( inttriple(xn, intpair(yn,zn)) );
		  if(mit!=planmap.end()) {			 p = (*mit).second;
		  } else {
			 p = fftw3d_create_plan(zn, yn, xn, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_IN_PLACE);
			 planmap[ inttriple(xn, intpair(yn,zn)) ] = p;
		  }
		  fftwnd_one(p, (fftw_complex*)tpdata.data(), NULL);		  //cerr<<"wedge s"<<endl;
		  double sqrtprod = sqrt(double(xn*yn*zn));
		  for(int i=0; i<xn; i++)			 for(int j=0; j<yn; j++)				for(int k=0; k<zn; k++)				  tpdata(i,j,k) /= sqrtprod;
		  CpxOffTns wpdata(xn,yn,zn);
		  fdct3d_fftshift(xn,yn,zn,tpdata,wpdata);
		  
		  for(int zcur=(int)ceil(zs); zcur<ze; zcur++) {
			 int xfm = (int)ceil( max(-R1, R13*(-zcur)*tan(thts)) );
			 int xto = (int)floor( min(R1, R13*(-zcur)*tan(thte)) );
			 int yfm = (int)ceil( max(-R2, R23*(-zcur)*tan(phis)) );
			 int yto = (int)floor( min(R2, R23*(-zcur)*tan(phie)) );
			 for(int xcur=xfm; xcur<=xto; xcur++)
				for(int ycur=yfm; ycur<=yto; ycur++) {
				  int tmpx = xcur%xn;				  if(tmpx<-xh) tmpx+=xn;				  if(tmpx>=-xh+xn) tmpx-=xn;
				  int tmpy = ycur%yn;				  if(tmpy<-yh) tmpy+=yn;				  if(tmpy>=-yh+yn) tmpy-=yn;
				  int tmpz = zcur%zn;				  if(tmpz<-zh) tmpz+=zn;				  if(tmpz>=-zh+zn) tmpz-=zn;
				  
				  double thtcur = atan2(xcur/R1, (-zcur)/R3);
				  double phicur = atan2(ycur/R2, (-zcur)/R3);
				  double glbpou;					 fdct3d_globalpou(thtcur, phicur, M_PI/4-atan2(1.0-1.0/nd, 1.0), glbpou);
				  double wtht;
				  if(thtcur<thtm) {
					 if(f==0)						wtht = 1;
					 else {						double l,r;						fdct3d_window( (thtcur-thts)/(thtm-thts), l, r);						wtht = l;					 }
				  } else {
					 if(f==nd-1)						wtht = 1;
					 else {						double l,r;						  fdct3d_window( (thtcur-thtm)/(thte-thtm), l, r);						wtht = r;					 }
				  }
				  double wphi;
				  if(phicur<phim) {
					 if(g==0)						wphi = 1;
					 else {						double l,r;						  fdct3d_window( (phicur-phis)/(phim-phis), l, r);						wphi = l;					 }
				  } else {
					 if(g==nd-1)						wphi = 1;
					 else {						double l,r;						  fdct3d_window( (phicur-phim)/(phie-phim), l, r);						wphi = r;					 }
				  }
				  double pou = glbpou * wtht * wphi;
				  wpdata(tmpx, tmpy, tmpz) *= pou;
				  				  
				  double ss = sma1(xcur)*sma2(ycur)*sma3(zcur);				  double bb = big1(xcur)*big2(ycur)*big3(zcur);
				  int bi,bj,bk;			 int oi,oj,ok;			 fdct3d_position_aux(N1,N2,N3,b, xcur,ycur,zcur, bi,bj,bk,oi,oj,ok);
				  CpxNumTns& Wblk = W.block(bi,bj,bk);
				  Wblk(oi,oj,ok) += wpdata(tmpx,tmpy,tmpz)  * bb * sqrt(1.0-ss*ss);
				}
		  }//zcur
		}//if
		wcnt++;
	 }
  }//end of face
  iA(wcnt==nd*nd*nf);
  
  //remove plans
  for(map<inttriple, fftwnd_plan>::iterator mit=planmap.begin(); mit!=planmap.end(); mit++) {
	 fftwnd_plan p = (*mit).second;
	 fftwnd_destroy_plan(p);
  }
  
  return 0;
}
Ejemplo n.º 8
0
int CpxCrvletPrtd::scatter(vector< vector<bool> >& newexists)
{
  //LEXING: usually only called once
  vector< vector<int> >& c = _nx;
  //1. the global vector
  vector<int> glblszs(mpisize(), 0);
  int glbnum = 0;
  for(int s=0; s<c.size(); s++)	 for(int w=0; w<c[s].size(); w++) {
	 int pi = _owners[s][w];
	 glblszs[pi] += _sizes[s][w];
	 glbnum += _sizes[s][w];
  }
  vector<int> glbaccs(mpisize(), 0);
  int tmp = 0;
  for(int pi=0; pi<mpisize(); pi++) {
	 glbaccs[pi] = tmp;
	 tmp += glblszs[pi];
  }
  vector< vector<int> > glbstts(c); //not cleared, but okay
  for(int s=0; s<c.size(); s++)	 for(int w=0; w<c[s].size(); w++) {
	 int pi = _owners[s][w];
	 glbstts[s][w] = glbaccs[pi];
	 glbaccs[pi] += _sizes[s][w];
  }
  
  int lclsum = 0;
  vector<int> l2gmap;
  for(int s=0; s<c.size(); s++)	 for(int w=0; w<c[s].size(); w++) {
	 if(newexists[s][w]==true && _exists[s][w]==false) {
		lclsum += _sizes[s][w];
		for(int g=0; g<_sizes[s][w]; g++)
		  l2gmap.push_back( glbstts[s][w] + g );
	 }
  }
  iA(l2gmap.size()==lclsum);
  
  IS lclis;  iC( ISCreateStride(PETSC_COMM_SELF, l2gmap.size(), 0, 1, &lclis) );
  IS glbis;  iC( ISCreateGeneral(PETSC_COMM_WORLD, l2gmap.size(), &(l2gmap[0]), &glbis) );
  l2gmap.clear(); //SAVE SPACE
  
  //2. allocate a global vector, and copy data
  Vec glbvec;  iC( VecCreateMPI(PETSC_COMM_WORLD, glblszs[mpirank()], PETSC_DETERMINE, &glbvec) );
  double* glbarr;  iC( VecGetArray(glbvec, &glbarr) );
  double* glbptr = glbarr;
  for(int s=0; s<c.size(); s++)	 for(int w=0; w<c[s].size(); w++) {
	int pi = _owners[s][w];
	if(pi==mpirank()) {
	  double* tmpptr = (double*)(_blocks[s][w].data());
	  for(int g=0; g<_sizes[s][w]; g++) {
		*glbptr = tmpptr[g];		  glbptr++;
	  }
	}
  }
  iC( VecRestoreArray(glbvec, &glbarr) );
  
  Vec lclvec;  iC( VecCreateSeq(PETSC_COMM_SELF, lclsum, &lclvec) );
  
  //3. vec scatter
  VecScatter sc;  iC( VecScatterCreate(glbvec, glbis, lclvec, lclis, &sc) );
  iC( ISDestroy(lclis) );  iC( ISDestroy(glbis) ); //SAVE SPACE
  
  iC( VecScatterBegin(glbvec, lclvec, INSERT_VALUES, SCATTER_FORWARD, sc) );
  iC( VecScatterEnd(  glbvec, lclvec, INSERT_VALUES, SCATTER_FORWARD, sc) );
  
  iC( VecScatterDestroy(sc) ); //SAVE SPACE
  iC( VecDestroy(glbvec) );
  
  //4. store
  double* lclarr;  iC( VecGetArray(lclvec, &lclarr) );
  double* lclptr = lclarr;
  for(int s=0; s<c.size(); s++)	 for(int w=0; w<c[s].size(); w++) {
	 if(newexists[s][w]==true && _exists[s][w]==false) {
		_blocks[s][w].resize(_nx[s][w], _ny[s][w], _nz[s][w]);
		double* tmpptr = (double*)(_blocks[s][w].data());
		for(int g=0; g<_sizes[s][w]; g++) {
		  tmpptr[g] = *lclptr;		  lclptr++;
		}
		_exists[s][w] = true; //VERY IMPORTANT
	 }
  }
  iC( VecRestoreArray(lclvec, &lclarr) );
  iC( VecDestroy(lclvec) );
  return 0;
}
Ejemplo n.º 9
0
int CpxCrvletPrtd::combine()
{
  //LEXING: usually only called once
  vector< vector<int> >& c = _nx;
  //1. the global vector
  vector<int> glblszs(mpisize(), 0);
  int glbnum = 0;
  for(int s=0; s<c.size(); s++)	 for(int w=0; w<c[s].size(); w++) {
	 int pi = _owners[s][w];
	 glblszs[pi] += _sizes[s][w];
	 glbnum += _sizes[s][w];
  }
  vector<int> glbaccs(mpisize(), 0);
  int tmp = 0;
  for(int pi=0; pi<mpisize(); pi++) {
	 glbaccs[pi] = tmp;
	 tmp += glblszs[pi];
  }
  vector< vector<int> > glbstts(c);
  for(int s=0; s<c.size(); s++)	 for(int w=0; w<c[s].size(); w++) {
	 int pi = _owners[s][w];
	 glbstts[s][w] = glbaccs[pi];
	 glbaccs[pi] += _sizes[s][w];
  }
  
  int lclsum = 0;
  vector<int> l2gmap;
  for(int s=0; s<c.size(); s++)	 for(int w=0; w<c[s].size(); w++) {
	 if(_exists[s][w]==true && _owners[s][w]!=mpirank()) {
		lclsum += _sizes[s][w];
		for(int g=0; g<_sizes[s][w]; g++)
		  l2gmap.push_back( glbstts[s][w] + g );
	 }
  }
  iA(l2gmap.size()==lclsum);
  
  IS lclis;  iC( ISCreateStride(PETSC_COMM_SELF, l2gmap.size(), 0, 1, &lclis) );
  IS glbis;  iC( ISCreateGeneral(PETSC_COMM_WORLD, l2gmap.size(), &(l2gmap[0]), &glbis) );
  l2gmap.clear(); //SAVE SPACE
  
  //2. allocate a global vector and a local vector, put data in local
  Vec glbvec;  iC( VecCreateMPI(PETSC_COMM_WORLD, glblszs[mpirank()], PETSC_DETERMINE, &glbvec) );

  Vec lclvec;  iC( VecCreateSeq(PETSC_COMM_SELF, lclsum, &lclvec) );
  double* lclarr;  iC( VecGetArray(lclvec, &lclarr) );
  double* lclptr = lclarr;
  for(int s=0; s<c.size(); s++)	 for(int w=0; w<c[s].size(); w++) {
	 if(_exists[s][w]==true && _owners[s][w]!=mpirank()) {
		double* tmpptr = (double*)(_blocks[s][w].data());
		for(int g=0; g<_sizes[s][w]; g++) {
		  *lclptr = tmpptr[g];		  lclptr++;
		}
	 }
  }
  iC( VecRestoreArray(lclvec, &lclarr) );
  
  //3. vec scatter
  VecScatter sc;  iC( VecScatterCreate(glbvec, glbis, lclvec, lclis, &sc) );
  iC( ISDestroy(lclis) );  iC( ISDestroy(glbis) ); //SAVE SPACE
  
  iC( VecScatterBegin(glbvec, lclvec, ADD_VALUES, SCATTER_REVERSE, sc) );
  iC( VecScatterEnd(  glbvec, lclvec, ADD_VALUES, SCATTER_REVERSE, sc) );
  
  iC( VecScatterDestroy(sc) ); //SAVE SPACE
  iC( VecDestroy(lclvec) );
  
  //4. store
  double* glbarr;  iC( VecGetArray(glbvec, &glbarr) );
  double* glbptr = glbarr;
  for(int s=0; s<c.size(); s++)	 for(int w=0; w<c[s].size(); w++) {
	 int pi = _owners[s][w];
	 if(pi==mpirank()) {
		double* tmpptr = (double*)(_blocks[s][w].data());
		for(int g=0; g<_sizes[s][w]; g++) {
		  tmpptr[g] += *glbptr;		  glbptr++; //LEXING: += is very important
		}
	 }
  }
  iC( VecRestoreArray(glbvec, &glbarr) );
  iC( VecDestroy(glbvec) );
  
  //IMPORTANT
  for(int s=0; s<c.size(); s++)	 for(int w=0; w<c[s].size(); w++) {
	 if(_owners[s][w]!=mpirank()) {
		_blocks[s][w].resize(0,0,0);
		_exists[s][w] = false;
	 }
  }
  return 0;
}