void MPC::init_spline() { Array<complex<double>,3> rBox(SplineDim[0], SplineDim[1], SplineDim[2]), GBox(SplineDim[0], SplineDim[1], SplineDim[2]); Array<double,3> splineData(SplineDim[0], SplineDim[1], SplineDim[2]); GBox = complex<double>(); Vconst = 0.0; // Now fill in elements of GBox double vol = PtclRef->Lattice.Volume; double volInv = 1.0/vol; for (int iG=0; iG < Gvecs.size(); iG++) { TinyVector<int,OHMMS_DIM> gint = Gints[iG]; PosType G = Gvecs[iG]; double G2 = dot(G,G); TinyVector<int,OHMMS_DIM> index; for (int j=0; j<OHMMS_DIM; j++) index[j] = (gint[j] + SplineDim[j]) % SplineDim[j]; if (!(index[0]==0 && index[1]==0 && index[2]==0)) { GBox(index[0], index[1], index[2]) = vol * Rho_G[iG] * (4.0*M_PI*volInv/G2 - f_G[iG]); Vconst -= 0.5 * vol * vol * norm(Rho_G[iG]) * (4.0*M_PI*volInv/G2 - f_G[iG]); } } // G=0 component calculated seperately GBox(0,0,0) = -vol * f_0 * Rho_G[0]; Vconst += 0.5 * vol * vol * f_0 * norm(Rho_G[0]); app_log() << " Constant potential = " << Vconst << endl; fftw_plan fft = fftw_plan_dft_3d (SplineDim[0], SplineDim[1], SplineDim[2], (fftw_complex*)GBox.data(), (fftw_complex*) rBox.data(), -1, FFTW_ESTIMATE); fftw_execute (fft); fftw_destroy_plan (fft); for (int i0=0; i0<SplineDim[0]; i0++) for (int i1=0; i1<SplineDim[1]; i1++) for (int i2=0; i2<SplineDim[2]; i2++) splineData(i0, i1, i2) = real(rBox(i0,i1,i2)); BCtype_d bc0, bc1, bc2; Ugrid grid0, grid1, grid2; grid0.start=0.0; grid0.end=1.0; grid0.num = SplineDim[0]; grid1.start=0.0; grid1.end=1.0; grid1.num = SplineDim[1]; grid2.start=0.0; grid2.end=1.0; grid2.num = SplineDim[2]; bc0.lCode = bc0.rCode = PERIODIC; bc1.lCode = bc1.rCode = PERIODIC; bc2.lCode = bc2.rCode = PERIODIC; VlongSpline = create_UBspline_3d_d (grid0, grid1, grid2, bc0, bc1, bc2, splineData.data()); // grid0.num = PtclRef->Density_r.size(0); // grid1.num = PtclRef->Density_r.size(1); // grid2.num = PtclRef->Density_r.size(2); // DensitySpline = create_UBspline_3d_d (grid0, grid1, grid2, bc0, bc1, bc2, // 
PtclRef->Density_r.data()); }
/*
 * Destroy the FFTW plans stored in a 2D real grid.
 *
 * grid = grid whose "plan" and "iplan" members are released (rgrid2d *).
 *        A NULL grid is ignored.
 *
 * Each member is reset to NULL after its plan is destroyed, so a second call
 * (or a later "if(grid->plan)" test elsewhere) cannot reach a plan that has
 * already been freed.
 *
 * No return value.
 */
EXPORT void rgrid2d_fftw_free(rgrid2d *grid) {

  if(!grid) return;

  if(grid->plan) {
    fftw_destroy_plan(grid->plan);
    grid->plan = NULL;
  }
  if(grid->iplan) {
    fftw_destroy_plan(grid->iplan);
    grid->iplan = NULL;
  }
}
int main(int argc,char* argv[]) { progname = argv[0]; const char* const short_options = "hf:l:c:i:o:t:d:Ig:r:F:w"; int next_option; double freq = 1,w=0; int L = -1,col = 1; FILE* FIN = stdin,*FOUT = stdout; char* delim; delim = NULL; int cmp=0; int ignore=0; int inv=0; int icol1,icol2; do{ next_option = getopt_long(argc,argv,short_options,long_options,NULL); switch(next_option){ case 'h': PrintUsage(); break; case 'f': freq = atof(optarg); break; case 'l': L = atoi(optarg); break; case 'c': col = atoi(optarg); break; case 't': freq = -atoi(optarg); break; case 'i': FIN = fopen(optarg,"r"); break; case 'o': FOUT = fopen(optarg,"w+"); break; case 'd': delim = (strcmp(optarg,"tab")==0)?"\t":(strcmp(optarg,"spc")==0)?" ":optarg; break; case 'I': cmp=1; break; case 'g': ignore=atoi(optarg); break; case 'r': inv=1; icol1=atoi(strtok(optarg,",")); icol2=atoi(strtok(NULL,",")); freq=0; break; case 'F': freq=-atoi(optarg); break; case 'w': w = 1; break; } }while(next_option!=-1); if(delim==NULL){ delim = malloc(3*sizeof(char)); strcpy(delim," ");} if( FIN==stdin ) fprintf(stderr,"Waiting for Input from stdin\n"); int i; fftw_complex *out,*in; double *data,scale,**data2,*OUT; fftw_plan P; if( inv==0 ){ data = ColumnRead(FIN,delim,&L,ignore,col,&freq); out = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*(L/2+1)); // P = fftw_plan_dft_1d(L,in,out,FFTW_FORWARD,FFTW_ESTIMATE); P = fftw_plan_dft_r2c_1d(L,data,out,FFTW_ESTIMATE); fftw_execute(P); scale = freq/L; L = L/2+1; scale = (w)?scale*2*M_PI:scale; if( cmp==0 ){ for(i=0;i<L;i++) fprintf(FOUT,"%lf %.4e\n",i*scale,pow(cabs(out[i]),2)/(L-1));} else{ for(i=0;i<L;i++) fprintf(FOUT,"%lf %.4e %.4e\n",i*scale,creal(out[i])/(L-1),cimag(out[i])/(L-1));} fftw_destroy_plan(P); free(data); fftw_free(out); } else{ data2 = ColumnRead2(FIN,delim,&L,ignore,icol1,icol2,&freq); L = 2*(L-1); OUT = (double*)malloc(L*sizeof(double)); in = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*(L/2+1)); for( i=0;i<L/2;i++ ) in[i] = 
data2[0][i]+I*data2[1][i]; free(data2[0]); free(data2[1]); free(data2); P = fftw_plan_dft_c2r_1d(L,in,OUT,FFTW_ESTIMATE); fftw_execute(P); for( i=0;i<L;i++ ) fprintf(FOUT,"%lf %.4e\n",(double)i/freq,OUT[i]); free(OUT); fftw_free(in); fftw_destroy_plan(P); } if( FOUT!=stdout ) fclose(FOUT); if( FIN!=stdin ) fclose(FIN); if( delim!=NULL ) free(delim); return 0; }
int SPB::BandSolver_Ez::SolveK(const double *k){ SPB_VERB(1, "Solving k-point (%.14g, %.14g)\n", k[0], k[1]); ClearSolution(); last_k[0] = k[0]; last_k[1] = k[1]; // Prepare the indexing const size_t Ngrid = res[0] * res[1]; if(impl->structure_changed_since_last_solve){ free(impl->ind); impl->ind = (int*)malloc(sizeof(int) * 2*Ngrid); fftw_free(impl->eps_z_fft); impl->eps_z_fft = (complex_t*)fftw_malloc(sizeof(complex_t)*Ngrid); size_t next_index = 0; for(int i = 0; i < res[0]; ++i){ const double fi = ((double)i/(double)res[0]) - 0.5; for(int j = 0; j < res[1]; ++j){ const double fj = ((double)j/(double)res[1]) - 0.5; impl->ind[2*IDX(i,j)+0] = 4*Ngrid+next_index; // get material of this cell (simple pointwise check) int tag, num_poles; //if(2 == dim){ double p[2] = { L.Lr[0]*fi + L.Lr[2]*fj, L.Lr[1]*fi + L.Lr[3]*fj }; if(!shapeset.QueryPt(p, &tag)){ tag = -1; } /*}else{ double p[3] = { L.Lr[0]*fi + L.Lr[3]*fj + L.Lr[6]*fk, L.Lr[1]*fi + L.Lr[4]*fj + L.Lr[7]*fk, L.Lr[2]*fi + L.Lr[5]*fj + L.Lr[8]*fk }; if(ShapeSet3_query_pt(shapeset.d3, p, NULL, &tag)){ }else{ tag = -1; } }*/ if(-1 == tag){ num_poles = 0; impl->eps_z_fft[IDX(i,j)] = 1.; }else{ num_poles = material[tag].poles.size(); impl->eps_z_fft[IDX(i,j)] = material[tag].eps_inf.value[8]; std::cout << i << "\t" << j << "\t" << impl->eps_z_fft[IDX(i,j)] << "\t" << num_poles << std::endl; } impl->ind[2*IDX(i,j)+1] = tag; // update next index next_index += 2*num_poles; } } //impl->N = 4*Ngrid + 3*zero_constraint + next_index; impl->N = 4*Ngrid + next_index; /* switch(pol){ case 1: // Hx,Hy,Ez, divH N = (3+1)*Ngrid + 3*zero_constraint + next_index; break; case 2: // Hz,Ex,Ey (Hz is already div-free) N = (3+0)*Ngrid + 3*zero_constraint + next_index; break; default: // Hx,Hy,Hz,Ex,Ey,Ez, divH N = (6+1)*Ngrid + 6*zero_constraint + next_index; break; }*/ fftw_plan plan_eps = fftw_plan_many_dft( 2/*rank*/, res, 1 /*howmany*/, (fftw_complex*)impl->eps_z_fft, NULL/*inembed*/, 1/*istride*/, Ngrid/*idist*/, 
(fftw_complex*)impl->eps_z_fft, NULL/*onembed*/, 1/*ostride*/, Ngrid/*odist*/, FFTW_BACKWARD, FFTW_ESTIMATE); fftw_execute(plan_eps); fftw_destroy_plan(plan_eps); impl->structure_changed_since_last_solve = false; } sparse_t::entry_map_t Amap; sparse_t::entry_map_t Bmap; { const double Lrl[2] = { hypot(L.Lr[0], L.Lr[1]), hypot(L.Lr[2], L.Lr[3]) }; const double idr[2] = { (double)res[0] / Lrl[0], (double)res[1] / Lrl[1] }; const complex_t Bloch[2] = { complex_t(cos(k[0]*2*M_PI), sin(k[0]*2*M_PI)), complex_t(cos(k[1]*2*M_PI), sin(k[1]*2*M_PI)) }; for(int i = 0; i < res[0]; ++i){ for(int j = 0; j < res[1]; ++j){ size_t row, col; complex_t coeff; const int curmat = impl->ind[2*IDX(i,j)+1]; complex_t eps_z(1.); if(curmat >= 0){ eps_z = material[curmat].eps_inf.value[8]; } #define ASET(ROW,COL,COEFF) Amap[sparse_t::index_t((ROW),(COL))] = (COEFF) #define BSET(ROW,COL,COEFF) Bmap[sparse_t::index_t((ROW),(COL))] = (COEFF) // divH ~ dx Hx + dy Hy + dz Hz // E ~ -i wp V // V ~ +i wp E - i G V - i w0 P // P ~ +i w0 V //for(size_t idbg=0;idbg<ne+nh+1;++idbg){ //ASET(row0+idbg,row0+idbg,1); // for debugging //} // Hx ~ -i dy Ez // Hy ~ +i dx Ez // Ez ~ -i dy Hx + i dx Hy // Hx = complex_t(0,-idr[1]) * (Ez[i,j+1,k] - Ez[i,j,k]) row = HX_OFF + IDX(i,j); coeff = complex_t(0,-idr[1]); col = EZ_OFF + IDX(i,j); // Ez ASET(row,col, -coeff); if(j+1 == res[1]){ col = EZ_OFF + IDX(i,0); // Ez ASET(row,col, coeff/Bloch[1]); }else{ col = EZ_OFF + IDX(i,j+1); // Ez ASET(row,col, coeff); } BSET(row,row, 1); // Hy = complex_t(0, idr[0]) * (Ez[i+1,j,k] - Ez[i,j,k]) row = HY_OFF + IDX(i,j); coeff = complex_t(0, idr[0]); col = EZ_OFF + IDX(i,j); // Ez ASET(row,col, -coeff); if(i+1 == res[0]){ col = EZ_OFF + IDX(0,j); // Ez ASET(row,col, coeff/Bloch[0]); }else{ col = EZ_OFF + IDX(i+1,j); // Ez ASET(row,col, coeff); } BSET(row,row, 1); // divH = idr[0] * (Hx[i+1,j,k] - Hx[i,j,k]) // + idr[1] * (Hy[i,j+1,k] - Hx[i,j,k]) row = DIVH_OFF + IDX(i,j); coeff = complex_t(0,idr[0]); col = HX_OFF + IDX(i,j); 
// Hx ASET(row,col, -coeff); ASET(col,row, -std::conj(coeff)); if(i+1 == res[0]){ col = HX_OFF + IDX(0,j); // Hx ASET(row,col, coeff/Bloch[0]); ASET(col,row, std::conj(coeff/Bloch[0])); }else{ col = HX_OFF + IDX(i+1,j); // Hx ASET(row,col, coeff); ASET(col,row, std::conj(coeff)); } coeff = complex_t(0,idr[1]); col = HY_OFF + IDX(i,j); // Hy ASET(row,col, -coeff); ASET(col,row, -std::conj(coeff)); if(j+1 == res[1]){ col = HY_OFF + IDX(i,0); // Hy ASET(row,col, coeff/Bloch[1]); ASET(col,row, std::conj(coeff/Bloch[1])); }else{ col = HY_OFF + IDX(i,j+1); // Hy ASET(row,col, coeff); ASET(col,row, std::conj(coeff)); } BSET(row,row, 0); // Ez = complex_t(0,-idr[1]) * (Hx[i,j,k] - Hx[i,j-1,k]) // + complex_t(0, idr[0]) * (Hy[i,j,k] - Hy[i-1,j,k]) row = EZ_OFF + IDX(i,j); coeff = complex_t(0,-idr[1]); col = HX_OFF + IDX(i,j); // Hx ASET(row,col, coeff); if(0 == j){ col = HX_OFF + IDX(i,res[1]-1); // Hx ASET(row,col, -coeff*Bloch[1]); }else{ col = HX_OFF + IDX(i,j-1); // Hx ASET(row,col, -coeff); } coeff = complex_t(0, idr[0]); col = HY_OFF + IDX(i,j); // Hy ASET(row,col, coeff); if(0 == i){ col = HY_OFF + IDX(res[0]-1,j); // Hy ASET(row,col, -coeff*Bloch[0]); }else{ col = HY_OFF + IDX(i-1,j); // Hy ASET(row,col, -coeff); } BSET(row,row, eps_z); if(curmat >= 0){ const int row0 = impl->ind[2*IDX(i,j)+0]; const Material &m = material[curmat]; const size_t np = m.poles.size(); for(size_t p = 0; p < np; ++p){ row = row0 + 2*p + 0; // V_p coeff = complex_t(0, m.poles[p].omega_p) * eps_z; col = EZ_OFF + IDX(i,j); // E ASET(row,col, coeff); ASET(col,row, std::conj(coeff)); if(0 != m.poles[p].Gamma){ coeff = complex_t(0,-m.poles[p].Gamma) * eps_z; ASET(row,row, coeff); } BSET(row,row, 1); coeff = complex_t(0, -m.poles[p].omega_0) * eps_z; col = row0 + 2*p + 1; // P ASET(row,col, coeff); ASET(col,row, std::conj(coeff)); BSET(col,col, 1); } } /* }else if(2 == pol){ // Hz ~ +i dy Ex - i dx Ey // Ex ~ +i dy Hz // Ey ~ -i dx Hz }else{ // Hx ~ +i dz Ey - i dy Ez // Hy ~ -i dz Ex + i dx Ez 
// Hz ~ +i dy Ex - i dx Ey // Ex ~ -i dz Hy + i dy Hz // Ey ~ +i dz Hx - i dx Hz // Ez ~ -i dy Hx + i dx Hy }*/ } } } impl->A = new sparse_t(impl->N,impl->N, Amap); impl->B = new sparse_t(impl->N,impl->N, Bmap); if(0){ std::cout << "A="; RNP::Sparse::PrintSparseMatrix(*(impl->A)) << ";" << std::endl; std::cout << "B="; RNP::Sparse::PrintSparseMatrix(*(impl->B)) << ";" << std::endl; exit(0); } /* complex_t *tmp = new complex_t[4*Ngrid]; complex_t *tmp2 = new complex_t[16*Ngrid*Ngrid]; for(size_t i = 0; i < res[0]; ++i){ for(size_t j = 0; j < res[1]; ++j){ tmp[IDX(i,j)] = 0; } } for(size_t i = 0; i < res[0]; ++i){ for(size_t j = 0; j < res[1]; ++j){ tmp[IDX(i,j)] = 1; Precond(tmp, &tmp2[0+IDX(i,j)*Ngrid]); tmp[IDX(i,j)] = 0; } } delete [] tmp2; delete [] tmp; */ return solver->Solve(); /* { const size_t n = 4*Ngrid; complex_t *x = (complex_t*)fftw_malloc(sizeof(complex_t)*n); complex_t *y = (complex_t*)fftw_malloc(sizeof(complex_t)*n); complex_t *z = (complex_t*)fftw_malloc(sizeof(complex_t)*n); const double theta = 0.6; memset(x, 0, sizeof(complex_t)*n); for(int i = 0; i < n; ++i){ x[i] = frand(); } std::cout << "x = "; RNP::IO::PrintVector(n, x, 1) << std::endl; Aop(x, y); Bop(x, z); RNP::TBLAS::Axpy(n, -theta, z,1, y,1); // At this point y = A*x-theta*B*x std::cout << "y = "; RNP::IO::PrintVector(n, y, 1) << std::endl; Op(n, theta, y, z); // At this point z should be the same as x std::cout << "z = "; RNP::IO::PrintVector(n, z, 1) << std::endl; RNP::TBLAS::Axpy(n, -1., x,1,z,1); std::cout << "diff = "; RNP::IO::PrintVector(n, z, 1) << std::endl; fftw_free(z); fftw_free(y); fftw_free(x); }*/ /* size_t n_wanted = 10; size_t ncv = 2*n_wanted+1; SPB::complex_t *w = new SPB::complex_t[n_wanted+ncv*4*Ngrid]; SPB::complex_t *v = w+n_wanted; int nconv = RNP::IRA::ShiftInvert( 4*Ngrid, 0.0, &op_, &bv_, n_wanted, ncv, &RNP::IRA::LargestMagnitude, w, v, 4*Ngrid, NULL, NULL, (void*)this, (void*)this); for(size_t i = 0; i < n_wanted;++i){ std::cout << w[i] << std::endl; } */ }
int main(int argc,char **args) { const ptrdiff_t N0=2056,N1=2056; fftw_plan bplan,fplan; fftw_complex *out; double *in1,*in2; ptrdiff_t alloc_local,local_n0,local_0_start; ptrdiff_t local_n1,local_1_start; PetscInt i,j; PetscMPIInt size,rank; int n,N,N_factor,NM; PetscScalar one=2.0,zero=0.5; PetscScalar two=4.0,three=8.0,four=16.0; PetscScalar a,*x_arr,*y_arr,*z_arr; PetscReal enorm; Vec fin,fout,fout1; Vec ini,final; PetscRandom rnd; PetscErrorCode ierr; PetscInt *indx3,tempindx,low,*indx4,tempindx1; ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr; ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rnd);CHKERRQ(ierr); alloc_local = fftw_mpi_local_size_2d_transposed(N0,N1/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); #if defined(DEBUGGING) printf("The value alloc_local is %ld from process %d\n",alloc_local,rank); printf("The value local_n0 is %ld from process %d\n",local_n0,rank); printf("The value local_0_start is %ld from process %d\n",local_0_start,rank); /* printf("The value local_n1 is %ld from process %d\n",local_n1,rank); */ /* printf("The value local_1_start is %ld from process %d\n",local_1_start,rank); */ /* printf("The value local_n0 is %ld from process %d\n",local_n0,rank); */ #endif /* Allocate space for input and output arrays */ in1=(double*)fftw_malloc(sizeof(double)*alloc_local*2); in2=(double*)fftw_malloc(sizeof(double)*alloc_local*2); out=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); N = 2*N0*(N1/2+1); N_factor = N0*N1; n = 2*local_n0*(N1/2+1); /* printf("The value N is %d from process %d\n",N,rank); */ /* printf("The value n is %d from process %d\n",n,rank); */ /* printf("The value n1 is %d from process %d\n",n1,rank);*/ /* Creating data vector and accompanying array with VeccreateMPIWithArray */ ierr = 
VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,N,(PetscScalar*)in1,&fin);CHKERRQ(ierr); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,N,(PetscScalar*)out,&fout);CHKERRQ(ierr); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,N,(PetscScalar*)in2,&fout1);CHKERRQ(ierr); /* Set the vector with random data */ ierr = VecSet(fin,zero);CHKERRQ(ierr); /* for (i=0;i<N0*N1;i++) */ /* { */ /* VecSetValues(fin,1,&i,&one,INSERT_VALUES); */ /* } */ /* VecSet(fin,one); */ i =0; ierr = VecSetValues(fin,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr); i =1; ierr = VecSetValues(fin,1,&i,&two,INSERT_VALUES);CHKERRQ(ierr); i =4; ierr = VecSetValues(fin,1,&i,&three,INSERT_VALUES);CHKERRQ(ierr); i =5; ierr = VecSetValues(fin,1,&i,&four,INSERT_VALUES);CHKERRQ(ierr); ierr = VecAssemblyBegin(fin);CHKERRQ(ierr); ierr = VecAssemblyEnd(fin);CHKERRQ(ierr); ierr = VecSet(fout,zero);CHKERRQ(ierr); ierr = VecSet(fout1,zero);CHKERRQ(ierr); /* Get the meaningful portion of array */ ierr = VecGetArray(fin,&x_arr);CHKERRQ(ierr); ierr = VecGetArray(fout1,&z_arr);CHKERRQ(ierr); ierr = VecGetArray(fout,&y_arr);CHKERRQ(ierr); fplan=fftw_mpi_plan_dft_r2c_2d(N0,N1,(double*)x_arr,(fftw_complex*)y_arr,PETSC_COMM_WORLD,FFTW_ESTIMATE); bplan=fftw_mpi_plan_dft_c2r_2d(N0,N1,(fftw_complex*)y_arr,(double*)z_arr,PETSC_COMM_WORLD,FFTW_ESTIMATE); fftw_execute(fplan); fftw_execute(bplan); ierr = VecRestoreArray(fin,&x_arr); ierr = VecRestoreArray(fout1,&z_arr); ierr = VecRestoreArray(fout,&y_arr); /* VecView(fin,PETSC_VIEWER_STDOUT_WORLD); */ ierr = VecCreate(PETSC_COMM_WORLD,&ini);CHKERRQ(ierr); ierr = VecCreate(PETSC_COMM_WORLD,&final);CHKERRQ(ierr); ierr = VecSetSizes(ini,local_n0*N1,N0*N1);CHKERRQ(ierr); ierr = VecSetSizes(final,local_n0*N1,N0*N1);CHKERRQ(ierr); ierr = VecSetFromOptions(ini);CHKERRQ(ierr); ierr = VecSetFromOptions(final);CHKERRQ(ierr); if (N1%2==0) { NM = N1+2; } else { NM = N1+1; } /*printf("The Value of NM is %d",NM); */ ierr = VecGetOwnershipRange(fin,&low,NULL); /*printf("The local index is %d from 
%d\n",low,rank); */ ierr = PetscMalloc1(local_n0*N1,&indx3); ierr = PetscMalloc1(local_n0*N1,&indx4); for (i=0;i<local_n0;i++) { for (j=0;j<N1;j++) { tempindx = i*N1 + j; tempindx1 = i*NM + j; indx3[tempindx]=local_0_start*N1+tempindx; indx4[tempindx]=low+tempindx1; /* printf("index3 %d from proc %d is \n",indx3[tempindx],rank); */ /* printf("index4 %d from proc %d is \n",indx4[tempindx],rank); */ } } ierr = PetscMalloc2(local_n0*N1,&x_arr,local_n0*N1,&y_arr);CHKERRQ(ierr); /* arr must be allocated for VecGetValues() */ ierr = VecGetValues(fin,local_n0*N1,indx4,(PetscScalar*)x_arr);CHKERRQ(ierr); ierr = VecSetValues(ini,local_n0*N1,indx3,x_arr,INSERT_VALUES);CHKERRQ(ierr); ierr = VecAssemblyBegin(ini);CHKERRQ(ierr); ierr = VecAssemblyEnd(ini);CHKERRQ(ierr); ierr = VecGetValues(fout1,local_n0*N1,indx4,y_arr); ierr = VecSetValues(final,local_n0*N1,indx3,y_arr,INSERT_VALUES); ierr = VecAssemblyBegin(final); ierr = VecAssemblyEnd(final); ierr = PetscFree2(x_arr,y_arr);CHKERRQ(ierr); /* VecScatter vecscat; IS indx1,indx2; for (i=0;i<N0;i++) { indx = i*NM; ISCreateStride(PETSC_COMM_WORLD,N1,indx,1,&indx1); indx = i*N1; ISCreateStride(PETSC_COMM_WORLD,N1,indx,1,&indx2); VecScatterCreate(fin,indx1,ini,indx2,&vecscat); VecScatterBegin(vecscat,fin,ini,INSERT_VALUES,SCATTER_FORWARD); VecScatterEnd(vecscat,fin,ini,INSERT_VALUES,SCATTER_FORWARD); VecScatterBegin(vecscat,fout1,final,INSERT_VALUES,SCATTER_FORWARD); VecScatterEnd(vecscat,fout1,final,INSERT_VALUES,SCATTER_FORWARD); } */ a = 1.0/(PetscReal)N_factor; ierr = VecScale(fout1,a);CHKERRQ(ierr); ierr = VecScale(final,a);CHKERRQ(ierr); /* VecView(ini,PETSC_VIEWER_STDOUT_WORLD); */ /* VecView(final,PETSC_VIEWER_STDOUT_WORLD); */ ierr = VecAXPY(final,-1.0,ini);CHKERRQ(ierr); ierr = VecNorm(final,NORM_1,&enorm);CHKERRQ(ierr); if (enorm > 1.e-10) { ierr = PetscPrintf(PETSC_COMM_WORLD," Error norm of |x - z| = %e\n",enorm);CHKERRQ(ierr); } /* Execute fftw with function fftw_execute and destory it after execution */ 
fftw_destroy_plan(fplan); fftw_destroy_plan(bplan); fftw_free(in1); ierr = VecDestroy(&fin);CHKERRQ(ierr); fftw_free(out); ierr = VecDestroy(&fout);CHKERRQ(ierr); fftw_free(in2); ierr = VecDestroy(&fout1);CHKERRQ(ierr); ierr = VecDestroy(&ini);CHKERRQ(ierr); ierr = VecDestroy(&final);CHKERRQ(ierr); ierr = PetscRandomDestroy(&rnd);CHKERRQ(ierr); ierr = PetscFree(indx3);CHKERRQ(ierr); ierr = PetscFree(indx4);CHKERRQ(ierr); ierr = PetscFinalize(); return ierr; }
/** This routine performs most of the work involved in the analyze modes2d
    command. A breakdown of what the routine does is as follows

    \li fftw plans and in / out arrays are initialized as required
    \li calculate height function is called
    \li The height function is fourier transformed using the fftw library.

    Note: argument switch_fluc
    switch_fluc == 1 for height grid
    switch_fluc == 0 for thickness

    \param modes       output buffer; receives
                       mode_grid_3d[xdir]*(mode_grid_3d[ydir]/2+1) complex values
    \param switch_fluc passed through to calc_fluctuations
    \return 1 on success, -1 on any failure
*/
int modes2d(fftw_complex* modes, int switch_fluc) {
  /* All these variables need to be static so that the fftw3 plan can
     be initialised and reused. Statics start out as NULL. */
  static fftw_plan mode_analysis_plan;
  /** Input values for the fft (height grid) */
  static double* height_grid;
  /** Output values for the fft */
  static fftw_complex* result;

  /** Every time a change is made to the grid calculate the fftw plan
      for the subsequent fft and destroy any existing plans */
  if ( mode_grid_changed ) {
    STAT_TRACE(fprintf(stderr,"%d,initializing fftw for mode analysis \n",this_node));
    if ( xdir + ydir + zdir == -3 ) {
      char *errtxt = runtime_error(128);
      ERROR_SPRINTF(errtxt,"{092 attempt to perform mode analysis with uninitialized grid} ");
      return -1;
    }

    STAT_TRACE(fprintf(stderr,"%d,destroying old fftw plan \n",this_node));
    /* Release the previous buffers and plan and reset the statics, so a
       failure below cannot leave dangling pointers for the next call.
       fftw_cleanup() is deliberately not called: it would invalidate every
       FFTW plan in the program, not only the one owned here. */
    fftw_free(result);
    result = NULL;
    fftw_free(height_grid);
    height_grid = NULL;
    if ( mode_analysis_plan ) {
      fftw_destroy_plan(mode_analysis_plan);
      mode_analysis_plan = NULL;
    }

    /* Allocate memory for input and output arrays. They are released with
       fftw_free(), so they have to come from fftw_malloc(). */
    height_grid = (double*) fftw_malloc((mode_grid_3d[xdir])*sizeof(double)*mode_grid_3d[ydir]);
    result = (fftw_complex*) fftw_malloc((mode_grid_3d[ydir]/2+1)*(mode_grid_3d[xdir])*sizeof(fftw_complex));
    if ( !height_grid || !result ) {
      fprintf(stderr,"%d,could not allocate memory for mode analysis \n",this_node);
      fftw_free(result);
      result = NULL;
      fftw_free(height_grid);
      height_grid = NULL;
      return -1;
    }

    mode_analysis_plan = fftw_plan_dft_r2c_2d(mode_grid_3d[xdir],mode_grid_3d[ydir],height_grid, result,FFTW_ESTIMATE);
    if ( !mode_analysis_plan ) {
      fprintf(stderr,"%d,could not create fftw plan for mode analysis \n",this_node);
      fftw_free(result);
      result = NULL;
      fftw_free(height_grid);
      height_grid = NULL;
      return -1;
    }

    STAT_TRACE(fprintf(stderr,"%d,created new fftw plan \n",this_node));
    /* Only cleared once everything is in place, so a failed set-up is
       retried on the next call. */
    mode_grid_changed = 0;
  }

  /* Update particles */
  updatePartCfg(WITHOUT_BONDS);
  /* Make sure particles are sorted */
  if (!sortPartCfg()) {
    fprintf(stderr,"%d,could not sort partCfg \n",this_node);
    return -1;
  }

  if ( !calc_fluctuations(height_grid, switch_fluc)) {
    char *errtxt = runtime_error(128);
    ERROR_SPRINTF(errtxt,"{034 calculation of height grid failed } ");
    return -1;
  }

  STAT_TRACE(fprintf(stderr,"%d,calling fftw \n",this_node));

  fftw_execute(mode_analysis_plan);
  /* Copy result to modes */
  memcpy(modes, result, mode_grid_3d[xdir]*(mode_grid_3d[ydir]/2 + 1)*sizeof(fftw_complex));

  STAT_TRACE(fprintf(stderr,"%d,called fftw \n",this_node));

  return 1;
}
// calculate a degree 3 spline of 2 dim periodic data: // in each cell, data is approximated by degree 3 polynomial // f(x,y) = p00*N0(x)N0(y) + p01*N0(x)N1(y) + p10*N1(x)N0(y) + ... // where Ni(x) = binom(3,i)*x^i(1-x)^(3-i) spline2d_t spline2d_init(double *data, int m, int n) { int i, j, l1, l2, idx; double *d; double complex *kcx, *kcy, *kd, *kp; double complex wx[4], wy[4]; double complex kcxx, kcyy; fftw_plan cxplan, cyplan, dplan, iplan; spline2d_t myspline; myspline.nx = m; myspline.ny = n; myspline.cells = calloc(m*n,sizeof(spline2d_cell_t)); d = calloc(m*(n/2+1)*2,sizeof(double)); kcx = calloc(m,sizeof(double complex)); kcy = calloc(n,sizeof(double complex)); kd = calloc(m*(n/2+1),sizeof(double complex)); kp = calloc(m*(n/2+1),sizeof(double complex)); cxplan = fftw_plan_dft_1d(m, kcx, kcx, FFTW_BACKWARD, FFTW_MEASURE); cyplan = fftw_plan_dft_1d(n, kcy, kcy, FFTW_BACKWARD, FFTW_MEASURE); dplan = fftw_plan_dft_r2c_2d(m, n, d, kd, FFTW_MEASURE); iplan = fftw_plan_dft_c2r_2d(m, n, kp, d, FFTW_MEASURE); // initialize kcx,kcy kcx[0] = 1.0 + 0.0*I; kcx[1] = 4.0 + 0.0*I; kcx[2] = 1.0 + 0.0*I; kcy[0] = 1.0 + 0.0*I; kcy[1] = 4.0 + 0.0*I; kcy[2] = 1.0 + 0.0*I; fftw_execute(cxplan); fftw_execute(cyplan); // calculate kd for(i = 0; i < m; i++) { for(j = 0; j < n; j++) { idx = i*(n/2 + 0)*2 + j; d[idx] = data[i*n + j]; } } fftw_execute(dplan); // kd now holds fft of data // calculate p's for(l1 = 0; l1 < 4; l1++) { for(l2 = 0; l2 < 4; l2++) { for(i = 0; i < m; i++) { for(j = 0; j < n/2 + 1; j++) { idx = i*(n/2 + 1) + j; // weights calcualted here!! 
wx[0] = 1.0; wx[1] = cexp(2.0*M_PI*I*i/(double)m)*4.0; wx[1]+= cexp(4.0*M_PI*I*i/(double)m)*2.0; wx[2] = cexp(2.0*M_PI*I*i/(double)m)*2.0; wx[2]+= cexp(4.0*M_PI*I*i/(double)m)*4.0; wx[3] = cexp(2.0*M_PI*I*i/(double)m); wy[0] = 1.0; wy[1] = cexp(2.0*M_PI*I*j/(double)n)*4.0; wy[1]+= cexp(4.0*M_PI*I*j/(double)n)*2.0; wy[2] = cexp(2.0*M_PI*I*j/(double)n)*2.0; wy[2]+= cexp(4.0*M_PI*I*j/(double)n)*4.0; wy[3] = cexp(2.0*M_PI*I*j/(double)n); if(l1 == 0 || l1 == 3) { kcxx = 1.0; } else { kcxx = kcx[i]; } if(l2 == 0 || l2 == 3) { kcyy = 1.0; } else { kcyy = kcy[j]; } if(cabs(kcxx*kcyy) > 0) { kp[idx] = kd[idx]*wx[l1]*wy[l2]/(kcxx*kcyy); } } } fftw_execute(iplan); // d holds p[i*n + j][l1*4 + l2] unnormalized for(i = 0; i < m; i++) { for(j = 0; j < n; j++) { myspline.cells[i*n + j].p[l1][l2] = d[i*(n/2+0)*2 + j]/(double)(m*n); } } } } fftw_destroy_plan(cxplan); fftw_destroy_plan(cyplan); fftw_destroy_plan(dplan); fftw_destroy_plan(iplan); free(kcx); free(kcy); free(kd); free(kp); return myspline; }
/*
  InverseFourierTransform() runs a complex-to-real 2D FFTW transform on
  `fourier' and stores the result, scaled by QuantumRange and clamped, in the
  channel of `image' selected by fourier_info->channel.

  Only pixels that exist in the image are written: rows at or beyond
  image->rows end the loop, and columns at or beyond image->columns are
  skipped, because the pixel row is requested with at most image->columns
  pixels.

  Returns MagickFalse if the work buffer cannot be allocated, MagickTrue
  otherwise.
*/
static MagickBooleanType InverseFourierTransform(FourierInfo *fourier_info,
  fftw_complex *fourier,Image *image,ExceptionInfo *exception)
{
  CacheView
    *image_view;

  double
    *source;

  fftw_plan
    fftw_c2r_plan;

  register IndexPacket
    *indexes;

  register PixelPacket
    *q;

  register ssize_t
    i,
    x;

  ssize_t
    y;

  source=(double *) AcquireQuantumMemory((size_t) fourier_info->height,
    fourier_info->width*sizeof(*source));
  if (source == (double *) NULL)
    {
      (void) ThrowMagickException(exception,GetMagickModule(),
        ResourceLimitError,"MemoryAllocationFailed","`%s'",image->filename);
      return(MagickFalse);
    }
#if defined(MAGICKCORE_OPENMP_SUPPORT)
  #pragma omp critical (MagickCore_InverseFourierTransform)
#endif
  {
    /*
      FFTW takes the slowest-varying dimension first, i.e. (rows, columns).
      This matches the height x width layout of `source'.
    */
    fftw_c2r_plan=fftw_plan_dft_c2r_2d(fourier_info->height,
      fourier_info->width,fourier,source,FFTW_ESTIMATE);
    fftw_execute(fftw_c2r_plan);
    fftw_destroy_plan(fftw_c2r_plan);
  }
  i=0L;
  image_view=AcquireCacheView(image);
  for (y=0L; y < (ssize_t) fourier_info->height; y++)
  {
    if (y >= (ssize_t) image->rows)
      break;
    q=GetCacheViewAuthenticPixels(image_view,0L,y,fourier_info->width >
      image->columns ? image->columns : fourier_info->width,1UL,exception);
    if (q == (PixelPacket *) NULL)
      break;
    indexes=GetCacheViewAuthenticIndexQueue(image_view);
    for (x=0L; x < (ssize_t) fourier_info->width; x++)
    {
      /*
        `source' is consumed for every column, but a pixel is written only
        when it lies inside the row returned above.
      */
      if (x < (ssize_t) image->columns)
        {
          switch (fourier_info->channel)
          {
            case RedChannel:
            default:
            {
              SetPixelRed(q,ClampToQuantum(QuantumRange*source[i]));
              break;
            }
            case GreenChannel:
            {
              SetPixelGreen(q,ClampToQuantum(QuantumRange*source[i]));
              break;
            }
            case BlueChannel:
            {
              SetPixelBlue(q,ClampToQuantum(QuantumRange*source[i]));
              break;
            }
            case OpacityChannel:
            {
              SetPixelOpacity(q,ClampToQuantum(QuantumRange*source[i]));
              break;
            }
            case IndexChannel:
            {
              SetPixelIndex(indexes+x,ClampToQuantum(QuantumRange*
                source[i]));
              break;
            }
            case GrayChannels:
            {
              SetPixelGray(q,ClampToQuantum(QuantumRange*source[i]));
              break;
            }
          }
          q++;
        }
      i++;
    }
    if (SyncCacheViewAuthenticPixels(image_view,exception) == MagickFalse)
      break;
  }
  image_view=DestroyCacheView(image_view);
  source=(double *) RelinquishMagickMemory(source);
  return(MagickTrue);
}
/*
  ForwardFourierTransform() copies the channel selected by
  fourier_info->channel (scaled by QuantumScale) into a height x width real
  array, runs a real-to-complex 2D FFTW transform, normalizes the result by
  the number of samples, and writes height x center values to `magnitude'
  and `phase':

    o modulus != MagickFalse: magnitude = |z|,   phase = arg(z)
    o otherwise:              magnitude = Re(z), phase = Im(z)

  Returns MagickFalse if a work buffer cannot be allocated, MagickTrue
  otherwise.
*/
static MagickBooleanType ForwardFourierTransform(FourierInfo *fourier_info,
  const Image *image,double *magnitude,double *phase,ExceptionInfo *exception)
{
  CacheView
    *image_view;

  double
    n,
    *source;

  fftw_complex
    *fourier;

  fftw_plan
    fftw_r2c_plan;

  register const IndexPacket
    *indexes;

  register const PixelPacket
    *p;

  register ssize_t
    i,
    x;

  ssize_t
    y;

  /*
    Generate the forward Fourier transform.
  */
  source=(double *) AcquireQuantumMemory((size_t) fourier_info->height,
    fourier_info->width*sizeof(*source));
  if (source == (double *) NULL)
    {
      (void) ThrowMagickException(exception,GetMagickModule(),
        ResourceLimitError,"MemoryAllocationFailed","`%s'",image->filename);
      return(MagickFalse);
    }
  ResetMagickMemory(source,0,fourier_info->height*fourier_info->width*
    sizeof(*source));
  i=0L;
  image_view=AcquireCacheView(image);
  for (y=0L; y < (ssize_t) fourier_info->height; y++)
  {
    p=GetCacheViewVirtualPixels(image_view,0L,y,fourier_info->width,1UL,
      exception);
    if (p == (const PixelPacket *) NULL)
      break;
    indexes=GetCacheViewVirtualIndexQueue(image_view);
    for (x=0L; x < (ssize_t) fourier_info->width; x++)
    {
      switch (fourier_info->channel)
      {
        case RedChannel:
        default:
        {
          source[i]=QuantumScale*GetPixelRed(p);
          break;
        }
        case GreenChannel:
        {
          source[i]=QuantumScale*GetPixelGreen(p);
          break;
        }
        case BlueChannel:
        {
          source[i]=QuantumScale*GetPixelBlue(p);
          break;
        }
        case OpacityChannel:
        {
          source[i]=QuantumScale*GetPixelOpacity(p);
          break;
        }
        case IndexChannel:
        {
          source[i]=QuantumScale*GetPixelIndex(indexes+x);
          break;
        }
        case GrayChannels:
        {
          source[i]=QuantumScale*GetPixelGray(p);
          break;
        }
      }
      i++;
      p++;
    }
  }
  image_view=DestroyCacheView(image_view);
  fourier=(fftw_complex *) AcquireQuantumMemory((size_t) fourier_info->height,
    fourier_info->center*sizeof(*fourier));
  if (fourier == (fftw_complex *) NULL)
    {
      (void) ThrowMagickException(exception,GetMagickModule(),
        ResourceLimitError,"MemoryAllocationFailed","`%s'",image->filename);
      source=(double *) RelinquishMagickMemory(source);
      return(MagickFalse);
    }
#if defined(MAGICKCORE_OPENMP_SUPPORT)
  #pragma omp critical (MagickCore_ForwardFourierTransform)
#endif
  {
    /*
      The braces keep plan creation AND destruction inside the critical
      section: FFTW's planner routines are not thread safe.  Dimensions are
      passed as (rows, columns) to match the height x width `source' and
      height x center `fourier' buffers.
    */
    fftw_r2c_plan=fftw_plan_dft_r2c_2d(fourier_info->height,
      fourier_info->width,source,fourier,FFTW_ESTIMATE);
    fftw_execute(fftw_r2c_plan);
    fftw_destroy_plan(fftw_r2c_plan);
  }
  source=(double *) RelinquishMagickMemory(source);
  /*
    Normalize Fourier transform by the number of samples.
  */
  n=(double) fourier_info->height*(double) fourier_info->width;
  i=0L;
  for (y=0L; y < (ssize_t) fourier_info->height; y++)
    for (x=0L; x < (ssize_t) fourier_info->center; x++)
    {
#if defined(MAGICKCORE_HAVE_COMPLEX_H)
      fourier[i]/=n;
#else
      fourier[i][0]/=n;
      fourier[i][1]/=n;
#endif
      i++;
    }
  /*
    Generate magnitude and phase (or real and imaginary).
  */
  i=0L;
  if (fourier_info->modulus != MagickFalse)
    for (y=0L; y < (ssize_t) fourier_info->height; y++)
      for (x=0L; x < (ssize_t) fourier_info->center; x++)
      {
        magnitude[i]=cabs(fourier[i]);
        phase[i]=carg(fourier[i]);
        i++;
      }
  else
    for (y=0L; y < (ssize_t) fourier_info->height; y++)
      for (x=0L; x < (ssize_t) fourier_info->center; x++)
      {
        magnitude[i]=creal(fourier[i]);
        phase[i]=cimag(fourier[i]);
        i++;
      }
  fourier=(fftw_complex *) RelinquishMagickMemory(fourier);
  return(MagickTrue);
}
/*--------------------------------------------------------------------------*/
/* Destroy: hands the plan p to fftw_destroy_plan. */
void _fftwD (fftw_plan p)
{
  fftw_destroy_plan(p);
}
/**
 * Function used only internally, to create an FFTW plan for a specified
 * problem (thereby adding to wisdom).
 *
 * FFTW is called directly rather than through LAL, so that wisdom handling
 * stays controllable from the command line even if the XLAL planning
 * routines change. Buffers come from fftw_malloc(), and plans are created
 * with FFTW_UNALIGNED; that flag would have to become input dependent if
 * aligned memory is ever allowed.
 *
 * \return 0 if a plan was created (it is destroyed again before returning),
 *         1 if a buffer could not be allocated or planning failed.
 */
int plan_problem(char type,            /**< 'r' for real or 'c' for complex transform */
                 char direc,           /**< 'f' for forward or 'b'/'r' for backward/reverse transform */
                 UINT4 transform_size, /**< Size of transform to plan */
                 int measurelvl)       /**< Level of patience in planning (0 least, 3 most) */
{
  /* Character tests are written out by hand to stay 'locale' independent. */
  const int is_forward = ( (direc=='f') || (direc=='F') );
  const int is_real = ( (type=='r') || (type=='R') ); /* anything else: complex */
  const size_t elem_size = is_real ? sizeof(double) : sizeof(fftw_complex);

  fftw_plan plan;
  void *inbuf, *outbuf;
  int flags = FFTW_UNALIGNED;
  int status;

  /* Patience levels are cumulative: each level adds to the flags of the
     levels below it; any value other than 0, 1 or 2 is treated as 3. */
  if (measurelvl == 0)
    {
      flags |= FFTW_ESTIMATE;
    }
  else if (measurelvl == 1)
    {
      flags |= FFTW_MEASURE;
    }
  else if (measurelvl == 2)
    {
      flags |= FFTW_PATIENT;
      flags |= FFTW_MEASURE;
    }
  else
    {
      flags |= FFTW_EXHAUSTIVE;
      flags |= FFTW_PATIENT;
      flags |= FFTW_MEASURE;
    }

  inbuf = fftw_malloc(transform_size*elem_size);
  outbuf = fftw_malloc(transform_size*elem_size);
  if ( (!inbuf) || (!outbuf) )
    {
      if (inbuf) fftw_free(inbuf);
      if (outbuf) fftw_free(outbuf);
      return 1;
    }

  if (is_real)
    {
      plan = fftw_plan_r2r_1d(transform_size, (double *) inbuf,
                              (double *) outbuf,
                              (is_forward ? FFTW_R2HC : FFTW_HC2R), flags);
    }
  else
    {
      plan = fftw_plan_dft_1d(transform_size, (fftw_complex *) inbuf,
                              (fftw_complex *) outbuf,
                              (is_forward ? FFTW_FORWARD : FFTW_BACKWARD), flags);
    }

  status = plan ? 0 : 1;

  fftw_free(inbuf);
  fftw_free(outbuf);
  if (plan)
    {
      fftw_destroy_plan(plan);
    }
  return status;
}
/*==============================*/ int main (void) { fftw_complex *deltak; double *deltar; fftw_plan plan; long int nn = 1024; // This needs to be changed to the value of nc long int cn2= nn/2 + 1; long int N = nn*nn*nn; long int Nc = nn*nn*cn2; deltak = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * Nc); deltar = (double *) fftw_malloc(N * sizeof(double)); plan = fftw_plan_dft_r2c_3d(nn, nn, nn, deltar, deltak, FFTW_ESTIMATE); /*===Finishing FFTW initialization===*/ long int i, j, k, index_0; float input; FILE *DEN = fopen(path, "rb"); printf("Reading..\n"); for(k = 0; k < nn; k++) { for(j = 0; j < nn; j++) { for(i = 0; i < nn; i++) { index_0 = index_f(k, j, i); fread(&input, 4, 1, DEN); deltar[index_0] = input - 1.; } } } printf("Finish reading...\n"); /*===Finishing reading data===*/ double r_part, i_part, deltak2; fftw_execute(plan); printf("Finish fftw...\n"); for(k = 0; k < nn; k++) { printf("Squareing...%d\n", k); for(j = 0; j < nn; j++) { for(i = 0; i < cn2; i++) { index_0 = index_k(k, j, i); r_part = deltak[index_0][0]; i_part = deltak[index_0][1]; deltak2 = r_part*r_part + i_part*i_part; deltak[index_0][0] = deltak2; } } } printf("Finish square...\n"); /*===Finishing FFTW & Calculating module===*/ long int x, y, z, ka, kb; double k2, k_mag, kv2, kv_mag, kz2, kz_mag, logk, logkv, logkz, Dlogk; double sincx, sincy, sincz, window; long int count[bins][bins] = {0.}; double k_bin[bins][bins] = {0.}; double p_bin[bins][bins] = {0.}; Dlogk = (log10(nn/2) - log10(1))/bins; //Calculating spacing in log bins printf("%f\t%f\t%d\t%f\t\n", log10(nn/2), log10(1), bins, Dlogk); printf("%d\t%d\n", nn, nn/2); for(k = 0; k < nn; k++) { printf("complicate process~~~%d\n", k); for(j = 0; j < nn; j++) { for(i = 0; i < cn2; i++) { index_0 = index_k(k, j, i); //index to find that element x = i; // index to calculating wavenumber if(j < cn2) y = j; else y = nn - j; if(k < cn2) z = k; else z = nn - k; k2 = x*x + y*y + z*z; kv2 = x*x + y*y; kz2 = z*z; if (k2 == 0) continue; if 
(kv2 == 0) { kb = 0; } else { kv_mag = sqrt(kv2); logkv = log10(kv_mag); kb = logkv/Dlogk; } if (kz2 == 0) { ka = 0; } else { kz_mag = sqrt(kz2); logkz = log10(kz_mag); ka = logkz/Dlogk; } if(ka >= bins) continue; if(kb >= bins) continue; if(x == 0) sincx = 1.; else sincx = sin(PI*x/nn)/(PI*x/nn); if(y == 0) sincy = 1.; else sincy = sin(PI*y/nn)/(PI*y/nn); if(z == 0) sincz = 1.; else sincz = sin(PI*z/nn)/(PI*z/nn); window = pow(sincx, 2)*pow(sincy, 2)*pow(sincz, 2); count[ka][kb] = count[ka][kb] + 1; k_bin[ka][kb] = k_bin[ka][kb] + k_mag; p_bin[ka][kb] = p_bin[ka][kb] + deltak[index_0][0]/window; } } } printf("finish bins...\n"); for(i = 0; i < bins; i++) { for(j = 0; j < bins; j++) { printf("avaraging bins...%d\n", i); k_bin[i][j] = k_bin[i][j]/count[i][j]; p_bin[i][j] = p_bin[i][j]/count[i][j]; // k_bin[i] = k_bin[i]*kf; p_bin[i][j] = p_bin[i][j]*pow(box, 3)/pow(nn, 6); // p_bin[i] = p_bin[i]*pow(box, 3)/pow(nn, 6)*pow(k_bin[i], 3)/2./PI/PI; } } /*===Deconvolving window function & Caculating power specturm in bins===*/ FILE *OUT = fopen(pathout, "w"); for(i = 0; i < bins; i++) { for(j = 0; j < bins; j++) { fprintf(OUT, "%e\t", p_bin[i][j]); //printf("Outputing...%d\n", i); } fprintf(OUT, "\n"); } /*============================*/ fftw_destroy_plan(plan); fftw_free(deltak); fftw_free(deltar); return 0; }
/*
 * Destroys the FFTW plan that `p` points to.
 * The plan is received by address; F77_FUNC_ (defined elsewhere) presumably
 * produces the Fortran-visible symbol name for this wrapper.
 */
void F77_FUNC_(fftw_f77_destroy_plan,FFTW_F77_DESTROY_PLAN)
(fftw_plan *p)
{
     fftw_plan plan = *p;

     fftw_destroy_plan(plan);
}
/*
 * Bins the power |rho(k)|^2 of grid->fields[0] into `nbins` shells of width
 * `dk` in |k|.
 *
 *   grid   grid whose first field is transformed; grid->n gives the
 *          dimensions and grid->window the box corners used for the box size
 *   P      in/out, length nbins: power is ADDED to the incoming values and
 *          then normalised, so the caller is expected to pass zeros
 *          (NOTE(review): not enforced here -- confirm against callers)
 *   kk     out, length nbins: centre of each bin, dk*(ll+0.5)
 *   dk     bin width
 *   nbins  number of bins
 *
 * Modes whose bin index is >= nbins are ignored.  A bin that received no
 * mode keeps its (scaled) incoming value instead of being divided by zero.
 */
void psi_do_power_spectrum(psi_grid* grid, psi_real* P, psi_real* kk, psi_real dk, psi_int nbins) {

	fftw_plan p;
	fftw_complex* rhok;
	psi_int ax, i, j, k, ll;
	psi_dvec halfn, dims;
	psi_rvec kvec, L;
	psi_real ksc, zre, zim;

	for(ax = 0; ax < 3; ++ax) {
		dims.ijk[ax] = grid->n.ijk[ax];
		halfn.ijk[ax] = dims.ijk[ax]/2 + 1;
		L.xyz[ax] = grid->window[1].xyz[ax]-grid->window[0].xyz[ax];
	}

	/* number of modes accumulated into each bin */
	psi_int *cellcount = (psi_int*) psi_malloc(nbins*sizeof(psi_int));
	memset(cellcount, 0, nbins*sizeof(psi_int));

	/* do the FFT and bin the power */
	rhok = (fftw_complex*) fftw_malloc(dims.i*dims.j*halfn.k*sizeof(fftw_complex));
	p = fftw_plan_dft_r2c_3d(dims.i, dims.j, dims.k, grid->fields[0], rhok, FFTW_ESTIMATE);
	fftw_execute(p);
	fftw_destroy_plan(p);

	for(i = 0; i < dims.i; ++i)
	for(j = 0; j < dims.j; ++j)
	for(k = 0; k < halfn.k; ++k) {

		/* compute wavenumbers (arbitrary units); the upper half of the
		 * first two axes maps to negative frequencies */
		kvec.x = TWO_PI/L.x * ((i < halfn.i)? i : (i - dims.i));
		kvec.y = TWO_PI/L.y * ((j < halfn.j)? j : (j - dims.j));
		kvec.z = TWO_PI/L.z * k;
		ksc = sqrt(kvec.x*kvec.x + kvec.y*kvec.y + kvec.z*kvec.z);

		/* get the index and bin the power */
		ll = floor(ksc/dk);
		if(ll < nbins) {
			zre = rhok[dims.j*halfn.k*i + halfn.k*j + k][0];
			zim = rhok[dims.j*halfn.k*i + halfn.k*j + k][1];
			P[ll] += zre*zre + zim*zim;
			++cellcount[ll];
		}
	}
	fftw_free(rhok);

	/* normalize
	 * the factor of 2 is for the real FFT,
	 * we have only counted the +- mode pairs once */
	for(ll = 0; ll < nbins; ++ll) {
		kk[ll] = dk*(ll+0.5);
		P[ll] *= 2.0/(dims.i*dims.j*dims.k);
		/* Empty shells used to divide by zero and yield a non-finite value. */
		if(cellcount[ll] > 0)
			P[ll] /= cellcount[ll];
	}
	free(cellcount);
}
void simulateGrid(Grid * grid, int numTimeSteps, double finalTime, int mutliFlag) { int globalGridDimX = grid->globalGridDimX ; int globalGridDimY = grid->globalGridDimY ; int localDimX = grid->localGridDimX ; int localDimY = grid->localGridDimY ; int globalOffset = grid->globalOffset ; double * gridPointsTrTime0 = fftw_alloc_real(grid->allocScheme) ; double * gridPointsTrTimeT = fftw_alloc_real(grid->allocScheme) ; ptrdiff_t y,x,kx,ky ; fftw_plan planFor ; fftw_plan planBack ; planFor = fftw_mpi_plan_r2r_2d(globalGridDimY, globalGridDimX, grid->gridPoints, gridPointsTrTime0, MPI_COMM_WORLD, FFTW_REDFT10, FFTW_REDFT10, FFTW_ESTIMATE) ; planBack = fftw_mpi_plan_r2r_2d(globalGridDimY, globalGridDimX, gridPointsTrTimeT, grid->gridPoints, MPI_COMM_WORLD, FFTW_REDFT01 , FFTW_REDFT01, FFTW_ESTIMATE) ; // Transform phi_0 fftw_execute(planFor) ; // If multiflag passed then every timestep is printed. // if (mutliFlag) { char filename[10] ; sprintf(filename, "out%d.txt", rank) ; FILE * out = fopen(filename, "w") ; for (int i = 0 ; i < numTimeSteps ; ++i) { double timeT = (i/(double)numTimeSteps)*finalTime ; // Advance transformed F(phi_0) to F(phi_t_i) for (kx = 0 ; kx < localDimX ; ++kx) { for (ky = 0 ; ky < localDimY ; ++ky) { ptrdiff_t globalky = ky+globalOffset ; gridPointsTrTimeT[ky*localDimX+kx] = gridPointsTrTime0[ky*localDimX+kx]*exp(-PI*PI*timeT*(kx*kx + globalky*globalky)) ; } } // Transform back from F(phi_t) to phi_t fftw_execute(planBack) ; // Multiply by normalisation constant 1/(2N) * (1/(2N)). // for (x = 0 ; x < localDimX ; ++x) { for (y = 0 ; y < localDimY ; ++y) { grid->gridPoints[y*localDimX+x] /= (double)(4*globalGridDimX*globalGridDimY) ; } } printSplotData(grid,out) ; fprintf(out,"\n\n") ; } fclose(out) ; } // Else only evolve until final timestep. 
// else { char filename[10] ; sprintf(filename, "out%d.txt", rank) ; FILE * out = fopen(filename, "w") ; // Advance transformed F(phi_0) to F(phi_t_final) for (kx = 0 ; kx < localDimX ; ++kx) { for (ky = 0 ; ky < localDimY ; ++ky) { ptrdiff_t globalky = ky+globalOffset ; gridPointsTrTimeT[ky*localDimX+kx] = gridPointsTrTime0[ky*localDimX+kx]*exp(-PI*PI*finalTime*(kx*kx + globalky*globalky)) ; } } // Transform back from F(phi_t) to phi_t fftw_execute(planBack) ; // Multiply by normalisation constant 1/(2N) * (1/(2N)). // for (x = 0 ; x < localDimX ; ++x) { for (y = 0 ; y < localDimY ; ++y) { grid->gridPoints[y*localDimX+x] /= (double)(4*globalGridDimX*globalGridDimY) ; } } printSplotData(grid,out) ; fclose(out) ; } // Clean up resources. // fftw_destroy_plan(planFor); fftw_destroy_plan(planBack); fftw_free(gridPointsTrTime0) ; fftw_free(gridPointsTrTimeT) ; } /* ----- end of function simulateGrid ----- */
/*
 * Round-trip test of a complex DFT: x -> y = FFT(x) -> z = inverse FFT(y),
 * then z/N is compared with x and the 1-norm of the difference is printed
 * when it exceeds a threshold.
 *
 * With -use_FFTW_interface (the default) the PETSc MATFFTW matrix is used
 * for 1 to 4 dimensions of extent 30; otherwise the FFTW MPI interface is
 * called directly on an N0 x N1 array wrapped in PETSc vectors.
 * -vec_view_draw prints the vectors to standard output.
 */
PetscInt main(PetscInt argc,char **args)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       N0=50,N1=20,N=N0*N1;
  PetscRandom    rng;
  PetscScalar    scale;
  PetscReal      err_norm;
  Vec            x,y,z;
  PetscBool      show=PETSC_FALSE,use_interface=PETSC_TRUE;

  ierr = PetscInitialize(&argc,&args,(char *)0,help);CHKERRQ(ierr);
#if !defined(PETSC_USE_COMPLEX)
  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP, "This example requires complex numbers");
#endif

  ierr = PetscOptionsBegin(PETSC_COMM_WORLD, PETSC_NULL, "FFTW Options", "ex143");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-vec_view_draw", "View the vectors", "ex143", show, &show, PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-use_FFTW_interface", "Use PETSc-FFTW interface", "ex143",use_interface, &use_interface, PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = PetscOptionsGetBool(PETSC_NULL,"-use_FFTW_interface",&use_interface,PETSC_NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);

  ierr = PetscRandomCreate(PETSC_COMM_WORLD, &rng);CHKERRQ(ierr);
  ierr = PetscRandomSetFromOptions(rng);CHKERRQ(ierr);

  if (use_interface) {
    /* Use PETSc-FFTW interface                  */
    /*-------------------------------------------*/
    PetscInt ndim,k,*dim;
    Mat      A;

    N = 1;
    for (ndim = 1; ndim < 5; ndim++) {
      /* Every axis has extent 30, so N grows by that factor per dimension. */
      ierr = PetscMalloc(ndim*sizeof(PetscInt),&dim);CHKERRQ(ierr);
      for (k = 0; k < ndim; k++) {
        dim[k] = 30;
      }
      N *= dim[ndim-1];

      /* Create FFTW object */
      if (!rank) printf("Use PETSc-FFTW interface...%d-DIM:%d \n",ndim,N);
      ierr = MatCreateFFT(PETSC_COMM_WORLD,ndim,dim,MATFFTW,&A);CHKERRQ(ierr);

      /* Create vectors that are compatible with parallel layout of A - must call MatGetVecs()! */
      ierr = MatGetVecsFFTW(A,&x,&y,&z);CHKERRQ(ierr);
      ierr = PetscObjectSetName((PetscObject) x, "Real space vector");CHKERRQ(ierr);
      ierr = PetscObjectSetName((PetscObject) y, "Frequency space vector");CHKERRQ(ierr);
      ierr = PetscObjectSetName((PetscObject) z, "Reconstructed vector");CHKERRQ(ierr);

      /* Set values of space vector x */
      ierr = VecSetRandom(x,rng);CHKERRQ(ierr);
      if (show) {
        ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
      }

      /* Apply FFTW_FORWARD and FFTW_BACKWARD */
      ierr = MatMult(A,x,y);CHKERRQ(ierr);
      if (show) {
        ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
      }
      ierr = MatMultTranspose(A,y,z);CHKERRQ(ierr);

      /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */
      scale = 1.0/(PetscReal)N;
      ierr = VecScale(z,scale);CHKERRQ(ierr);
      if (show) {
        ierr = VecView(z,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
      }
      ierr = VecAXPY(z,-1.0,x);CHKERRQ(ierr);
      ierr = VecNorm(z,NORM_1,&err_norm);CHKERRQ(ierr);
      if (err_norm > 1.e-9 && !rank) {
        ierr = PetscPrintf(PETSC_COMM_SELF,"  Error norm of |x - z| %e\n",err_norm);CHKERRQ(ierr);
      }

      ierr = VecDestroy(&x);CHKERRQ(ierr);
      ierr = VecDestroy(&y);CHKERRQ(ierr);
      ierr = VecDestroy(&z);CHKERRQ(ierr);
      ierr = MatDestroy(&A);CHKERRQ(ierr);
      ierr = PetscFree(dim);CHKERRQ(ierr);
    }
  } else {
    /* Use mpi FFTW without PETSc-FFTW interface, 2D case only */
    /*---------------------------------------------------------*/
    fftw_plan    forward,backward;
    fftw_complex *buf_in,*buf_freq,*buf_back;
    ptrdiff_t    alloc_local,local_n0,local_0_start;

    if (!rank) printf("Use FFTW without PETSc-FFTW interface\n");
    fftw_mpi_init();
    N = N0*N1;
    alloc_local = fftw_mpi_local_size_2d(N0,N1,PETSC_COMM_WORLD,&local_n0,&local_0_start);

    buf_in   = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
    buf_freq = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
    buf_back = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);

    /* The PETSc vectors are created on top of the FFTW buffers. */
    ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)buf_in,&x);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject) x, "Real Space vector");CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)buf_freq,&y);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject) y, "Frequency space vector");CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)buf_back,&z);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject) z, "Reconstructed vector");CHKERRQ(ierr);

    forward  = fftw_mpi_plan_dft_2d(N0,N1,buf_in,buf_freq,PETSC_COMM_WORLD,FFTW_FORWARD,FFTW_ESTIMATE);
    backward = fftw_mpi_plan_dft_2d(N0,N1,buf_freq,buf_back,PETSC_COMM_WORLD,FFTW_BACKWARD,FFTW_ESTIMATE);

    ierr = VecSetRandom(x, rng);CHKERRQ(ierr);
    if (show) {
      ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
    }

    fftw_execute(forward);
    if (show) {
      ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
    }

    fftw_execute(backward);

    /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */
    scale = 1.0/(PetscReal)N;
    ierr = VecScale(z,scale);CHKERRQ(ierr);
    if (show) {
      ierr = VecView(z, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
    }
    ierr = VecAXPY(z,-1.0,x);CHKERRQ(ierr);
    ierr = VecNorm(z,NORM_1,&err_norm);CHKERRQ(ierr);
    if (err_norm > 1.e-11) {
      ierr = PetscPrintf(PETSC_COMM_SELF,"  Error norm of |x - z| %G\n",err_norm);CHKERRQ(ierr);
    }

    /* Free spaces */
    fftw_destroy_plan(forward);
    fftw_destroy_plan(backward);
    fftw_free(buf_in);
    ierr = VecDestroy(&x);CHKERRQ(ierr);
    fftw_free(buf_freq);
    ierr = VecDestroy(&y);CHKERRQ(ierr);
    fftw_free(buf_back);
    ierr = VecDestroy(&z);CHKERRQ(ierr);
  }

  ierr = PetscRandomDestroy(&rng);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return 0;
}
/*
 * Fills `tablespace` with decimated cosine-transform coefficients of the
 * functions generated by the three-term recurrence for order m at
 * bandwidth bw.
 *
 *   bw          bandwidth; number of sample points per function
 *   m           order
 *   tablespace  output; coefficient rows are appended one after another,
 *               each row holding every second coefficient up to degree k
 *   workspace   scratch of at least 9*bw doubles, split below into nine
 *               bw-long vectors
 *
 * For odd m the starting function is divided by sin of the evaluation
 * angles before it is transformed.  Every transform is a REDFT10 whose
 * first coefficient is scaled by sqrt(1/2) and all of whose coefficients
 * are then scaled by 1/sqrt(bw).
 */
void CosPmlTableGen(int bw,
		    int m,
		    double *tablespace,
		    double *workspace)
{
  /* carve the workspace into nine consecutive bw-long vectors */
  double *prevprev  = workspace;
  double *prev      = workspace + bw;
  double *temp1     = workspace + 2 * bw;
  double *temp2     = workspace + 3 * bw;
  double *temp3     = workspace + 4 * bw;
  double *temp4     = workspace + 5 * bw;
  double *x_i       = workspace + 6 * bw;
  double *eval_args = workspace + 7 * bw;
  double *cosres    = workspace + 8 * bw;

  double *tableptr = tablespace;
  double fudge;
  int step, idx, k;

  /* one plan serves every transform: temp4 -> cosres */
  fftw_plan p = fftw_plan_r2r_1d( bw, temp4, cosres,
				  FFTW_REDFT10, FFTW_ESTIMATE ) ;

  /* evaluation nodes and their arc-cosines */
  EvalPts(bw, x_i);
  ArcCosEvalPts(bw, eval_args);

  /* initial values of the first two functions of the recurrence */
  for (idx = 0; idx < bw; ++idx)
    prevprev[idx] = 0.0;

  if (m != 0)
    Pmm_L2(m, eval_args, bw, prev);
  else
    for (idx = 0; idx < bw; ++idx)
      prev[idx] = 0.707106781186547; /* sqrt(1/2) */

  /* odd m: divide out sin x */
  if ( m % 2 )
    for (idx = 0; idx < bw; ++idx)
      prev[idx] /= sin(eval_args[idx]);

  /* k is the highest degree coefficient stored for the current row */
  k = ((m % 2) == 0) ? m : m - 1;

  /* cosine transform of the starting function */
  memcpy( temp4, prev, sizeof(double) * bw );
  fftw_execute( p );
  cosres[0] *= 0.707106781186547 ;
  fudge = 1. / sqrt(((double) bw ) );
  for (idx = 0; idx < bw; ++idx)
    cosres[idx] *= fudge ;

  /* first row: even-indexed coefficients up to k */
  for (idx = 0; idx <= k; idx += 2)
    tableptr[idx/2] = cosres[idx];
  tableptr += k/2+1;

  /* remaining functions via the recurrence */
  for (step = 0; step < bw-m-1; ++step)
    {
      int first;

      vec_mul(L2_cn(m,m+step), prevprev, temp1, bw);
      vec_pt_mul(prev, x_i, temp2, bw);
      vec_mul(L2_an(m,m+step), temp2, temp3, bw);
      vec_add(temp3, temp1, temp4, bw); /* temp4 now contains P(m,m+step+1) */

      /* cosine transform of the new function */
      fftw_execute( p );
      cosres[0] *= 0.707106781186547 ;
      for (idx = 0; idx < bw; ++idx)
	cosres[idx] *= fudge ;

      /* degree goes up by one each step */
      k++;

      /* odd steps keep the even coefficients, even steps the odd ones */
      first = ( step % 2 ) ? 0 : 1;
      for (idx = first; idx <= k; idx += 2)
	tableptr[idx/2] = cosres[idx];
      tableptr += k/2+1;

      /* shift the recurrence window */
      memcpy(prevprev, prev, sizeof(double) * bw);
      memcpy(prev, temp4, sizeof(double) * bw);
    }

  fftw_destroy_plan( p );
}
void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan) { int i,total,length,num; double norm; FFT_DATA *data,*copy; /* pre-remap to prepare for 1st FFTs if needed copy = loc for remap result */ if (plan->pre_plan) { if (plan->pre_target == 0) copy = out; else copy = plan->copy; remap_3d((double *) in, (double *) copy, (double *) plan->scratch, plan->pre_plan); data = copy; } else data = in; // --------------------------------------------------------------------------- // 1d FFTs along mid axis // --------------------------------------------------------------------------- total = plan->total1; length = plan->length1; { int sign = flag == +1 ? FFTW_FORWARD : FFTW_BACKWARD; int N = length; fftw_plan fftplan = fftw_plan_many_dft(1, &N, total/length, data, NULL, 1, length, data, NULL, 1, length, sign, FFTW_ESTIMATE); fftw_execute(fftplan); fftw_destroy_plan(fftplan); } /* 1st mid-remap to prepare for 2nd FFTs copy = loc for remap result */ if (plan->mid1_target == 0) copy = out; else copy = plan->copy; remap_3d((double *) data, (double *) copy, (double *) plan->scratch, plan->mid1_plan); data = copy; // --------------------------------------------------------------------------- // 1d FFTs along mid axis // --------------------------------------------------------------------------- total = plan->total2; length = plan->length2; { int sign = flag == +1 ? 
FFTW_FORWARD : FFTW_BACKWARD; int N = length; fftw_plan fftplan = fftw_plan_many_dft(1, &N, total/length, data, NULL, 1, length, data, NULL, 1, length, sign, FFTW_ESTIMATE); fftw_execute(fftplan); fftw_destroy_plan(fftplan); } /* 2nd mid-remap to prepare for 3rd FFTs copy = loc for remap result */ if (plan->mid2_target == 0) copy = out; else copy = plan->copy; remap_3d((double *) data, (double *) copy, (double *) plan->scratch, plan->mid2_plan); data = copy; // --------------------------------------------------------------------------- // 1d FFTs along slow axis // --------------------------------------------------------------------------- total = plan->total3; length = plan->length3; { int sign = flag == +1 ? FFTW_FORWARD : FFTW_BACKWARD; int N = length; fftw_plan fftplan = fftw_plan_many_dft(1, &N, total/length, data, NULL, 1, length, data, NULL, 1, length, sign, FFTW_ESTIMATE); fftw_execute(fftplan); fftw_destroy_plan(fftplan); } /* post-remap to put data in output format if needed destination is always out */ if (plan->post_plan) remap_3d((double *) data, (double *) out, (double *) plan->scratch, plan->post_plan); /* scaling if required */ if (flag == 1 && plan->scaled) { norm = plan->norm; num = plan->normnum; for (i = 0; i < num; i++) { out[i][0] *= norm; out[i][1] *= norm; } } }
// Called at the end of each timestep. Intended for adding effects to creative scenes. void sceneScriptingCallback() { //The fourier transform shit goes here int i = 0; double x; char *tempBuf; double tx, amplitude; int tempVal = 0; for(i = 0; i<SIZE; i++) { tempBuf = fgets(fileBuf, 100, fp); if(tempBuf) { sscanf(fileBuf, "%*[ ]%lf%*[ ]%lf", &tx, &litude); // printf("Double: count: %d %lf, %lf\n", i, tx, amplitude); in[i][0] = amplitude; in[i][1] = 0; } else { fileFinished=true; break; } } if(!fileFinished) { p = fftw_plan_dft_1d(i, in, out, FFTW_FORWARD, FFTW_ESTIMATE); fftw_execute(p); tempVal = i; for(i = 0; i < tempVal; i++) { x = sqrt(out[i][0] * out[i][0] + out[i][1] * out[i][1]); // printf("Fourier: count: %d\t%f\n", i, x); } fftw_destroy_plan(p); } double buckets[10]; int temp = (SIZE/2)/10; for(int a = 0; a < 10; a++) { double avg = 0; double t = (temp*a+temp<(SIZE/2))?temp*a+temp:(SIZE/2); for(int b = temp*a; b<t; b++) { x = sqrt(out[b][0] * out[b][0] + out[b][1] * out[b][1]); avg+=x; } avg/=temp; buckets[a] = avg; // std::cout << "Printing da buckets " << buckets[a] << std::endl; } if(g_scene_tag == "Visualizer") { // Get the particle tags const std::vector<std::string>& tags = (*g_scene).getParticleTags(); // Get the particle positions VectorXs& x = (*g_scene).getX(); // Get the particle velocities VectorXs& v = (*g_scene).getV(); for(int i = 0; i < tags.size(); i++) { //v[2*i+1] = 1; std::string tempTag = tags[i]; //Parsing the tag: std::size_t found = tempTag.find("mobile"); if(found!=std::string::npos) { int len = tempTag.length(); int num = tempTag[len - 1] - '0'; if(buckets[num]<=1) { x[2*i] = 100; x[2*i+1] = 100; } else { x[2*i] = -2 + 0.5*num; x[2*i+1] = -3; } } found = tempTag.find("spring"); if(found!=std::string::npos) { int len = tempTag.length(); int num = tempTag[len - 1] - '0'; x[2*i+1]=scalingFn(buckets[num]); } /* std::string tempTag = tags[i]; std::size_t found = tempTag.find("bucket"); if(found!=std::string::npos) { int len = 
tempTag.length(); int num = tempTag[len-1]-'0'; if(v[2*i]>=buckets[num] || v[2*i+1]>=buckets[num]) { v[2*i] -= buckets[num]*(rand()%10); v[2*i+1] -= buckets[num]*(rand()%10); } else { v[2*i] += buckets[num]*(rand()%10); v[2*i+1] += buckets[num]*(rand()%10); } } */ } } memset(in, 0, SIZE*sizeof(fftw_complex) ); memset(out, 0, SIZE*sizeof(fftw_complex) ); }
// real forward int check_rf(int n, int mode, int veclen, int sizeOfVect) { int i, j; DFT *p = DFT_init(mode, n, DFT_FLAG_REAL); fftw_plan w[n]; double *in[sizeOfVect]; fftw_complex *out[sizeOfVect]; REAL *sx = SIMDBase_alignedMalloc(sizeOfVect*n*2); // for(j=0;j<veclen;j++) { in[j] = (double *) fftw_malloc(sizeof(double) * n); out[j] = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * (n/2+1)); w[j] = fftw_plan_dft_r2c_1d(n, in[j], out[j], FFTW_ESTIMATE); for(i=0;i<n;i++) { double re = random() / (double)RAND_MAX; sx[i*veclen+j] = re; in[j][i] = re; } } // DFT_execute(p, mode, sx, -1); for(j=0;j<veclen;j++) { fftw_execute(w[j]); } // int success = 1; for(j=0;j<veclen;j++) { for(i=0;i<n/2;i++) { if (i == 0) { if (fabs(sx[(i*2+0)*veclen+j] - creal(out[j][0])) > THRES) success = 0; if (fabs(sx[(i*2+1)*veclen+j] - creal(out[j][n/2])) > THRES) success = 0; } else { if (fabs(sx[(i*2+0)*veclen+j] - creal(out[j][i])) > THRES) success = 0; if (fabs(sx[(i*2+1)*veclen+j] - cimag(out[j][i])) > THRES) success = 0; } } } // for(j=0;j<veclen;j++) { fftw_destroy_plan(w[j]); fftw_free(in[j]); fftw_free(out[j]); } SIMDBase_alignedFree(sx); DFT_dispose(p, mode); // return success; }
void SPB::BandSolver_Ez::ShiftInv(const complex_t &shift, const complex_t *x, complex_t *y) const{ const int Ngrid = res[0]*res[1]; size_t n = 4*Ngrid; RNP::TBLAS::Copy(n, x,1, y,1); // Invert V and P fields // For zero shift, the main E/H field block matrix inverse is unchanged // v = inv(D) h // u = inv(C - W^H inv(D) W) (g - W^H v) for(int i = 0; i < res[0]; ++i){ for(int j = 0; j < res[1]; ++j){ const int tag = impl->ind[2*IDX(i,j)+1]; if(tag < 0){ continue; } const int row0 = impl->ind[2*IDX(i,j)+0]; const Material &mat = material[tag]; const int np = mat.poles.size(); for(int p = 0; p < np; ++p){ const LorentzPole &pole = mat.poles[p]; const complex_t iwp = complex_t(0,pole.omega_p) * mat.eps_inf.value[8]; const complex_t i_w0 = complex_t(0,1./pole.omega_0) / mat.eps_inf.value[8]; const complex_t iG_w0w0 = i_w0 * pole.Gamma / pole.omega_0; y[row0 + 2*p + 0] = -i_w0 * x[row0 + 2*p + 1]; y[row0 + 2*p + 1] = i_w0 * x[row0 + 2*p + 0] + iG_w0w0 * x[row0 + 2*p + 1]; y[EZ_OFF + IDX(i,j)] += iwp * y[row0 + 2*p + 0]; } } } // Data layout: divH, Ez, Hx, Hy fftw_plan plan_forward = fftw_plan_many_dft( 2/*rank*/, res, 4 /*howmany*/, (fftw_complex*)y, NULL/*inembed*/, 1/*istride*/, Ngrid/*idist*/, (fftw_complex*)y, NULL/*onembed*/, 1/*ostride*/, Ngrid/*odist*/, FFTW_BACKWARD, FFTW_ESTIMATE); fftw_plan plan_backward = fftw_plan_many_dft( 2/*rank*/, res, 4 /*howmany*/, (fftw_complex*)y, NULL/*inembed*/, 1/*istride*/, Ngrid/*idist*/, (fftw_complex*)y, NULL/*onembed*/, 1/*ostride*/, Ngrid/*odist*/, FFTW_FORWARD, FFTW_ESTIMATE); const double kshiftsign = 1.0; for(int i = 0; i < res[0]; ++i){ for(int j = 0; j < res[1]; ++j){ double phase = kshiftsign*2*M_PI*(last_k[0]*(double)i/res[0] + last_k[1]*(double)j/res[1]); y[HX_OFF + IDX(i,j)] *= complex_t(cos(phase), sin(phase)); y[HY_OFF + IDX(i,j)] *= complex_t(cos(phase), sin(phase)); y[EZ_OFF + IDX(i,j)] *= complex_t(cos(phase), sin(phase)); y[DIVH_OFF+IDX(i,j)] *= complex_t(cos(phase), sin(phase)); } } fftw_execute(plan_forward); 
complex_t A[4*4], b[4]; size_t ipiv[4]; for(int i = 0; i < res[0]; ++i){ const int fi = (i > res[0]/2 ? i-res[0] : i); for(int j = 0; j < res[1]; ++j){ const int fj = (j > res[1]/2 ? j-res[1] : j); double kpG[2] = { (L.Lk[0]*(last_k[0]+fi) + L.Lk[2]*(last_k[1]+fj)), (L.Lk[1]*(last_k[0]+fi) + L.Lk[3]*(last_k[1]+fj)) }; kpG[0] *= 2*M_PI; kpG[1] *= 2*M_PI; const double klen2 = kpG[0]*kpG[0] + kpG[1]*kpG[1]; const double klen = sqrt(klen2); // At the Gamma point, project out the constant basis vector if(klen < std::numeric_limits<double>::epsilon() * L.CharacteristicKLength()){ y[DIVH_OFF+IDX(i,j)] = 0; y[EZ_OFF + IDX(i,j)] = 0; y[HX_OFF + IDX(i,j)] = 0; y[HY_OFF + IDX(i,j)] = 0; continue; } // [ 0 0 k. 0 0 ] [dvH] = [dvH] // [ 0 -q eps -k x 0 -i wp ] [ E ] = [ E ] // [ k k x -q mu 0 0 ] [ H ] [ H ] // [ 0 0 0 -q eta i w0 ] [ P ] [ P ] // [ 0 i wp 0 -i w0 eta -q eta ] [ V ] [ V ] // given memset(A, 0, sizeof(complex_t)*4*4); // Forward and backward differences #define FDIFF(VEC,D) ((std::exp(complex_t(0,-(VEC)[D]/res[D]))-1.) 
* (double)res[D]) #define BDIFF(VEC,D) ((1.-std::exp(complex_t(0,(VEC)[D]/res[D]))) * (double)res[D]) static const complex_t I(0.,1.); A[2+1*4] = -I*FDIFF(kpG,1); A[2+0*4] = I*BDIFF(kpG,0); A[3+1*4] = I*FDIFF(kpG,0); A[3+0*4] = I*BDIFF(kpG,1); A[1+2*4] = -I*BDIFF(kpG,1); A[1+3*4] = I*BDIFF(kpG,0); A[0+2*4] = I*FDIFF(kpG,0); A[0+3*4] = I*FDIFF(kpG,1); b[0] = y[DIVH_OFF+IDX(i,j)]; b[1] = y[EZ_OFF + IDX(i,j)]; b[2] = y[HX_OFF + IDX(i,j)]; b[3] = y[HY_OFF + IDX(i,j)]; RNP::LinearSolve<'N'>(4,1, A,4, b,4); y[DIVH_OFF+IDX(i,j)] = b[0] / ((double)Ngrid); y[EZ_OFF + IDX(i,j)] = b[1] / ((double)Ngrid); y[HX_OFF + IDX(i,j)] = b[2] / ((double)Ngrid); y[HY_OFF + IDX(i,j)] = b[3] / ((double)Ngrid); } } fftw_execute(plan_backward); fftw_destroy_plan(plan_forward); fftw_destroy_plan(plan_backward); for(int i = 0; i < res[0]; ++i){ for(int j = 0; j < res[1]; ++j){ double phase = -kshiftsign*2*M_PI*(last_k[0]*(double)i/res[0] + last_k[1]*(double)j/res[1]); y[DIVH_OFF+IDX(i,j)] *= complex_t(cos(phase), sin(phase)); y[EZ_OFF + IDX(i,j)] *= complex_t(cos(phase), sin(phase)); y[HX_OFF + IDX(i,j)] *= complex_t(cos(phase), sin(phase)); y[HY_OFF + IDX(i,j)] *= complex_t(cos(phase), sin(phase)); } } // v += inv(D) W u for(int i = 0; i < res[0]; ++i){ for(int j = 0; j < res[1]; ++j){ const int tag = impl->ind[2*IDX(i,j)+1]; if(tag < 0){ continue; } const int row0 = impl->ind[2*IDX(i,j)+0]; const Material &mat = material[tag]; const int np = mat.poles.size(); for(int p = 0; p < np; ++p){ const LorentzPole &pole = mat.poles[p]; y[row0 + 2*p + 0] -= (pole.omega_p/pole.omega_0) * y[EZ_OFF + IDX(i,j)]; } } } }
// real backward int check_rb(int n, int mode, int veclen, int sizeOfVect) { int i, j; DFT *p = DFT_init(mode, n, DFT_FLAG_REAL); fftw_plan w[n]; fftw_complex *in[sizeOfVect]; double *out[sizeOfVect]; REAL *sx = SIMDBase_alignedMalloc(sizeOfVect*n*2); // for(j=0;j<veclen;j++) { in[j] = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * (n/2+1)); out[j] = (double *) fftw_malloc(sizeof(double) * n); w[j] = fftw_plan_dft_c2r_1d(n, in[j], out[j], FFTW_ESTIMATE); for(i=0;i<n/2;i++) { if (i == 0) { in[j][0 ] = (random() / (double)RAND_MAX); in[j][n/2] = (random() / (double)RAND_MAX); } else { in[j][i ] = (random() / (double)RAND_MAX) + (random() / (double)RAND_MAX) * _Complex_I; } } for(i=0;i<n/2;i++) { if (i == 0) { sx[(2*0+0) * veclen + j] = creal(in[j][0 ]); sx[(2*0+1) * veclen + j] = creal(in[j][n/2]); } else { sx[(2*i+0) * veclen + j] = creal(in[j][i]); sx[(2*i+1) * veclen + j] = cimag(in[j][i]); } } } // DFT_execute(p, mode, sx, 1); for(j=0;j<veclen;j++) { fftw_execute(w[j]); } // int success = 1; for(j=0;j<veclen;j++) { for(i=0;i<n/2;i++) { if ((fabs(sx[i * veclen + j]*2 - out[j][i]) > THRES)) { success = 0; } } } // for(j=0;j<veclen;j++) { fftw_destroy_plan(w[j]); fftw_free(in[j]); fftw_free(out[j]); } SIMDBase_alignedFree(sx); DFT_dispose(p, mode); // return success; }
// This is an operator that applies (A - shift*B) on a vector using the FFT void SPB::BandSolver_Ez::OpForw(size_t n, const complex_t &shift, const complex_t *x, complex_t *y) const{ const int Ngrid = res[0]*res[1]; complex_t *t = (complex_t*)fftw_malloc(sizeof(complex_t)*n); // Data layout: Hx, Hy, Ez, divH fftw_plan plan_forward = fftw_plan_many_dft( 2/*rank*/, res, 4 /*howmany*/, (fftw_complex*)t, NULL/*inembed*/, 1/*istride*/, Ngrid/*idist*/, (fftw_complex*)t, NULL/*onembed*/, 1/*ostride*/, Ngrid/*odist*/, FFTW_BACKWARD, FFTW_ESTIMATE); fftw_plan plan_backward = fftw_plan_many_dft( 2/*rank*/, res, 4 /*howmany*/, (fftw_complex*)t, NULL/*inembed*/, 1/*istride*/, Ngrid/*idist*/, (fftw_complex*)t, NULL/*onembed*/, 1/*ostride*/, Ngrid/*odist*/, FFTW_FORWARD, FFTW_ESTIMATE); const double kshiftsign = 1.0; RNP::TBLAS::Copy(n, x,1, t,1); for(int i = 0; i < res[0]; ++i){ for(int j = 0; j < res[1]; ++j){ double phase = kshiftsign*2*M_PI*(last_k[0]*((double)i)/res[0] + last_k[1]*((double)j)/res[1]); t[HX_OFF + IDX(i,j)] *= complex_t(cos(phase), sin(phase)); t[HY_OFF + IDX(i,j)] *= complex_t(cos(phase), sin(phase)); t[EZ_OFF + IDX(i,j)] *= complex_t(cos(phase), sin(phase)); t[DIVH_OFF+IDX(i,j)] *= complex_t(cos(phase), sin(phase)); } } fftw_execute(plan_forward); for(int i = 0; i < res[0]; ++i){ const int fi = (i > res[0]/2 ? i-res[0] : i); for(int j = 0; j < res[1]; ++j){ const int fj = (j > res[1]/2 ? j-res[1] : j); double kpG[2] = { (L.Lk[0]*(last_k[0]+fi) + L.Lk[2]*(last_k[1]+fj)), (L.Lk[1]*(last_k[0]+fi) + L.Lk[3]*(last_k[1]+fj)) }; kpG[0] *= 2*M_PI; kpG[1] *= 2*M_PI; const double klen2 = kpG[0]*kpG[0] + kpG[1]*kpG[1]; const double klen = sqrt(klen2); if(klen < std::numeric_limits<double>::epsilon() * L.CharacteristicKLength()){ continue; } // [ -q mu k x k 0 0 ] [ H ] [ H ] // [ -k x -q eps 0 0 -i wp ] [ E ] = [ E ] // [ k. 
0 0 0 0 ] [dvH] = [dvH] // [ 0 0 0 -q eta i w0 ] [ P ] [ P ] // [ 0 i wp 0 -i w0 eta -q eta ] [ V ] [ V ] // given /* // Forward and backward differences #define FDIFF(VEC,D) ((std::exp(complex_t(0,-(VEC)[D]/res[D]))-1.) * (double)res[D]) #define BDIFF(VEC,D) ((1.-std::exp(complex_t(0,(VEC)[D]/res[D]))) * (double)res[D]) static const complex_t I(0.,1.); y[HX_OFF + IDX(i,j)] = -I*FDIFF(kpG,1)*t[EZ_OFF + IDX(i,j)] + I*BDIFF(kpG,0)*t[DIVH_OFF+IDX(i,j)]; y[HX_OFF + IDX(i,j)] /= ((double)Ngrid); y[HY_OFF + IDX(i,j)] = I*FDIFF(kpG,0)*t[EZ_OFF + IDX(i,j)] + I*BDIFF(kpG,1)*t[DIVH_OFF+IDX(i,j)]; y[HY_OFF + IDX(i,j)] /= ((double)Ngrid); y[EZ_OFF + IDX(i,j)] = -I*BDIFF(kpG,1)*t[HX_OFF + IDX(i,j)] + I*BDIFF(kpG,0)*t[HY_OFF + IDX(i,j)]; y[EZ_OFF + IDX(i,j)] /= ((double)Ngrid); y[DIVH_OFF+IDX(i,j)] = I*FDIFF(kpG,0)*t[HX_OFF + IDX(i,j)] + I*FDIFF(kpG,1)*t[HY_OFF + IDX(i,j)]; y[DIVH_OFF+IDX(i,j)] /= ((double)Ngrid); y[HX_OFF + IDX(i,j)] -= shift*t[HX_OFF + IDX(i,j)]/((double)Ngrid); y[HY_OFF + IDX(i,j)] -= shift*t[HY_OFF + IDX(i,j)]/((double)Ngrid); y[EZ_OFF + IDX(i,j)] -= shift*t[EZ_OFF + IDX(i,j)]/((double)Ngrid); */ static const complex_t I(0.,1.); const size_t n_res = 0; const size_t nh = 4+2*n_res; complex_t *A = new complex_t[nh*nh+2*nh]; complex_t *b = A+nh*nh; complex_t *c = b+nh; memset(A, 0, sizeof(complex_t)*nh*nh); A[0+2*nh] = -I*FDIFF(kpG,1); A[0+3*nh] = I*BDIFF(kpG,0); A[1+2*nh] = I*FDIFF(kpG,0); A[1+3*nh] = I*BDIFF(kpG,1); A[2+0*nh] = -I*BDIFF(kpG,1); A[2+1*nh] = I*BDIFF(kpG,0); A[3+0*nh] = I*FDIFF(kpG,0); A[3+1*nh] = I*FDIFF(kpG,1); A[0+0*nh] = -shift; A[1+1*nh] = -shift; A[2+2*nh] = -shift; b[0] = t[HX_OFF + IDX(i,j)]; b[1] = t[HY_OFF + IDX(i,j)]; b[2] = t[EZ_OFF + IDX(i,j)]; b[3] = t[DIVH_OFF+IDX(i,j)]; RNP::TBLAS::MultMV<'N'>(nh,nh, 1.,A,nh, b,1, 0.,c,1); y[HX_OFF + IDX(i,j)] = c[0] / ((double)Ngrid); y[HY_OFF + IDX(i,j)] = c[1] / ((double)Ngrid); y[EZ_OFF + IDX(i,j)] = c[2] / ((double)Ngrid); y[DIVH_OFF+IDX(i,j)] = c[3] / ((double)Ngrid); delete [] A; } } 
RNP::TBLAS::Copy(n, y,1, t,1); fftw_execute(plan_backward); fftw_destroy_plan(plan_forward); fftw_destroy_plan(plan_backward); RNP::TBLAS::Copy(n, t,1, y,1); for(int i = 0; i < res[0]; ++i){ for(int j = 0; j < res[1]; ++j){ double phase = -kshiftsign*2*M_PI*(last_k[0]*((double)i)/res[0] + last_k[1]*((double)j)/res[1]); y[HX_OFF + IDX(i,j)] *= complex_t(cos(phase), sin(phase)); y[HY_OFF + IDX(i,j)] *= complex_t(cos(phase), sin(phase)); y[EZ_OFF + IDX(i,j)] *= complex_t(cos(phase), sin(phase)); y[DIVH_OFF+IDX(i,j)] *= complex_t(cos(phase), sin(phase)); /* y[HX_OFF + IDX(i,j)] -= shift*x[HX_OFF + IDX(i,j)]; y[HY_OFF + IDX(i,j)] -= shift*x[HY_OFF + IDX(i,j)]; y[EZ_OFF + IDX(i,j)] -= shift*x[EZ_OFF + IDX(i,j)];*/ } } fftw_free(t); }
// complex backward int check_cb(int n, int mode, int veclen, int sizeOfVect) { int i, j; DFT *p = DFT_init(mode, n, 0); fftw_plan w[n]; fftw_complex *in[sizeOfVect], *out[sizeOfVect]; REAL *sx = SIMDBase_alignedMalloc(sizeOfVect*n*2); // for(j=0;j<veclen;j++) { in[j] = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * n); out[j] = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * n); w[j] = fftw_plan_dft_1d(n, in[j], out[j], FFTW_BACKWARD, FFTW_ESTIMATE); for(i=0;i<n;i++) { double re = random() / (double)RAND_MAX; double im = random() / (double)RAND_MAX; sx[(i*2+0)*veclen+j] = re; sx[(i*2+1)*veclen+j] = im; in[j][i] = re + im * _Complex_I; } } // DFT_execute(p, mode, sx, 1); for(j=0;j<veclen;j++) { fftw_execute(w[j]); } // int success = 1; for(j=0;j<veclen;j++) { for(i=0;i<n;i++) { if (fabs(sx[(i*2+0)*veclen+j] - creal(out[j][i])) > THRES) success = 0; if (fabs(sx[(i*2+1)*veclen+j] - cimag(out[j][i])) > THRES) success = 0; } } // for(j=0;j<veclen;j++) { fftw_destroy_plan(w[j]); fftw_free(in[j]); fftw_free(out[j]); } SIMDBase_alignedFree(sx); DFT_dispose(p, mode); // return success; }
/*
 * One-shot forward complex DFT of n points from inputSignal into
 * outputSignal.  A plan is created with FFTW_ESTIMATE, executed once and
 * destroyed again, so nothing is kept between calls.
 */
void FFT_FFTW(fftw_complex *inputSignal, fftw_complex * outputSignal, int n)
{
    fftw_plan forward;

    forward = fftw_plan_dft_1d(n,
                               inputSignal,
                               outputSignal,
                               FFTW_FORWARD,
                               FFTW_ESTIMATE);
    fftw_execute(forward);
    fftw_destroy_plan(forward);
}
bool do_fft_1d_r2c(int M, int N, float* out, float* in) { /* if (num_threads>1) { fftw_init_threads(); fftw_plan_with_nthreads(num_threads); } */ int MN = M * N; fftw_complex* in2 = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * MN); fftw_complex* out2 = (fftw_complex*)fftw_malloc(sizeof(fftw_complex) * MN); for (int ii = 0; ii < MN; ii++) { //in2[ii][0]=in[ii*2]; //in2[ii][1]=in[ii*2+1]; in2[ii][0] = in[ii]; in2[ii][1] = 0; } /* * From FFTW docs: * howmany is the number of transforms to compute. * The resulting plan computes howmany transforms, * where the input of the k-th transform is at * location in+k*idist (in C pointer arithmetic), * and its output is at location out+k*odist. * Plans obtained in this way can often be faster * than calling FFTW multiple times for the individual * transforms. The basic fftw_plan_dft interface corresponds * to howmany=1 (in which case the dist parameters are ignored). * * Each of the howmany transforms has rank rank * and size n, as in the basic interface. * In addition, the advanced interface allows the * input and output arrays of each transform to be * row-major subarrays of larger rank-rank arrays, * described by inembed and onembed parameters, * respectively. {i,o}nembed must be arrays of length * rank, and n should be elementwise less than or equal * to {i,o}nembed. Passing NULL for an nembed parameter * is equivalent to passing n (i.e. same physical and * logical dimensions, as in the basic interface.) * * The stride parameters indicate that the j-th element * of the input or output arrays is located at j*istride * or j*ostride, respectively. (For a multi-dimensional array, * j is the ordinary row-major index.) When combined with * the k-th transform in a howmany loop, from above, this * means that the (j,k)-th element is at j*stride+k*dist. * (The basic fftw_plan_dft interface corresponds to a stride * of 1.) 
*/ fftw_plan p; int rank = 1; int n[] = { N }; int howmany = M; int* inembed = n; int istride = M; int idist = 1; int* onembed = n; int ostride = M; int odist = 1; int sign = FFTW_FORWARD; unsigned flags = FFTW_ESTIMATE; #pragma omp critical p = fftw_plan_many_dft(rank, n, howmany, in2, inembed, istride, idist, out2, onembed, ostride, odist, sign, flags); //p=fftw_plan_dft_1d(N,in2,out2,FFTW_FORWARD,FFTW_ESTIMATE); fftw_execute(p); for (int ii = 0; ii < MN; ii++) { out[ii * 2] = out2[ii][0]; out[ii * 2 + 1] = out2[ii][1]; } fftw_free(in2); fftw_free(out2); /* if (num_threads>1) { fftw_cleanup_threads(); } */ #pragma omp critical fftw_destroy_plan(p); return true; }
// Calling convention:
//   comp_filterbank(f,g,a);
//
// MEX gateway that dispatches every filter of the filterbank to one of three
// MATLAB-level routines and collects the results in a single M-by-1 cell:
//   - filters having a field "h"            -> comp_filterbank_td
//   - filters having a field "H" whose number of elements equals L, when only
//     one column of `a` is in effect        -> comp_filterbank_fft
//   - all remaining filters having "H"      -> comp_filterbank_fftbl
// Filters with neither field are skipped; their output cell is left as
// created by mxCreateCellMatrix.
//
// prhs[0] = f : input data, L-by-W
// prhs[1] = g : cell array with M filter structs
// prhs[2] = a : read through a double pointer; entries [0, M) are used and,
//               if it has more than one column, entries [M, 2M) as well
// plhs[0]     : M-by-1 cell array of results
//
// mxF, p_double, p_float, MAXARRAYLEN and filterbankAtExit are defined outside
// this block. mxF caches the complex FFT work buffer between calls, and
// p_double / p_float cache the FFTW plans bound to that buffer.
//
// NOTE(review): nrhs is not checked and `a` is not verified to be of class
// double before it is dereferenced -- confirm the MATLAB wrapper guarantees it.
void mexFunction( int UNUSED(nlhs), mxArray *plhs[],
                  int UNUSED(nrhs), const mxArray *prhs[] )
{
    // Register the exit handler only on the first call.
    static int atExitRegistered = 0;

    if(!atExitRegistered)
    {
        atExitRegistered = 1;
        mexAtExit(filterbankAtExit);
    }

    const mxArray* mxf = prhs[0];
    const mxArray* mxg = prhs[1];
    const mxArray* mxa = prhs[2];

    // input data length (L) and number of input columns (W)
    const mwSize L = mxGetM(mxf);
    const mwSize W = mxGetN(mxf);

    // number of filters
    const mwSize M = mxGetNumberOfElements(mxg);

    // column count of a
    mwSize acols = mxGetN(mxa);

    // pointer to the data of a
    double *a = (double*) mxGetData(mxa);

    // If every entry in a[M .. 2M-1] equals 1, treat `a` as having a single
    // column. (Presumably that range is the second column of an M-row,
    // column-major matrix -- TODO confirm.)
    if (acols > 1)
    {
        int isOnes = 1;

        for (mwIndex m = 0; m < M; m++)
        {
            isOnes = isOnes && a[M + m] == 1;
        }

        if (isOnes)
        {
            acols = 1;
        }
    }

    // Cell output
    plhs[0] = mxCreateCellMatrix(M, 1);

    // Bookkeeping for sorting the filters into the three groups. Each index
    // array stores the positions (in g) of the filters assigned to the group.
    // NOTE(review): these are variable-length arrays sized by M; M == 0 or a
    // very large M may be a problem -- confirm callers never pass that.
    mwSize tdCount = 0;
    mwSize fftCount = 0;
    mwSize fftblCount = 0;
    mwIndex tdArgsIdx[M];
    mwIndex fftArgsIdx[M];
    mwIndex fftblArgsIdx[M];

    // WALK the filters to determine what has to be done
    for (mwIndex m = 0; m < M; m++)
    {
        mxArray * gEl = mxGetCell(mxg, m);

        // A field "h" selects the time-domain routine.
        if (mxGetField(gEl, 0, "h") != NULL)
        {
            tdArgsIdx[tdCount++] = m;
            continue;
        }

        // A field "H" selects one of the two frequency-domain routines.
        if (mxGetField(gEl, 0, "H") != NULL)
        {
            if (acols == 1 && L == mxGetNumberOfElements(mxGetField(gEl, 0, "H")))
            {
                fftArgsIdx[fftCount++] = m;
                continue;
            }
            else
            {
                fftblArgsIdx[fftblCount++] = m;
                continue;
            }
        }
    }

    if (tdCount > 0)
    {
        /*
           Here, we have to reformat the inputs and pick up results to comply with:
           c=comp_filterbank_td(f,g,a,offset,ext);

           BEWARE OF THE AUTOMATIC DEALLOCATION by the MATLAB engine!
           Arrays can very easily be freed twice, causing segfaults. This
           happens particularly when using mxCreateCell*, which stores
           pointers to other mxArray structs. Setting all such pointers to
           NULL after they have been used seems to solve it.
        */
        mxArray* plhs_td[1];
        mxArray* prhs_td[5];
        prhs_td[0] = (mxArray*) mxf;
        prhs_td[1] = mxCreateCellMatrix(tdCount, 1);
        prhs_td[2] = mxCreateDoubleMatrix(tdCount, 1, mxREAL);
        double* aPtr = mxGetData(prhs_td[2]);
        prhs_td[3] = mxCreateDoubleMatrix(tdCount, 1, mxREAL);
        double* offsetPtr = mxGetData(prhs_td[3]);
        prhs_td[4] = mxCreateString("per");

        for (mwIndex m = 0; m < tdCount; m++)
        {
            mxArray * gEl = mxGetCell(mxg, tdArgsIdx[m]);
            // The "h" array is borrowed from the input struct, not copied.
            mxSetCell(prhs_td[1], m, mxGetField(gEl, 0, "h"));
            // Copying instead would have overhead:
            //mxSetCell((mxArray*)prhs_td[1],m,mxDuplicateArray(mxGetField(gEl,0,"h")));
            aPtr[m] = a[tdArgsIdx[m]];
            // NOTE(review): the "offset" field is used without a NULL check
            // -- confirm it is always present alongside "h".
            offsetPtr[m] = mxGetScalar(mxGetField(gEl, 0, "offset"));
        }

        // Finally call it!
        // comp_filterbank_td(1,plhs_td,5, prhs_td);
        // Going through MATLAB has overhead:
        mexCallMATLAB(1, plhs_td, 5, prhs_td, "comp_filterbank_td");

        // Copy pointers to a proper index in the output + unset all duplicate cell elements
        for (mwIndex m = 0; m < tdCount; m++)
        {
            mxSetCell(plhs[0], tdArgsIdx[m], mxGetCell(plhs_td[0], m));
            mxSetCell(plhs_td[0], m, NULL);
            mxSetCell(prhs_td[1], m, NULL);
        }

        // Only the (now emptied) containers and the temporaries created above
        // are destroyed; prhs_td[0] is the caller's input and is left alone.
        mxDestroyArray(plhs_td[0]);
        mxDestroyArray(prhs_td[1]);
        mxDestroyArray(prhs_td[2]);
        mxDestroyArray(prhs_td[3]);
        mxDestroyArray(prhs_td[4]);
    }

    if (fftCount > 0 || fftblCount > 0)
    {
        // Need to do FFT of mxf
        mwIndex ndim = 2;
        const mwSize dims[] = {L, W};

        // (Re)create the cached buffer and its plan whenever there is no
        // buffer yet, or its size or class differs from the current input.
        if (mxF == NULL || mxGetM(mxF) != L || mxGetN(mxF) != W || mxGetClassID(mxF) != mxGetClassID(mxf))
        {
            if (mxF != NULL)
            {
                mxDestroyArray(mxF);
                mxF = NULL;
                // printf("Should be called just once\n");
            }

            // NOTE(review): if f is neither double nor single, mxF stays NULL
            // and is still handed to the routines below -- confirm that case
            // is excluded by the caller.
            if (mxIsDouble(mxf))
            {
                mxF = mxCreateNumericArray(ndim, dims, mxDOUBLE_CLASS, mxCOMPLEX);
                // W transforms of length L, unit stride inside a column,
                // consecutive columns L elements apart.
                fftw_iodim fftw_dims[1];
                fftw_iodim howmanydims[1];

                fftw_dims[0].n = L;
                fftw_dims[0].is = 1;
                fftw_dims[0].os = 1;

                howmanydims[0].n = W;
                howmanydims[0].is = L;
                howmanydims[0].os = L;

                // Allocate the plan slot on first use, otherwise drop the old plan.
                if (p_double == NULL)
                    p_double = (fftw_plan*) malloc(sizeof(fftw_plan));
                else
                    fftw_destroy_plan(*p_double);

                // FFTW_MEASURE sometimes hangs here
                // In-place plan on split real/imaginary storage of mxF.
                *p_double = fftw_plan_guru_split_dft(
                                1, fftw_dims,
                                1, howmanydims,
                                mxGetData(mxF), mxGetImagData(mxF), mxGetData(mxF), mxGetImagData(mxF),
                                FFTW_ESTIMATE);

            }
            else if (mxIsSingle(mxf))
            {
                mxF = mxCreateNumericArray(ndim, dims, mxSINGLE_CLASS, mxCOMPLEX);
                // mexPrintf("M= %i, N= %i\n",mxGetM(mxF),mxGetN(mxF));
                // Same geometry as the double branch, single-precision FFTW.
                fftwf_iodim fftw_dims[1];
                fftwf_iodim howmanydims[1];

                fftw_dims[0].n = L;
                fftw_dims[0].is = 1;
                fftw_dims[0].os = 1;

                howmanydims[0].n = W;
                howmanydims[0].is = L;
                howmanydims[0].os = L;

                if (p_float == NULL)
                    p_float = (fftwf_plan*) malloc(sizeof(fftwf_plan));
                else
                    fftwf_destroy_plan(*p_float);

                *p_float = fftwf_plan_guru_split_dft(
                               1, fftw_dims,
                               1, howmanydims,
                               mxGetData(mxF), mxGetImagData(mxF),
                               mxGetData(mxF), mxGetImagData(mxF),
                               FFTW_ESTIMATE);

            }
        }

        // Copy the input into the work buffer (imaginary part zeroed unless
        // the input is complex) and transform it in place.
        if (mxIsDouble(mxf))
        {
            memcpy(mxGetPr(mxF), mxGetPr(mxf), L * W * sizeof(double));
            memset(mxGetPi(mxF), 0, L * W * sizeof(double));

            if (mxIsComplex(mxf))
                memcpy(mxGetPi(mxF), mxGetPi(mxf), L * W * sizeof(double));

            fftw_execute(*p_double);

        }
        else if (mxIsSingle(mxf))
        {
            memcpy(mxGetPr(mxF), mxGetPr(mxf), L * W * sizeof(float));
            memset(mxGetPi(mxF), 0, L * W * sizeof(float));

            if (mxIsComplex(mxf))
                memcpy(mxGetPi(mxF), mxGetPi(mxf), L * W * sizeof(float));

            fftwf_execute(*p_float);
        }
    }

    if (fftCount > 0)
    {
        // Arguments for comp_filterbank_fft(F, G, a): the transformed input,
        // a cell of borrowed "H" arrays, and the matching entries of a.
        mxArray* plhs_fft[1];
        mxArray* prhs_fft[3];
        prhs_fft[0] = mxF;
        prhs_fft[1] = mxCreateCellMatrix(fftCount, 1);
        prhs_fft[2] = mxCreateDoubleMatrix(fftCount, 1, mxREAL);
        double* aPtr = mxGetData(prhs_fft[2]);

        for (mwIndex m = 0; m < fftCount; m++)
        {
            mxArray * gEl = mxGetCell(mxg, fftArgsIdx[m]);
            // Borrowed pointer, see the deallocation warning in the td branch.
            mxSetCell(prhs_fft[1], m, mxGetField(gEl, 0, "H"));
            // Copying instead would have overhead:
            //mxSetCell((mxArray*)prhs_td[1],m,mxDuplicateArray(mxGetField(gEl,0,"h")));
            aPtr[m] = a[fftArgsIdx[m]];
        }

        //comp_filterbank_fft(1,plhs_fft,3, prhs_fft);
        mexCallMATLAB(1, plhs_fft, 3, prhs_fft, "comp_filterbank_fft");

        // Move the results into the output cell and detach every shared
        // pointer from the temporary cells before they are destroyed.
        for (mwIndex m = 0; m < fftCount; m++)
        {
            mxSetCell(plhs[0], fftArgsIdx[m], mxGetCell(plhs_fft[0], m));
            mxSetCell(plhs_fft[0], m, NULL);
            mxSetCell(prhs_fft[1], m, NULL);
        }

        // prhs_fft[0] is the cached mxF and must not be destroyed here.
        mxDestroyArray(plhs_fft[0]);
        mxDestroyArray(prhs_fft[1]);
        mxDestroyArray(prhs_fft[2]);
    }

    if (fftblCount > 0)
    {
        // Arguments for comp_filterbank_fftbl(F, G, foff, a, realonly).
        mxArray* plhs_fftbl[1];
        mxArray* prhs_fftbl[5];
        prhs_fftbl[0] = mxF;
        prhs_fftbl[1] = mxCreateCellMatrix(fftblCount, 1);
        prhs_fftbl[2] = mxCreateDoubleMatrix(fftblCount, 1, mxREAL);
        prhs_fftbl[3] = mxCreateDoubleMatrix(fftblCount, 2, mxREAL);
        prhs_fftbl[4] = mxCreateDoubleMatrix(fftblCount, 1, mxREAL);
        double* foffPtr = mxGetData(prhs_fftbl[2]);
        double* aPtr = mxGetData(prhs_fftbl[3]);
        double* realonlyPtr = mxGetData(prhs_fftbl[4]);
        // Set all realonly flags to zero
        memset(realonlyPtr, 0, fftblCount * sizeof * realonlyPtr);

        for (mwIndex m = 0; m < fftblCount; m++)
        {
            mxArray * gEl = mxGetCell(mxg, fftblArgsIdx[m]);
            mxSetCell(prhs_fftbl[1], m, mxGetField(gEl, 0, "H"));
            // NOTE(review): the "foff" field is used without a NULL check
            // -- confirm it is always present for these filters.
            foffPtr[m] = mxGetScalar(mxGetField(gEl, 0, "foff"));
            // First column of the fftblCount-by-2 matrix: entry of a.
            aPtr[m] = a[fftblArgsIdx[m]];

            // Second column: the matching entry from a[M ..] when more than
            // one column of a is in effect, otherwise 1.
            if (acols > 1)
                aPtr[m + fftblCount] = a[fftblArgsIdx[m] + M];
            else
                aPtr[m + fftblCount] = 1;

            // Only if realonly is specified
            mxArray* mxrealonly;

            if ((mxrealonly = mxGetField(gEl, 0, "realonly")))
                realonlyPtr[m] = mxGetScalar(mxrealonly);
        }

        // comp_filterbank_fftbl(1,plhs_fftbl,5, prhs_fftbl);
        mexCallMATLAB(1, plhs_fftbl, 5, prhs_fftbl, "comp_filterbank_fftbl");

        // Move the results into the output cell and detach shared pointers.
        for (mwIndex m = 0; m < fftblCount; m++)
        {
            mxSetCell(plhs[0], fftblArgsIdx[m], mxGetCell(plhs_fftbl[0], m));
            mxSetCell(plhs_fftbl[0], m, NULL);
            mxSetCell(prhs_fftbl[1], m, NULL);
        }

        mxDestroyArray(plhs_fftbl[0]);
        mxDestroyArray(prhs_fftbl[1]);
        mxDestroyArray(prhs_fftbl[2]);
        mxDestroyArray(prhs_fftbl[3]);
        mxDestroyArray(prhs_fftbl[4]);
    }

    // Keep the FFT work buffer alive after this call returns.
    // NOTE(review): this happens only after the mexCallMATLAB calls above; if
    // one of them aborts the MEX function, mxF may be left pointing at an
    // array MATLAB has already reclaimed -- confirm.
    if (mxF != NULL)
        mexMakeArrayPersistent(mxF);

    // Buffers with more than MAXARRAYLEN elements are not cached.
    if (L * W > MAXARRAYLEN && mxF != NULL)
    {
        //printf("Damn. Should not get here\n");
        mxDestroyArray(mxF);
        mxF = NULL;
    }
}
/**
 * Releases the FFTW plan and the input/output buffers held by this object.
 *
 * Every handle is reset to NULL after it has been released, so calling this
 * function again (for example from both an explicit teardown and a
 * destructor) does not destroy the plan a second time.
 */
void FFTController::destroy_fftw_stuff()
{
    if (plan != NULL)
    {
        fftw_destroy_plan(plan);
        plan = NULL;   // previously left dangling, unlike the two buffers
    }

    fftw_free(in_pointer);
    fftw_free(out_pointer);
    in_pointer = out_pointer = NULL;
}
/**
 * Destroys the two FFTW plans held in m_plan and m_ifftPlan, then calls
 * fftw_cleanup().
 *
 * NOTE(review): fftw_cleanup() acts on FFTW's process-wide state, not only on
 * this object's plans -- confirm that no other FFTW plan is still in use
 * whenever an Analyzer is destroyed.
 */
Analyzer::~Analyzer()
{
    fftw_plan owned[] = { m_plan, m_ifftPlan };
    const int ownedCount = (int) (sizeof(owned) / sizeof(owned[0]));

    // Same order as before: m_plan first, then m_ifftPlan.
    for (int k = 0; k < ownedCount; ++k)
    {
        fftw_destroy_plan(owned[k]);
    }

    fftw_cleanup();
}
int main(int argc, char *argv[]) { int readcount; int i,j; int skip=0; int16_t sample[2*FFT_LEN]; double fftavg [FFT_LEN]; int fftavg_counter=FFT_AVG; fftw_complex *in, *out; fftw_plan p; in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * FFT_LEN); out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * FFT_LEN); p = fftw_plan_dft_1d(FFT_LEN, in, out, FFTW_FORWARD, FFTW_ESTIMATE); readcount = fread(sample, sizeof(int16_t), 2*FFT_LEN, stdin); while (readcount > 0) { if(skip == 0) { if(readcount< FFT_LEN/2) { fprintf(stderr, "too little data read for fft\n"); } for( i=0,j=0; i<readcount; i+=2,j++ ) { in[j]= (float)sample[i] + I*(float)sample[i+1]; in[j] *= pow(-1,j); } fftw_execute(p); for ( i=0; i<FFT_LEN ; i++) { //avg_calc += cabs(out[i]); fftavg [i]+= cabs(out[i]); } if(fftavg_counter!=0) { fftavg_counter--; } else { fftavg_counter=FFT_AVG; //fprintf(stderr, "avg: %f \n", avg); for ( i=0; i<FFT_LEN ; i++) { if(i>FFT_LEN/2-BANDWIDTH_BINS/2 && i<FFT_LEN/2+BANDWIDTH_BINS/2) { fprintf(stderr, "%f,", (fftavg[i]/FFT_AVG)/(1710000*16)); } fftavg[i]=0; } fprintf(stderr,"\n"); } skip = 0; } else { skip--; } readcount = fread(sample, sizeof(int16_t), 2*FFT_LEN, stdin); } fftw_destroy_plan(p); fftw_free(in); fftw_free(out); return 0; }