int main(int argc, char **argv) { int status = -1; const int N = 1024; double *c_data = (double *)malloc(N*sizeof(double)*N); double *result = (double *)malloc(N*2*sizeof(double)*N); double *Iresult = (double *)malloc(N*sizeof(double)*N); complex_t *dst1= (complex_t *)malloc(N*sizeof(complex_t)*N); cl_mem g_data = NULL; cl_mem g_Iresult = NULL; cl_mem g_result = NULL; cl_mem g_temp = NULL; for(int i=0; i<N; i++) { for(int j=0; j<N; j++) c_data[i*N + j] = i*N + j+1024; } status = clInitialKernelAPI(); status = clMallocBuf((void**)(&g_data),N*N*sizeof(double)); status = clMallocBuf((void**)(&g_result),N*N*2*sizeof(double)); status = clMallocBuf((void**)(&g_Iresult),N*N*sizeof(double)); status = clMallocBuf((void**)(&g_temp),N*N*2*sizeof(double)); status = clMemcpyBuf((void**)(&c_data),(void**)(&g_data),N*N*sizeof(double), clMemcpyHostToDevice); //GPU 1D FFT time_stamp(0, NULL); for(int i=0; i<1000; i++) status = FFT_1D_OCL(g_data, g_result, log2(N),1, NULL, 0, NULL, NULL); clFinish(GetOclCommandQueue0()); status = clMemcpyBuf((void**)(&g_result),(void**)(&result),N*2*sizeof(double), clMemcpyDeviceToHost); time_stamp(1,"GPU time: "); printf(" \n GPU 1D result: \n "); for(int i=0; i<MIN(8, N); i++) { printf("%f %f \n ", result[i*2], result[i*2+1]); } //GPU 1D IFFT time_stamp(0, NULL); for(int i=0; i<1000; i++) status = FFT_1D_OCL(g_result, g_Iresult, log2(N),-1, NULL, 0, NULL, NULL); clFinish(GetOclCommandQueue0()); status = clMemcpyBuf((void**)(&g_Iresult),(void**)(&Iresult),N*sizeof(double), clMemcpyDeviceToHost); time_stamp(1,"GPU time: "); printf(" \n GPU IFFT 1D Iresult: \n "); for(int i=0; i<MIN(8, N); i++) { printf("%f %f \n ", Iresult[i]); } //GPU 2D FFT time_stamp(0, NULL); for(int i=0; i<20; i++) status = FFT_2D_OCL(g_data, g_result,g_temp, log2(N),log2(N),1 ,NULL, 0, NULL, NULL); clFinish(GetOclCommandQueue0()); time_stamp(1,"\nGPU 2D time: "); status = clMemcpyBuf((void**)(&g_result),(void**)(&result),N*N*2*sizeof(double), clMemcpyDeviceToHost); printf(" GPU 2D result: \n "); for(int i=0; i<MIN(8, N); i++) { printf("%f, %f, %f, %f\n ", result[i*2], result[i*2+1], result[i*2+N*2], result[i*2+1+N*2], result[i*2+8*N*2], result[i*2+1+8*N*2]); } //GPU 2D IFFT time_stamp(0, NULL); for(int i=0; i<20; i++) status = FFT_2D_OCL(g_result,g_Iresult, g_temp, log2(N),log2(N),-1 ,NULL, 0, NULL, NULL); clFinish(GetOclCommandQueue0()); time_stamp(1,"\nGPU 2D IFFT time: "); status = clMemcpyBuf((void**)(&g_Iresult),(void**)(&Iresult),N*N*sizeof(double), clMemcpyDeviceToHost); printf(" GPU 2D IFFT result: \n "); for(int i=0; i<MIN(8, N); i++) { printf("%f, %f\n ", Iresult[i], Iresult[i+N]); } clReleaseKernelAPI(); // CPU 1D FFT time_stamp(0, NULL); for(int i=0; i<1000; i++) status = FFT_R2C_1D(c_data, dst1,log2(N)); time_stamp(1,"\nCPU time: "); printf(" \n CPU result: \n "); for(int i=0; i<MIN(8, N); i++) printf("%f, %f\n", dst1[i].real, dst1[i].imaginary); // CPU 1D IFFT time_stamp(0, NULL); for(int i=0; i<1000; i++) status = FFT_C2R_1D(dst1,result, log2(N)); time_stamp(1,"\nCPU time: "); printf(" \n CPU IFFT 1D result: \n "); for(int i=0; i<MIN(8, N); i++) printf("%f, %f\n", result[i]); // CPU 2D time_stamp(0, NULL); status = FFT_2D(c_data, dst1,log2(N), log2(N)); time_stamp(1,"\nCPU 2D time: "); printf(" \n CPU 2D result: \n "); for(int i=0; i<MIN(8, N); i++) printf("%f, %f, %f, %f\n", dst1[i].real, dst1[i].imaginary, dst1[i+N].real, dst1[i+N].imaginary); //cpu 2d IFFT time_stamp(0, NULL); status = IFFT_2D(dst1,Iresult, log2(N), log2(N)); time_stamp(1,"\nCPU 2D time: "); printf(" \n CPU 2D IFFT result: \n "); for(int i=0; i<MIN(8, N); i++) printf("%f, %f\n ", Iresult[i], Iresult[i+N]); return 0; }
// See if two images match in the Fourier domain. // // Vectors av[] and bv[] should be normalized and same size. // // wvlen (in pixels) sets a minimum feature size (roughly). // double FourierMatch( const vector<Point> &pts, const vector<double> &av, const vector<double> &bv, int wvlen, bool write_images, const char *msg, FILE* flog ) { /* ----------------------- */ /* Report difference power */ /* ----------------------- */ if( !MeanSqrDiff( av, bv, msg, flog ) ) return 0.0; /* ----------- */ /* Make images */ /* ----------- */ vector<double> i1, i2, diff; int Nx, Ny; { const vector<Point> *pbest; vector<Point> altpts; pbest = SmallestFootprint( altpts, pts, msg, flog ); MakeMetricImagesFFT( i1, i2, diff, Nx, Ny, *pbest, av, bv, write_images, msg, flog ); } /* ----------- */ /* Image power */ /* ----------- */ // Compare lowest few coefficients (longer than wvlen). // // Remember FFT x-elements are arranged like this: // // elem freq wvlen // ---- ---- ----- // 0 0 DC const // 1 1/Nx Nx // 2 2/Nx Nx/2 // 3 3/Nx Nx/3 // ... ... ... // Nx/2 (Nx/2)/Nx 2 // // Since wvlen = Nx/elem; then elem = Nx/wvlen. vector<CD> i1fft, i2fft, dfft; double total = 0.0, dot = 0.0; int xlim = Nx/wvlen, ylim = Ny/wvlen; FFT_2D( i1fft, i1, Nx, Ny, false ); FFT_2D( i2fft, i2, Nx, Ny, false ); FFT_2D( dfft, diff, Nx, Ny, false ); for( int x = -xlim; x <= xlim; ++x ) { for( int y = -ylim; y <= ylim; ++y ) { CD v1 = FFT_r2c_lookup( i1fft, Nx, Ny, x, y ), v2 = FFT_r2c_lookup( i2fft, Nx, Ny, x, y ); total += sqrt( norm( v1 ) * norm( v2 ) ); dot += v1.real()*v2.real() + v1.imag()*v2.imag(); } } dot /= total; fprintf( flog, "FFT: norm-dot %f, energy %f\n", dot, total ); /* --------------------------------*/ /* Difference power: make spectrum */ /* --------------------------------*/ // Form spectrum with powers ordered from small to large wavelength. // The objective will be to report what fraction of total power is // covered by small-sized features in the diff image. The idea is // that in a seriously bad mismatch, diff will get large features // and the spectrum would be shifted out to long wavelength. int M = Nx/2 + 1; int maxf = int(sqrt( Nx*Nx + Ny*Ny )/2.0) + 1; vector<double> pspectrum( maxf, 0.0 ); for( int x = 1; x < M; ++x ) { double wavex = double(Nx)/x; for( int y = 1; y < Ny; ++y ) { double wavey = double(Ny)/(y > Ny/2 ? Ny-y : y); double wave = sqrt( wavex*wavex + wavey*wavey ); int iwave = int(wave); if( iwave > maxf - 1 ) iwave = maxf - 1; pspectrum[iwave] += norm( dfft[x + M*y] ); } } /* --------------------------------------------------- */ /* Difference power: where cum power exceeds threshold */ /* --------------------------------------------------- */ const double thresh = 0.50; // fraction of total power double tot = 0.0, cum = 0.0; int i; // tot = total power for( i = 0; i < maxf; ++i ) tot += pspectrum[i]; // repeat summing, but only up to tot*thresh for( i = 0; cum < tot * thresh && i < maxf; ++i ) { cum += pspectrum[i]; // report progress periodically if( i && !(i % 10) ) { fprintf( flog, "FFT: %s: cum frac to %d = %f\n", msg, i, cum/tot ); } } fprintf( flog, "FFT: %s: Cum power exceeds %2d%% of %f" " at index %d/%d (frac %f).\n", msg, int(thresh*100.0), tot, i, maxf, cum/tot ); return dot; }