// Destructor: hands the two split-complex component buffers back to free()
// and destroys the vDSP FFT setup held in mSpectrumAnalysis.
FFTHelper::~FFTHelper()
{
    // The three resources are independent of one another, so the release
    // order is not significant.
    free(mDspSplitComplex.imagp);
    free(mDspSplitComplex.realp);

    vDSP_destroy_fftsetup(mSpectrumAnalysis);
}
// Destructor: destroys the vDSP FFT setup stored in fftSetup.
// When __APPLE__ does not evaluate to true the body is empty.
DspRfft::~DspRfft()
{
#if __APPLE__
    vDSP_destroy_fftsetup(fftSetup);
#endif  // __APPLE__
}
// Destructor for the float variant: destroys the single-precision vDSP FFT
// setup and returns both split-complex component buffers to free().
AccelerateFFT<float>::~AccelerateFFT()
{
    // Independent resources; release order does not matter.
    vDSP_destroy_fftsetup(fftSetupFloat);

    free(complexSplit.imagp);
    free(complexSplit.realp);
}
void computeReferenceF(clFFT_SplitComplex *out, clFFT_Dim3 n, unsigned int batchSize, clFFT_Dimension dim, clFFT_Direction dir) { FFTSetup plan_vdsp; DSPSplitComplex out_vdsp; FFTDirection dir_vdsp = dir == clFFT_Forward ? FFT_FORWARD : FFT_INVERSE; unsigned int i, j, k; unsigned int stride; unsigned int log2Nx = (unsigned int) log2(n.x); unsigned int log2Ny = (unsigned int) log2(n.y); unsigned int log2Nz = (unsigned int) log2(n.z); unsigned int log2N; log2N = log2Nx; log2N = log2N > log2Ny ? log2N : log2Ny; log2N = log2N > log2Nz ? log2N : log2Nz; plan_vdsp = vDSP_create_fftsetup(log2N, 2); switch(dim) { case clFFT_1D: for(i = 0; i < batchSize; i++) { stride = i * n.x; out_vdsp.realp = out->real + stride; out_vdsp.imagp = out->imag + stride; vDSP_fft_zip(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp); } break; case clFFT_2D: for(i = 0; i < batchSize; i++) { for(j = 0; j < n.y; j++) { stride = j * n.x + i * n.x * n.y; out_vdsp.realp = out->real + stride; out_vdsp.imagp = out->imag + stride; vDSP_fft_zip(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp); } } for(i = 0; i < batchSize; i++) { for(j = 0; j < n.x; j++) { stride = j + i * n.x * n.y; out_vdsp.realp = out->real + stride; out_vdsp.imagp = out->imag + stride; vDSP_fft_zip(plan_vdsp, &out_vdsp, n.x, log2Ny, dir_vdsp); } } break; case clFFT_3D: for(i = 0; i < batchSize; i++) { for(j = 0; j < n.z; j++) { for(k = 0; k < n.y; k++) { stride = k * n.x + j * n.x * n.y + i * n.x * n.y * n.z; out_vdsp.realp = out->real + stride; out_vdsp.imagp = out->imag + stride; vDSP_fft_zip(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp); } } } for(i = 0; i < batchSize; i++) { for(j = 0; j < n.z; j++) { for(k = 0; k < n.x; k++) { stride = k + j * n.x * n.y + i * n.x * n.y * n.z; out_vdsp.realp = out->real + stride; out_vdsp.imagp = out->imag + stride; vDSP_fft_zip(plan_vdsp, &out_vdsp, n.x, log2Ny, dir_vdsp); } } } for(i = 0; i < batchSize; i++) { for(j = 0; j < n.y; j++) { for(k = 0; k < n.x; k++) { stride = k + j * n.x + i * n.x * n.y * n.z; 
out_vdsp.realp = out->real + stride; out_vdsp.imagp = out->imag + stride; vDSP_fft_zip(plan_vdsp, &out_vdsp, n.x*n.y, log2Nz, dir_vdsp); } } } break; } vDSP_destroy_fftsetup(plan_vdsp); }
void benchmark_ffts(int N, int cplx) { int Nfloat = (cplx ? N*2 : N); int Nbytes = Nfloat * sizeof(float); float *X = pffft_aligned_malloc(Nbytes), *Y = pffft_aligned_malloc(Nbytes), *Z = pffft_aligned_malloc(Nbytes); double t0, t1, flops; int k; int max_iter = 5120000/N*4; #ifdef __arm__ max_iter /= 4; #endif int iter; for (k = 0; k < Nfloat; ++k) { X[k] = 0; //sqrtf(k+1); } // FFTPack benchmark { float *wrk = malloc(2*Nbytes + 15*sizeof(float)); int max_iter_ = max_iter/pffft_simd_size(); if (max_iter_ == 0) max_iter_ = 1; if (cplx) cffti(N, wrk); else rffti(N, wrk); t0 = uclock_sec(); for (iter = 0; iter < max_iter_; ++iter) { if (cplx) { cfftf(N, X, wrk); cfftb(N, X, wrk); } else { rfftf(N, X, wrk); rfftb(N, X, wrk); } } t1 = uclock_sec(); free(wrk); flops = (max_iter_*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html show_output("FFTPack", N, cplx, flops, t0, t1, max_iter_); } #ifdef HAVE_VECLIB int log2N = (int)(log(N)/log(2) + 0.5f); if (N == (1<<log2N)) { FFTSetup setup; setup = vDSP_create_fftsetup(log2N, FFT_RADIX2); DSPSplitComplex zsamples; zsamples.realp = &X[0]; zsamples.imagp = &X[Nfloat/2]; t0 = uclock_sec(); for (iter = 0; iter < max_iter; ++iter) { if (cplx) { vDSP_fft_zip(setup, &zsamples, 1, log2N, kFFTDirection_Forward); vDSP_fft_zip(setup, &zsamples, 1, log2N, kFFTDirection_Inverse); } else { vDSP_fft_zrip(setup, &zsamples, 1, log2N, kFFTDirection_Forward); vDSP_fft_zrip(setup, &zsamples, 1, log2N, kFFTDirection_Inverse); } } t1 = uclock_sec(); vDSP_destroy_fftsetup(setup); flops = (max_iter*2) * ((cplx ? 
5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html show_output("vDSP", N, cplx, flops, t0, t1, max_iter); } else { show_output("vDSP", N, cplx, -1, -1, -1, -1); } #endif #ifdef HAVE_FFTW { fftwf_plan planf, planb; fftw_complex *in = (fftw_complex*) fftwf_malloc(sizeof(fftw_complex) * N); fftw_complex *out = (fftw_complex*) fftwf_malloc(sizeof(fftw_complex) * N); memset(in, 0, sizeof(fftw_complex) * N); int flags = (N < 40000 ? FFTW_MEASURE : FFTW_ESTIMATE); // measure takes a lot of time on largest ffts //int flags = FFTW_ESTIMATE; if (cplx) { planf = fftwf_plan_dft_1d(N, (fftwf_complex*)in, (fftwf_complex*)out, FFTW_FORWARD, flags); planb = fftwf_plan_dft_1d(N, (fftwf_complex*)in, (fftwf_complex*)out, FFTW_BACKWARD, flags); } else { planf = fftwf_plan_dft_r2c_1d(N, (float*)in, (fftwf_complex*)out, flags); planb = fftwf_plan_dft_c2r_1d(N, (fftwf_complex*)in, (float*)out, flags); } t0 = uclock_sec(); for (iter = 0; iter < max_iter; ++iter) { fftwf_execute(planf); fftwf_execute(planb); } t1 = uclock_sec(); fftwf_destroy_plan(planf); fftwf_destroy_plan(planb); fftwf_free(in); fftwf_free(out); flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html show_output((flags == FFTW_MEASURE ? "FFTW (meas.)" : " FFTW (estim)"), N, cplx, flops, t0, t1, max_iter); } #endif // PFFFT benchmark { PFFFT_Setup *s = pffft_new_setup(N, cplx ? PFFFT_COMPLEX : PFFFT_REAL); if (s) { t0 = uclock_sec(); for (iter = 0; iter < max_iter; ++iter) { pffft_transform(s, X, Z, Y, PFFFT_FORWARD); pffft_transform(s, X, Z, Y, PFFFT_BACKWARD); } t1 = uclock_sec(); pffft_destroy_setup(s); flops = (max_iter*2) * ((cplx ? 5 : 2.5)*N*log((double)N)/M_LN2); // see http://www.fftw.org/speed/method.html show_output("PFFFT", N, cplx, flops, t0, t1, max_iter); } } if (!array_output_format) { printf("--\n"); } pffft_aligned_free(X); pffft_aligned_free(Y); pffft_aligned_free(Z); }
int main(int argc, char *argv[]) { // Initialize the pseudo-random number generator. InitializeRandom(); // Initialize FFT data. FFTSetup Setup = vDSP_create_fftsetup(Log2SampleLength, FFT_RADIX2); if (Setup == 0) { fprintf(stderr, "Error, unable to create FFT setup.\n"); exit(EXIT_FAILURE); } /* If there are no command-line arguments, prompt for keys interactively. */ if (argc <= 1) { // Process keys for the user until they are done. int c; while (1) { // Prompt the user. printf( "Please enter a key (one of 0-9, *, #, or A-D): "); // Skip whitespace except newlines. do c = getchar(); while (c != '\n' && isspace(c)); // When there is a blank line or an EOF, quit. if (c == '\n' || c == EOF) break; // Look up the key in the table. FrequencyPair F = ConvertKeyToFrequencies(toupper(c)); // If it is a valid key, demonstrate the FFT. if (F.Frequency[0] != 0) Demonstrate(Setup, F); // Skip anything else on the line. do c = getchar(); while (c != EOF && c != '\n'); // When the input ends, quit. Otherwise, do more. if (c == EOF) break; } // Do not leave the cursor in the middle of a line. if (c != '\n') printf("\n"); } // If there is one command line argument, process the keys in it. else if (argc == 2) { for (char *p = argv[1]; *p; ++p) { // Look up the key in the table. FrequencyPair F = ConvertKeyToFrequencies(toupper(*p)); // If it is a valid key, demonstrate the FFT. if (F.Frequency[0] != 0) { printf("Simulating key %c.\n", *p); Demonstrate(Setup, F); } else fprintf(stderr, "Error, key %c not recognized.\n", *p); } } // If there are too many arguments, print a usage message. else { fprintf(stderr, "Usage: %s [telephone keys 0-9, #, *, or A-D]\n", argv[0]); exit(EXIT_FAILURE); } // Release resources. vDSP_destroy_fftsetup(Setup); return 0; }