/*
 * Execute a 2D complex-to-complex transform through an Intel MKL DFTI plan.
 *
 * fft       Plan; must have real_fft != 1 and ndim == 2.
 * dir       GMX_FFT_FORWARD or GMX_FFT_BACKWARD.
 * in_data   Input buffer.
 * out_data  Output buffer; passing the same pointer as in_data selects the
 *           in-place descriptor, otherwise the out-of-place one is used.
 *
 * Returns 0 on success, EINVAL for a plan/direction mismatch (after calling
 * gmx_fatal), or -1 if the DFTI compute call reported a non-zero status
 * (also after calling gmx_fatal).
 */
int gmx_fft_2d(gmx_fft_t                  fft,
               enum gmx_fft_direction     dir,
               void *                     in_data,
               void *                     out_data)
{
    const int same_buffer = (in_data == out_data);
    const int forward     = (dir == GMX_FFT_FORWARD);
    int       rc          = 0;

    /* Reject real-data plans, wrong dimensionality and unknown directions. */
    if ( (fft->real_fft == 1) ||
         (fft->ndim != 2) ||
         (!forward && (dir != GMX_FFT_BACKWARD)) )
    {
        gmx_fatal(FARGS,"FFT plan mismatch - bad plan or direction.");
        return EINVAL;
    }

    if (same_buffer)
    {
        /* One buffer: use the descriptor committed for in-place operation. */
        rc = forward
            ? DftiComputeForward(fft->inplace[0], in_data)
            : DftiComputeBackward(fft->inplace[0], in_data);
    }
    else
    {
        /* Distinct buffers: use the out-of-place descriptor. */
        rc = forward
            ? DftiComputeForward(fft->ooplace[0], in_data, out_data)
            : DftiComputeBackward(fft->ooplace[0], in_data, out_data);
    }

    if (rc != 0)
    {
        gmx_fatal(FARGS,"Error executing Intel MKL FFT.");
        rc = -1;
    }

    return rc;
}
// Runs the backward (inverse) DFTI transform from the buffer referenced by
// iftInData_ into the buffer referenced by iftOutData_, then hands the
// returned status to checkAndTrapErrors().
void N_UTL_IntelFFT_Interface<std::vector<double> >::calculateIFT()
{
  // The input is treated as read-only here, but the FFT library takes
  // non-const pointers, so the constness has to be cast away for the call.
  const double * constSrc = &(*((this->iftInData_)->begin()));
  double * src = const_cast< double * >( constSrc );

  // The destination is written by the transform.
  double * dst = &(*((this->iftOutData_)->begin()));

  long dftiStatus = DftiComputeBackward( fftDescriptor, src, dst );
  checkAndTrapErrors( dftiStatus );
}
/// Backward transform of the complex cube @p in into the real cube @p out.
///
/// Asserts that size(in) equals fft_complex_size(out) and that size(out)
/// equals sz, obtains the backward plan for the shape of @p in, and runs
/// DftiComputeBackward on the raw buffers. When MEASURE_FFT_RUNTIME is
/// defined, the elapsed wall time of the call is added to fft_stats.
///
/// NOTE(review): the DFTI status is stored but never inspected here.
void backward( cube<complex>& in, cube<real>& out )
{
    ZI_ASSERT(size(in)==fft_complex_size(out));
    ZI_ASSERT(size(out)==sz);

    fft_plan plan = fft_plans.get_backward(
        vec3i(in.shape()[0], in.shape()[1], in.shape()[2]));

#   ifdef MEASURE_FFT_RUNTIME
    zi::wall_timer wt;
#   endif

    MKL_LONG status = DftiComputeBackward(*plan,
                                          reinterpret_cast<real*>(in.data()),
                                          reinterpret_cast<real*>(out.data()));

#   ifdef MEASURE_FFT_RUNTIME
    fft_stats.add(wt.elapsed<real>());
#   endif
}
int main(void) { /* Size of 1D transform */ int N = 1000000; /* Arbitrary harmonic */ int H = -N/2; /* Execution status */ MKL_LONG status = 0; int forward_ok = 1, backward_ok = 1; double time_start = 0, time_end = 0; double flops = 0; printf("Forward and backward 1D complex inplace transforms\n"); printf("Allocate space for data on the host\n"); x = (COMPLEX*)malloc( N * sizeof(COMPLEX) ); if (0 == x) { printf("Error: memory allocation on host failed\n"); exit(1); } printf("Preallocate memory on the target\n"); /* * SOLUTION: Use offload pragma to preallocate memory for x on the target. * (1) The lenght of x is N * (2) Make sure the memory of x is aligned on 64-byte boundary * (3) Make sure the allocated memory is not freed */ #pragma offload target(mic) in(x:length(N) align(64) alloc_if(1) free_if(0)) { } printf("Create handle for 1D single-precision forward and backward transforms\n"); /* * SOLUTION: Offload the call to DftiCreateDescriptor to the target. * (1) What would be the 'in' variables? * (2) What would be the 'out' variables? */ #pragma offload target(mic) in(N) nocopy(handle) out(status) { status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_COMPLEX, 1, (MKL_LONG)N ); if (0 == status) { status = DftiCommitDescriptor(handle); } } if (status) { printf("Error: cannot create handle\n"); exit(1); } /* * SOLUTION: Offload the call to DftiComputeForward to the target. * (1) Make sure x is an 'inout' variable, because this is in-place * transform. * (2) Do not allocate memory for x because it was preallocated. * (3) Do not free momory of x because we will use it again for more * transforms. * (4) What would be the 'out' variables? */ // We do not time the first offload. 
#pragma offload target(mic) inout(x:length(N) alloc_if(0) free_if(0)) \ nocopy(handle) out(status) { status = DftiComputeForward(handle, x); } printf("Initialize input for forward transform\n"); init(x, N, H); printf("Offload forward FFT computation to the target\n"); time_start = dsecnd(); /* * SOLUTION: Offload the call to DftiComputeForward to the target. * This should be the same as the previous offload. */ #pragma offload target(mic) inout(x:length(N) alloc_if(0) free_if(0)) \ nocopy(handle) out(status) { status = DftiComputeForward(handle, x); } time_end = dsecnd(); if (status) { printf("Error: DftiComputeForward failed\n"); exit(1); } printf("Verify result of forward FFT\n"); forward_ok = verify(x, N, H); if (0 == forward_ok) { flops = 5 * N * log2((double)N) / (time_end - time_start); printf("\t Forward: size = %d, GFlops = %.3f \n", N, flops/1000000000); } printf("Initialize input for backward transform\n"); init(x, N, -H); printf("Offload backward FFT computation to the target\n"); time_start = dsecnd(); /* * SOLUTION: Offload the call to DftiComputeBackward to the target. * (1) Make sure x is an 'inout' variable, because this is in-place * transform. * (2) Do not allocate memory for x because it was preallocated. * (3) Do not free momory of x at this time. * (4) What would be the 'out' variables? */ #pragma offload target(mic) inout(x:length(N) alloc_if(0) free_if(0)) \ nocopy(handle) out(status) { status = DftiComputeBackward(handle, x); } time_end = dsecnd(); if (status) { printf("Error: DftiComputeBackward failed\n"); exit(1); } printf("Verify result of backward FFT\n"); backward_ok = verify(x, N, H); if (0 == backward_ok) { flops = 5 * N * log2((double)N) / (time_end - time_start); printf("\t Backward: size = %d, GFlops = %.3f \n", N, flops/1000000000 ); } printf("Destroy DFTI handle and free space on the target\n"); /* * SOLUTION: Use offload pragma to deallocate memory of x on the target. * (1) What would be 'in' variables? 
* (2) Do the 'in' variables need to be copied in? */ #pragma offload target(mic) nocopy(x:length(N) alloc_if(0) free_if(1)) \ nocopy(handle) { DftiFreeDescriptor(&handle); } printf("Free space on host\n"); free(x); printf("TEST %s\n",0==forward_ok ? "FORWARD FFT PASSED" : "FORWARD FFT FAILED"); printf("TEST %s\n",0==backward_ok ? "BACKWARD FFT PASSED" : "BACKWARD FFT FAILED"); return 0; }
/*
 * Execute a 2D real-to-complex or complex-to-real transform through Intel
 * MKL DFTI descriptors, as two passes (one per dimension).
 *
 * fft       Plan; must have real_fft == 1 and ndim == 2.
 * dir       GMX_FFT_REAL_TO_COMPLEX or GMX_FFT_COMPLEX_TO_REAL.
 * in_data   Input buffer.
 * out_data  Output buffer; the same pointer as in_data selects the in-place
 *           code path.
 *
 * The second pass only runs if the first returned status 0.
 *
 * Returns 0 on success, EINVAL for a plan/direction mismatch (after calling
 * gmx_fatal), or -1 if a DFTI compute call reported a non-zero status (also
 * after calling gmx_fatal).
 */
int gmx_fft_2d_real(gmx_fft_t                  fft,
                    enum gmx_fft_direction     dir,
                    void *                     in_data,
                    void *                     out_data)
{
    const int same_buffer = (in_data == out_data);
    int       rc          = 0;

    if ( (fft->real_fft != 1) ||
         (fft->ndim != 2) ||
         ((dir != GMX_FFT_REAL_TO_COMPLEX) && (dir != GMX_FFT_COMPLEX_TO_REAL)) )
    {
        gmx_fatal(FARGS,"FFT plan mismatch - bad plan or direction.");
        return EINVAL;
    }

    if (dir == GMX_FFT_COMPLEX_TO_REAL)
    {
        if (same_buffer)
        {
            /* X dimension first: complex-to-complex, in-place */
            rc = DftiComputeBackward(fft->inplace[0], in_data);

            /* then Y dimension: complex-to-real, in-place */
            if (rc == 0)
            {
                rc = DftiComputeBackward(fft->inplace[1], in_data);
            }
        }
        else
        {
            /* X dimension first: complex-to-complex, in_data -> work */
            rc = DftiComputeBackward(fft->ooplace[0], in_data, fft->work);

            /* then Y dimension: complex-to-real, work -> out_data */
            if (rc == 0)
            {
                rc = DftiComputeBackward(fft->ooplace[1], fft->work, out_data);
            }
        }
    }
    else
    {
        /* dir == GMX_FFT_REAL_TO_COMPLEX (guaranteed by the check above) */
        if (same_buffer)
        {
            /* Y dimension first: real-to-complex, in-place */
            rc = DftiComputeForward(fft->inplace[1], in_data);

            /* then X dimension: complex-to-complex, in-place */
            if (rc == 0)
            {
                rc = DftiComputeForward(fft->inplace[0], in_data);
            }
        }
        else
        {
            /* Y dimension first: real-to-complex, in_data -> out_data */
            rc = DftiComputeForward(fft->ooplace[1], in_data, out_data);

            /* then X dimension: complex-to-complex, in-place on out_data */
            if (rc == 0)
            {
                rc = DftiComputeForward(fft->inplace[0], out_data);
            }
        }
    }

    if (rc != 0)
    {
        gmx_fatal(FARGS,"Error executing Intel MKL FFT.");
        rc = -1;
    }

    return rc;
}
/*
 * Execute a 3D real-to-complex or complex-to-real transform through Intel
 * MKL DFTI descriptors, as one pass per dimension.
 *
 * fft       Plan; must have real_fft == 1 and ndim == 3.
 * dir       GMX_FFT_REAL_TO_COMPLEX or GMX_FFT_COMPLEX_TO_REAL.
 * in_data   Input buffer.
 * out_data  Output buffer; the same pointer as in_data selects the in-place
 *           code path.
 *
 * The Y-dimension pass is issued once per X index, each call starting
 * ny*nzc complex elements after the previous one, where nzc = nz/2 + 1.
 * As soon as any DFTI call returns a non-zero status no further DFTI calls
 * are made.
 *
 * Returns 0 on success, EINVAL for a plan/direction mismatch (after calling
 * gmx_fatal), or -1 if a DFTI compute call reported a non-zero status (also
 * after calling gmx_fatal).
 */
int gmx_fft_3d_real(gmx_fft_t                  fft,
                    enum gmx_fft_direction     dir,
                    void *                     in_data,
                    void *                     out_data)
{
    const int same_buffer = (in_data == out_data);
    int       rc          = 0;
    int       ix;
    int       nx, ny, nzc;

    nx  = fft->nx;
    ny  = fft->ny;
    nzc = fft->nz/2 + 1;

    if ( (fft->real_fft != 1) ||
         (fft->ndim != 3) ||
         ((dir != GMX_FFT_REAL_TO_COMPLEX) && (dir != GMX_FFT_COMPLEX_TO_REAL)) )
    {
        gmx_fatal(FARGS,"FFT plan mismatch - bad plan or direction.");
        return EINVAL;
    }

    if (dir == GMX_FFT_COMPLEX_TO_REAL)
    {
        if (same_buffer)
        {
            /* X dimension: complex-to-complex, in-place */
            rc = DftiComputeBackward(fft->inplace[0], in_data);

            /* Y dimension: complex-to-complex, in-place, one call per X index */
            for (ix = 0; ix < nx && rc == 0; ix++)
            {
                rc = DftiComputeBackward(fft->inplace[1],
                                         (t_complex *)in_data+ix*ny*nzc);
            }

            /* Z dimension: complex-to-real, in-place */
            if (rc == 0)
            {
                rc = DftiComputeBackward(fft->inplace[2], in_data);
            }
        }
        else
        {
            /* X dimension: complex-to-complex, in_data -> work */
            rc = DftiComputeBackward(fft->ooplace[0], in_data, fft->work);

            /* Y dimension: complex-to-complex, in-place inside work */
            for (ix = 0; ix < nx && rc == 0; ix++)
            {
                rc = DftiComputeBackward(fft->inplace[1],
                                         fft->work+ix*ny*nzc);
            }

            /* Z dimension: complex-to-real, work -> out_data */
            if (rc == 0)
            {
                rc = DftiComputeBackward(fft->ooplace[2], fft->work, out_data);
            }
        }
    }
    else
    {
        /* dir == GMX_FFT_REAL_TO_COMPLEX (guaranteed by the check above) */
        if (same_buffer)
        {
            /* Z dimension: real-to-complex, in-place */
            rc = DftiComputeForward(fft->inplace[2], in_data);

            /* Y dimension: complex-to-complex, in-place, one call per X index */
            for (ix = 0; ix < nx && rc == 0; ix++)
            {
                rc = DftiComputeForward(fft->inplace[1],
                                        (t_complex *)in_data+ix*ny*nzc);
            }

            /* X dimension: complex-to-complex, in-place */
            if (rc == 0)
            {
                rc = DftiComputeForward(fft->inplace[0], in_data);
            }
        }
        else
        {
            /* Z dimension: real-to-complex, in_data -> out_data */
            rc = DftiComputeForward(fft->ooplace[2], in_data, out_data);

            /* Y dimension: complex-to-complex, in-place inside out_data */
            for (ix = 0; ix < nx && rc == 0; ix++)
            {
                rc = DftiComputeForward(fft->inplace[1],
                                        (t_complex *)out_data+ix*ny*nzc);
            }

            /* X dimension: complex-to-complex, in-place inside out_data */
            if (rc == 0)
            {
                rc = DftiComputeForward(fft->inplace[0], out_data);
            }
        }
    }

    if (rc != 0)
    {
        gmx_fatal(FARGS,"Error executing Intel MKL FFT.");
        rc = -1;
    }

    return rc;
}
/*
 * Multiple in-place 1D complex-to-complex FFTs.
 *
 * data  n2 vectors stored with a stride of ld1 complex elements between the
 *       starts of consecutive vectors; transformed in place.
 * n1    length of each transform.
 * n2    number of transforms.
 * ld1   distance (in complex elements) between consecutive vectors.
 * sign  selects the transform direction; in the MKL branch sign < 0 runs
 *       DftiComputeBackward and any other value runs DftiComputeForward.
 *       The other branches pass sign straight to the backend.
 *
 * The backend is chosen at compile time (HAVE_LIBSCS, ACML440, MKL, or the
 * built-in ccm_fft). Library backends cache their setup for the most recent
 * n1 in function-static storage; the MKL backend keeps one cached descriptor
 * per OpenMP thread id.
 *
 * MKL branch: if the descriptor cannot be created or committed, the error is
 * printed, the cache entry is invalidated so the next call retries, and the
 * function returns without touching data. A failing compute call is reported
 * once per invocation; remaining vectors are still attempted.
 */
void ccmfft(complex *data, int n1, int n2, int ld1, int sign)
{
#if defined(HAVE_LIBSCS)
    int ntable, nwork, zero=0;
    static int isys, nprev=0;
    static float *work, *table, scale=1.0;
#elif defined(ACML440)
    static int nprev=0;
    int nwork, zero=0, one=1, i, j, inpl;
    static int isys;
    static complex *work;
    REAL scl;
    complex *y;
#elif defined(MKL)
    static DFTI_DESCRIPTOR_HANDLE handle[MAX_NUMTHREADS];
    static int nprev[MAX_NUMTHREADS];
    MKL_LONG Status;
    int j;
    int reported = 0; /* set once a compute failure has been printed */
#endif
    int id;

#ifdef _OPENMP
    id = omp_get_thread_num();
#else
    id = 0;
#endif

#if defined(HAVE_LIBSCS)
    if (n1 != nprev) {
        /* transform length changed: rebuild work space and table */
        isys   = 0;
        ntable = 2*n1 + 30;
        nwork  = 2*n1;
        if (work) free(work);
        work = (float *)malloc(nwork*sizeof(float));
        if (work == NULL) fprintf(stderr,"ccmfft: memory allocation error\n");
        if (table) free(table);
        table = (float *)malloc(ntable*sizeof(float));
        if (table == NULL) fprintf(stderr,"ccmfft: memory allocation error\n");
        /* a call with sign argument zero initialises the table */
        ccfftm_(&zero, &n1, &n2, &scale, data, &ld1, data, &ld1, table, work, &isys);
        nprev = n1;
    }
    ccfftm_(&sign, &n1, &n2, &scale, data, &ld1, data, &ld1, table, work, &isys);
#elif defined(ACML440)
    scl = 1.0;
    inpl = 1;
    if (n1 != nprev) {
        /* transform length changed: rebuild the work array */
        isys   = 0;
        nwork = 5*n1 + 100;
        if (work) free(work);
        work = (complex *)malloc(nwork*sizeof(complex));
        if (work == NULL) fprintf(stderr,"ccmfft: memory allocation error\n");
        /* a call with mode zero initialises the work array */
        acmlccmfft(zero, scl, inpl, n2, n1, data, 1, ld1, y, 1, ld1, work, &isys);
        nprev = n1;
    }
    acmlccmfft(sign, scl, inpl, n2, n1, data, 1, ld1, y, 1, ld1, work, &isys);
#elif defined(MKL)
    if (n1 != nprev[id]) {
        /* Only release a descriptor that was actually created earlier;
         * the static array starts out zero-initialised. */
        if (handle[id] != NULL) DftiFreeDescriptor(&handle[id]);

        /* Until a new descriptor is committed the cache entry is invalid;
         * -1 can never match a real transform length. */
        nprev[id] = -1;

        Status = DftiCreateDescriptor(&handle[id], DFTI_SINGLE, DFTI_COMPLEX, 1, (MKL_LONG)n1);
        if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
            dfti_status_print(Status);
            printf(" DftiCreateDescriptor FAIL\n");
            return;
        }
        Status = DftiCommitDescriptor(handle[id]);
        if(! DftiErrorClass(Status, DFTI_NO_ERROR)){
            dfti_status_print(Status);
            printf(" DftiCommitDescriptor FAIL\n");
            return;
        }
        nprev[id] = n1;
    }

    if (sign < 0) {
        for (j=0; j<n2; j++) {
            Status = DftiComputeBackward(handle[id], &data[j*ld1]);
            if (!reported && !DftiErrorClass(Status, DFTI_NO_ERROR)) {
                dfti_status_print(Status);
                printf(" DftiComputeBackward FAIL\n");
                reported = 1;
            }
        }
    }
    else {
        for (j=0; j<n2; j++) {
            Status = DftiComputeForward(handle[id], &data[j*ld1]);
            if (!reported && !DftiErrorClass(Status, DFTI_NO_ERROR)) {
                dfti_status_print(Status);
                printf(" DftiComputeForward FAIL\n");
                reported = 1;
            }
        }
    }
#else
    ccm_fft(data, n1, n2, ld1, sign);
#endif

    return;
}
void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan) { int i,total,length,offset,num; FFT_SCALAR norm, *out_ptr; FFT_DATA *data,*copy; // system specific constants #if defined(FFT_SCSL) int isys = 0; FFT_PREC scalef = 1.0; #elif defined(FFT_DEC) char c = 'C'; char f = 'F'; char b = 'B'; int one = 1; #elif defined(FFT_T3E) int isys = 0; double scalef = 1.0; #elif defined(FFT_ACML) int info; #elif defined(FFT_FFTW3) FFTW_API(plan) theplan; #else // nothing to do for other FFTs. #endif // pre-remap to prepare for 1st FFTs if needed // copy = loc for remap result if (plan->pre_plan) { if (plan->pre_target == 0) copy = out; else copy = plan->copy; remap_3d((FFT_SCALAR *) in, (FFT_SCALAR *) copy, (FFT_SCALAR *) plan->scratch, plan->pre_plan); data = copy; } else data = in; // 1d FFTs along fast axis total = plan->total1; length = plan->length1; #if defined(FFT_SGI) for (offset = 0; offset < total; offset += length) FFT_1D(flag,length,&data[offset],1,plan->coeff1); #elif defined(FFT_SCSL) for (offset = 0; offset < total; offset += length) FFT_1D(flag,length,scalef,&data[offset],&data[offset],plan->coeff1, plan->work1,&isys); #elif defined(FFT_ACML) num=total/length; FFT_1D(&flag,&num,&length,data,plan->coeff1,&info); #elif defined(FFT_INTEL) for (offset = 0; offset < total; offset += length) FFT_1D(&data[offset],&length,&flag,plan->coeff1); #elif defined(FFT_MKL) if (flag == -1) DftiComputeForward(plan->handle_fast,data); else DftiComputeBackward(plan->handle_fast,data); #elif defined(FFT_DEC) if (flag == -1) for (offset = 0; offset < total; offset += length) FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length,&one); else for (offset = 0; offset < total; offset += length) FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length,&one); #elif defined(FFT_T3E) for (offset = 0; offset < total; offset += length) FFT_1D(&flag,&length,&scalef,&data[offset],&data[offset],plan->coeff1, plan->work1,&isys); #elif defined(FFT_FFTW2) if (flag == -1) 
fftw(plan->plan_fast_forward,total/length,data,1,length,NULL,0,0); else fftw(plan->plan_fast_backward,total/length,data,1,length,NULL,0,0); #elif defined(FFT_FFTW3) if (flag == -1) theplan=plan->plan_fast_forward; else theplan=plan->plan_fast_backward; FFTW_API(execute_dft)(theplan,data,data); #else if (flag == -1) for (offset = 0; offset < total; offset += length) kiss_fft(plan->cfg_fast_forward,&data[offset],&data[offset]); else for (offset = 0; offset < total; offset += length) kiss_fft(plan->cfg_fast_backward,&data[offset],&data[offset]); #endif // 1st mid-remap to prepare for 2nd FFTs // copy = loc for remap result if (plan->mid1_target == 0) copy = out; else copy = plan->copy; remap_3d((FFT_SCALAR *) data, (FFT_SCALAR *) copy, (FFT_SCALAR *) plan->scratch, plan->mid1_plan); data = copy; // 1d FFTs along mid axis total = plan->total2; length = plan->length2; #if defined(FFT_SGI) for (offset = 0; offset < total; offset += length) FFT_1D(flag,length,&data[offset],1,plan->coeff2); #elif defined(FFT_SCSL) for (offset = 0; offset < total; offset += length) FFT_1D(flag,length,scalef,&data[offset],&data[offset],plan->coeff2, plan->work2,&isys); #elif defined(FFT_ACML) num=total/length; FFT_1D(&flag,&num,&length,data,plan->coeff2,&info); #elif defined(FFT_INTEL) for (offset = 0; offset < total; offset += length) FFT_1D(&data[offset],&length,&flag,plan->coeff2); #elif defined(FFT_MKL) if (flag == -1) DftiComputeForward(plan->handle_mid,data); else DftiComputeBackward(plan->handle_mid,data); #elif defined(FFT_DEC) if (flag == -1) for (offset = 0; offset < total; offset += length) FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length,&one); else for (offset = 0; offset < total; offset += length) FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length,&one); #elif defined(FFT_T3E) for (offset = 0; offset < total; offset += length) FFT_1D(&flag,&length,&scalef,&data[offset],&data[offset],plan->coeff2, plan->work2,&isys); #elif defined(FFT_FFTW2) if (flag == -1) 
fftw(plan->plan_mid_forward,total/length,data,1,length,NULL,0,0); else fftw(plan->plan_mid_backward,total/length,data,1,length,NULL,0,0); #elif defined(FFT_FFTW3) if (flag == -1) theplan=plan->plan_mid_forward; else theplan=plan->plan_mid_backward; FFTW_API(execute_dft)(theplan,data,data); #else if (flag == -1) for (offset = 0; offset < total; offset += length) kiss_fft(plan->cfg_mid_forward,&data[offset],&data[offset]); else for (offset = 0; offset < total; offset += length) kiss_fft(plan->cfg_mid_backward,&data[offset],&data[offset]); #endif // 2nd mid-remap to prepare for 3rd FFTs // copy = loc for remap result if (plan->mid2_target == 0) copy = out; else copy = plan->copy; remap_3d((FFT_SCALAR *) data, (FFT_SCALAR *) copy, (FFT_SCALAR *) plan->scratch, plan->mid2_plan); data = copy; // 1d FFTs along slow axis total = plan->total3; length = plan->length3; #if defined(FFT_SGI) for (offset = 0; offset < total; offset += length) FFT_1D(flag,length,&data[offset],1,plan->coeff3); #elif defined(FFT_SCSL) for (offset = 0; offset < total; offset += length) FFT_1D(flag,length,scalef,&data[offset],&data[offset],plan->coeff3, plan->work3,&isys); #elif defined(FFT_ACML) num=total/length; FFT_1D(&flag,&num,&length,data,plan->coeff3,&info); #elif defined(FFT_INTEL) for (offset = 0; offset < total; offset += length) FFT_1D(&data[offset],&length,&flag,plan->coeff3); #elif defined(FFT_MKL) if (flag == -1) DftiComputeForward(plan->handle_slow,data); else DftiComputeBackward(plan->handle_slow,data); #elif defined(FFT_DEC) if (flag == -1) for (offset = 0; offset < total; offset += length) FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length,&one); else for (offset = 0; offset < total; offset += length) FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length,&one); #elif defined(FFT_T3E) for (offset = 0; offset < total; offset += length) FFT_1D(&flag,&length,&scalef,&data[offset],&data[offset],plan->coeff3, plan->work3,&isys); #elif defined(FFT_FFTW2) if (flag == -1) 
fftw(plan->plan_slow_forward,total/length,data,1,length,NULL,0,0); else fftw(plan->plan_slow_backward,total/length,data,1,length,NULL,0,0); #elif defined(FFT_FFTW3) if (flag == -1) theplan=plan->plan_slow_forward; else theplan=plan->plan_slow_backward; FFTW_API(execute_dft)(theplan,data,data); #else if (flag == -1) for (offset = 0; offset < total; offset += length) kiss_fft(plan->cfg_slow_forward,&data[offset],&data[offset]); else for (offset = 0; offset < total; offset += length) kiss_fft(plan->cfg_slow_backward,&data[offset],&data[offset]); #endif // post-remap to put data in output format if needed // destination is always out if (plan->post_plan) remap_3d((FFT_SCALAR *) data, (FFT_SCALAR *) out, (FFT_SCALAR *) plan->scratch, plan->post_plan); // scaling if required #if !defined(FFT_T3E) && !defined(FFT_ACML) if (flag == 1 && plan->scaled) { norm = plan->norm; num = plan->normnum; out_ptr = (FFT_SCALAR *)out; for (i = 0; i < num; i++) { #if defined(FFT_FFTW3) *(out_ptr++) *= norm; *(out_ptr++) *= norm; #elif defined(FFT_MKL) out[i] *= norm; #else out[i].re *= norm; out[i].im *= norm; #endif } } #endif #ifdef FFT_T3E if (flag == 1 && plan->scaled) { norm = plan->norm; num = plan->normnum; for (i = 0; i < num; i++) out[i] *= (norm,norm); } #endif #ifdef FFT_ACML norm = plan->norm; num = plan->normnum; for (i = 0; i < num; i++) { out[i].re *= norm; out[i].im *= norm; } #endif }
void fft_1d_only(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan) { int i,total,length,offset,num; FFT_SCALAR norm, *data_ptr; // system specific constants #ifdef FFT_SCSL int isys = 0; FFT_PREC scalef = 1.0; #endif #ifdef FFT_DEC char c = 'C'; char f = 'F'; char b = 'B'; int one = 1; #endif #ifdef FFT_T3E int isys = 0; double scalef = 1.0; #endif // total = size of data needed in each dim // length = length of 1d FFT in each dim // total/length = # of 1d FFTs in each dim // if total > nsize, limit # of 1d FFTs to available size of data int total1 = plan->total1; int length1 = plan->length1; int total2 = plan->total2; int length2 = plan->length2; int total3 = plan->total3; int length3 = plan->length3; // fftw3 and Dfti in MKL encode the number of transforms // into the plan, so we cannot operate on a smaller data set. #if defined(FFT_MKL) || defined(FFT_FFTW3) if ((total1 > nsize) || (total2 > nsize) || (total3 > nsize)) return; #endif if (total1 > nsize) total1 = (nsize/length1) * length1; if (total2 > nsize) total2 = (nsize/length2) * length2; if (total3 > nsize) total3 = (nsize/length3) * length3; // perform 1d FFTs in each of 3 dimensions // data is just an array of 0.0 #ifdef FFT_SGI for (offset = 0; offset < total1; offset += length1) FFT_1D(flag,length1,&data[offset],1,plan->coeff1); for (offset = 0; offset < total2; offset += length2) FFT_1D(flag,length2,&data[offset],1,plan->coeff2); for (offset = 0; offset < total3; offset += length3) FFT_1D(flag,length3,&data[offset],1,plan->coeff3); #elif defined(FFT_SCSL) for (offset = 0; offset < total1; offset += length1) FFT_1D(flag,length1,scalef,&data[offset],&data[offset],plan->coeff1, plan->work1,&isys); for (offset = 0; offset < total2; offset += length2) FFT_1D(flag,length2,scalef,&data[offset],&data[offset],plan->coeff2, plan->work2,&isys); for (offset = 0; offset < total3; offset += length3) FFT_1D(flag,length3,scalef,&data[offset],&data[offset],plan->coeff3, plan->work3,&isys); #elif 
defined(FFT_ACML) int info=0; num=total1/length1; FFT_1D(&flag,&num,&length1,data,plan->coeff1,&info); num=total2/length2; FFT_1D(&flag,&num,&length2,data,plan->coeff2,&info); num=total3/length3; FFT_1D(&flag,&num,&length3,data,plan->coeff3,&info); #elif defined(FFT_INTEL) for (offset = 0; offset < total1; offset += length1) FFT_1D(&data[offset],&length1,&flag,plan->coeff1); for (offset = 0; offset < total2; offset += length2) FFT_1D(&data[offset],&length2,&flag,plan->coeff2); for (offset = 0; offset < total3; offset += length3) FFT_1D(&data[offset],&length3,&flag,plan->coeff3); #elif defined(FFT_MKL) if (flag == -1) { DftiComputeForward(plan->handle_fast,data); DftiComputeForward(plan->handle_mid,data); DftiComputeForward(plan->handle_slow,data); } else { DftiComputeBackward(plan->handle_fast,data); DftiComputeBackward(plan->handle_mid,data); DftiComputeBackward(plan->handle_slow,data); } #elif defined(FFT_DEC) if (flag == -1) { for (offset = 0; offset < total1; offset += length1) FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length1,&one); for (offset = 0; offset < total2; offset += length2) FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length2,&one); for (offset = 0; offset < total3; offset += length3) FFT_1D(&c,&c,&f,&data[offset],&data[offset],&length3,&one); } else { for (offset = 0; offset < total1; offset += length1) FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length1,&one); for (offset = 0; offset < total2; offset += length2) FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length2,&one); for (offset = 0; offset < total3; offset += length3) FFT_1D(&c,&c,&b,&data[offset],&data[offset],&length3,&one); } #elif defined(FFT_T3E) for (offset = 0; offset < total1; offset += length1) FFT_1D(&flag,&length1,&scalef,&data[offset],&data[offset],plan->coeff1, plan->work1,&isys); for (offset = 0; offset < total2; offset += length2) FFT_1D(&flag,&length2,&scalef,&data[offset],&data[offset],plan->coeff2, plan->work2,&isys); for (offset = 0; offset < total3; offset += length3) 
FFT_1D(&flag,&length3,&scalef,&data[offset],&data[offset],plan->coeff3, plan->work3,&isys); #elif defined(FFT_FFTW2) if (flag == -1) { fftw(plan->plan_fast_forward,total1/length1,data,1,0,NULL,0,0); fftw(plan->plan_mid_forward,total2/length2,data,1,0,NULL,0,0); fftw(plan->plan_slow_forward,total3/length3,data,1,0,NULL,0,0); } else { fftw(plan->plan_fast_backward,total1/length1,data,1,0,NULL,0,0); fftw(plan->plan_mid_backward,total2/length2,data,1,0,NULL,0,0); fftw(plan->plan_slow_backward,total3/length3,data,1,0,NULL,0,0); } #elif defined(FFT_FFTW3) FFTW_API(plan) theplan; if (flag == -1) theplan=plan->plan_fast_forward; else theplan=plan->plan_fast_backward; FFTW_API(execute_dft)(theplan,data,data); if (flag == -1) theplan=plan->plan_mid_forward; else theplan=plan->plan_mid_backward; FFTW_API(execute_dft)(theplan,data,data); if (flag == -1) theplan=plan->plan_slow_forward; else theplan=plan->plan_slow_backward; FFTW_API(execute_dft)(theplan,data,data); #else if (flag == -1) { for (offset = 0; offset < total1; offset += length1) kiss_fft(plan->cfg_fast_forward,&data[offset],&data[offset]); for (offset = 0; offset < total2; offset += length2) kiss_fft(plan->cfg_mid_forward,&data[offset],&data[offset]); for (offset = 0; offset < total3; offset += length3) kiss_fft(plan->cfg_slow_forward,&data[offset],&data[offset]); } else { for (offset = 0; offset < total1; offset += length1) kiss_fft(plan->cfg_fast_backward,&data[offset],&data[offset]); for (offset = 0; offset < total2; offset += length2) kiss_fft(plan->cfg_mid_backward,&data[offset],&data[offset]); for (offset = 0; offset < total3; offset += length3) kiss_fft(plan->cfg_slow_backward,&data[offset],&data[offset]); } #endif // scaling if required // limit num to size of data #ifndef FFT_T3E if (flag == 1 && plan->scaled) { norm = plan->norm; num = MIN(plan->normnum,nsize); data_ptr = (FFT_SCALAR *)data; for (i = 0; i < num; i++) { #if defined(FFT_FFTW3) *(data_ptr++) *= norm; *(data_ptr++) *= norm; #elif 
defined(FFT_MKL) data[i] *= norm; #else data[i].re *= norm; data[i].im *= norm; #endif } } #endif #ifdef FFT_T3E if (flag == 1 && plan->scaled) { norm = plan->norm; num = MIN(plan->normnum,nsize); for (i = 0; i < num; i++) data[i] *= (norm,norm); } #endif }
// Runs one backward DFTI transform with `descriptor`, reading from the first
// source buffer (src[0]) and writing to dst.
// NOTE(review): the status returned by DftiComputeBackward is discarded.
void single(char *dst, char *const *src)
{
  char *const first_src = *src;
  DftiComputeBackward(descriptor, first_src, dst);
}
void fft_1d_only(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan) { int i,num; FFT_SCALAR norm; #if defined(FFT_FFTW3) FFT_SCALAR *data_ptr; #endif // total = size of data needed in each dim // length = length of 1d FFT in each dim // total/length = # of 1d FFTs in each dim // if total > nsize, limit # of 1d FFTs to available size of data int total1 = plan->total1; int length1 = plan->length1; int total2 = plan->total2; int length2 = plan->length2; int total3 = plan->total3; int length3 = plan->length3; // fftw3 and Dfti in MKL encode the number of transforms // into the plan, so we cannot operate on a smaller data set. #if defined(FFT_MKL) || defined(FFT_FFTW3) if ((total1 > nsize) || (total2 > nsize) || (total3 > nsize)) return; #endif if (total1 > nsize) total1 = (nsize/length1) * length1; if (total2 > nsize) total2 = (nsize/length2) * length2; if (total3 > nsize) total3 = (nsize/length3) * length3; // perform 1d FFTs in each of 3 dimensions // data is just an array of 0.0 #if defined(FFT_MKL) if (flag == -1) { DftiComputeForward(plan->handle_fast,data); DftiComputeForward(plan->handle_mid,data); DftiComputeForward(plan->handle_slow,data); } else { DftiComputeBackward(plan->handle_fast,data); DftiComputeBackward(plan->handle_mid,data); DftiComputeBackward(plan->handle_slow,data); } #elif defined(FFT_FFTW2) if (flag == -1) { fftw(plan->plan_fast_forward,total1/length1,data,1,0,NULL,0,0); fftw(plan->plan_mid_forward,total2/length2,data,1,0,NULL,0,0); fftw(plan->plan_slow_forward,total3/length3,data,1,0,NULL,0,0); } else { fftw(plan->plan_fast_backward,total1/length1,data,1,0,NULL,0,0); fftw(plan->plan_mid_backward,total2/length2,data,1,0,NULL,0,0); fftw(plan->plan_slow_backward,total3/length3,data,1,0,NULL,0,0); } #elif defined(FFT_FFTW3) FFTW_API(plan) theplan; if (flag == -1) theplan=plan->plan_fast_forward; else theplan=plan->plan_fast_backward; FFTW_API(execute_dft)(theplan,data,data); if (flag == -1) theplan=plan->plan_mid_forward; else 
theplan=plan->plan_mid_backward; FFTW_API(execute_dft)(theplan,data,data); if (flag == -1) theplan=plan->plan_slow_forward; else theplan=plan->plan_slow_backward; FFTW_API(execute_dft)(theplan,data,data); #else if (flag == -1) { for (int offset = 0; offset < total1; offset += length1) kiss_fft(plan->cfg_fast_forward,&data[offset],&data[offset]); for (int offset = 0; offset < total2; offset += length2) kiss_fft(plan->cfg_mid_forward,&data[offset],&data[offset]); for (int offset = 0; offset < total3; offset += length3) kiss_fft(plan->cfg_slow_forward,&data[offset],&data[offset]); } else { for (int offset = 0; offset < total1; offset += length1) kiss_fft(plan->cfg_fast_backward,&data[offset],&data[offset]); for (int offset = 0; offset < total2; offset += length2) kiss_fft(plan->cfg_mid_backward,&data[offset],&data[offset]); for (int offset = 0; offset < total3; offset += length3) kiss_fft(plan->cfg_slow_backward,&data[offset],&data[offset]); } #endif // scaling if required // limit num to size of data if (flag == 1 && plan->scaled) { norm = plan->norm; num = MIN(plan->normnum,nsize); #if defined(FFT_FFTW3) data_ptr = (FFT_SCALAR *)data; #endif for (i = 0; i < num; i++) { #if defined(FFT_FFTW3) *(data_ptr++) *= norm; *(data_ptr++) *= norm; #elif defined(FFT_MKL) data[i] *= norm; #else data[i].re *= norm; data[i].im *= norm; #endif } } }
void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan) { int i,total,length,offset,num; FFT_SCALAR norm; #if defined(FFT_FFTW3) FFT_SCALAR *out_ptr; #endif FFT_DATA *data,*copy; // system specific constants #if defined(FFT_FFTW3) FFTW_API(plan) theplan; #else // nothing to do for other FFTs #endif // pre-remap to prepare for 1st FFTs if needed // copy = loc for remap result if (plan->pre_plan) { if (plan->pre_target == 0) copy = out; else copy = plan->copy; remap_3d((FFT_SCALAR *) in, (FFT_SCALAR *) copy, (FFT_SCALAR *) plan->scratch, plan->pre_plan); data = copy; } else data = in; // 1d FFTs along fast axis total = plan->total1; length = plan->length1; #if defined(FFT_MKL) if (flag == -1) DftiComputeForward(plan->handle_fast,data); else DftiComputeBackward(plan->handle_fast,data); #elif defined(FFT_FFTW2) if (flag == -1) fftw(plan->plan_fast_forward,total/length,data,1,length,NULL,0,0); else fftw(plan->plan_fast_backward,total/length,data,1,length,NULL,0,0); #elif defined(FFT_FFTW3) if (flag == -1) theplan=plan->plan_fast_forward; else theplan=plan->plan_fast_backward; FFTW_API(execute_dft)(theplan,data,data); #else if (flag == -1) for (offset = 0; offset < total; offset += length) kiss_fft(plan->cfg_fast_forward,&data[offset],&data[offset]); else for (offset = 0; offset < total; offset += length) kiss_fft(plan->cfg_fast_backward,&data[offset],&data[offset]); #endif // 1st mid-remap to prepare for 2nd FFTs // copy = loc for remap result if (plan->mid1_target == 0) copy = out; else copy = plan->copy; remap_3d((FFT_SCALAR *) data, (FFT_SCALAR *) copy, (FFT_SCALAR *) plan->scratch, plan->mid1_plan); data = copy; // 1d FFTs along mid axis total = plan->total2; length = plan->length2; #if defined(FFT_MKL) if (flag == -1) DftiComputeForward(plan->handle_mid,data); else DftiComputeBackward(plan->handle_mid,data); #elif defined(FFT_FFTW2) if (flag == -1) fftw(plan->plan_mid_forward,total/length,data,1,length,NULL,0,0); else 
fftw(plan->plan_mid_backward,total/length,data,1,length,NULL,0,0); #elif defined(FFT_FFTW3) if (flag == -1) theplan=plan->plan_mid_forward; else theplan=plan->plan_mid_backward; FFTW_API(execute_dft)(theplan,data,data); #else if (flag == -1) for (offset = 0; offset < total; offset += length) kiss_fft(plan->cfg_mid_forward,&data[offset],&data[offset]); else for (offset = 0; offset < total; offset += length) kiss_fft(plan->cfg_mid_backward,&data[offset],&data[offset]); #endif // 2nd mid-remap to prepare for 3rd FFTs // copy = loc for remap result if (plan->mid2_target == 0) copy = out; else copy = plan->copy; remap_3d((FFT_SCALAR *) data, (FFT_SCALAR *) copy, (FFT_SCALAR *) plan->scratch, plan->mid2_plan); data = copy; // 1d FFTs along slow axis total = plan->total3; length = plan->length3; #if defined(FFT_MKL) if (flag == -1) DftiComputeForward(plan->handle_slow,data); else DftiComputeBackward(plan->handle_slow,data); #elif defined(FFT_FFTW2) if (flag == -1) fftw(plan->plan_slow_forward,total/length,data,1,length,NULL,0,0); else fftw(plan->plan_slow_backward,total/length,data,1,length,NULL,0,0); #elif defined(FFT_FFTW3) if (flag == -1) theplan=plan->plan_slow_forward; else theplan=plan->plan_slow_backward; FFTW_API(execute_dft)(theplan,data,data); #else if (flag == -1) for (offset = 0; offset < total; offset += length) kiss_fft(plan->cfg_slow_forward,&data[offset],&data[offset]); else for (offset = 0; offset < total; offset += length) kiss_fft(plan->cfg_slow_backward,&data[offset],&data[offset]); #endif // post-remap to put data in output format if needed // destination is always out if (plan->post_plan) remap_3d((FFT_SCALAR *) data, (FFT_SCALAR *) out, (FFT_SCALAR *) plan->scratch, plan->post_plan); // scaling if required if (flag == 1 && plan->scaled) { norm = plan->norm; num = plan->normnum; #if defined(FFT_FFTW3) out_ptr = (FFT_SCALAR *)out; #endif for (i = 0; i < num; i++) { #if defined(FFT_FFTW3) *(out_ptr++) *= norm; *(out_ptr++) *= norm; #elif 
defined(FFT_MKL) out[i] *= norm; #else out[i].re *= norm; out[i].im *= norm; #endif } } }