inline void computeNumMatchesWithFFT(double *t, double *p,int n, int m, int transformSize,int *matches, fftw_plan *forward, fftw_plan *inverse){ printf("USING FFT\n"); printf("n: %d m: %d\n",n, m); //TODO: Should probably move these out of the function. No point allocating //and de-allocating the memory over and over for each character double *textSubString = (double *) fftw_malloc(sizeof(double) * transformSize); double *DFTofPattern = (double *) fftw_malloc(sizeof(double) * transformSize); double *DFTofText = (double *) fftw_malloc(sizeof(double) * transformSize); double *pTimesT = (double *) fftw_malloc(sizeof(double) * transformSize); double *pTimesT_PR = (double *) fftw_malloc(sizeof(double) * transformSize); int i; if(*forward==NULL){ *forward = fftw_plan_r2r_1d(transformSize,p,DFTofPattern,FFTW_R2HC,FFTW_ESTIMATE); fftw_execute(*forward); }else{ fftw_execute_r2r(*forward,p,DFTofPattern); } int start = 0; //Perform transforms on sub-strings of the text values while(start <= n-m){ memcpy(textSubString,t+start,sizeof(double)*transformSize); fftw_execute_r2r(*forward,textSubString,DFTofText); /* Multiply the point representations*/ pTimesT_PR[0] = DFTofPattern[0] * DFTofText[0]; THE_COUNT++; if(transformSize % 2==0){ for(i=1;i<transformSize/2;i++){ pTimesT_PR[i] = DFTofPattern[i] * DFTofText[i] - DFTofPattern[transformSize-i] * DFTofText[transformSize-i]; pTimesT_PR[transformSize-i] = DFTofPattern[transformSize-i] * DFTofText[i] + DFTofPattern[i] * DFTofText[transformSize-i]; } pTimesT_PR[i] = DFTofPattern[i] * DFTofText[i]; }else{ for(i=1;i<=transformSize/2;i++){ pTimesT_PR[i] = DFTofPattern[i] * DFTofText[i] - DFTofPattern[transformSize-i] * DFTofText[transformSize-i]; pTimesT_PR[transformSize-i] = DFTofPattern[transformSize-i] * DFTofText[i] + DFTofPattern[i] * DFTofText[transformSize-i]; } } /* Convert back to a coefficient representation */ //On first iteration, need to create the inverse plan if(*inverse==NULL){ *inverse = 
fftw_plan_r2r_1d(transformSize,pTimesT_PR,pTimesT,FFTW_HC2R,FFTW_ESTIMATE); fftw_execute(*inverse); }else{ fftw_execute_r2r(*inverse,pTimesT_PR,pTimesT); } for(i=0;i<=transformSize-m && (i+start)<=n-m;i++){ //printf("i+start: %d+%d=%d, value=%d\n",i,start,i+start,(int) ((pTimesT[m+i-1]/transformSize)+0.5)); //plus 0.5 to allow rounding via truncation matches[i+start] += (int)((pTimesT[m+i-1]/transformSize)+0.5); } start +=transformSize-m +1; } fftw_free(textSubString); fftw_free(DFTofPattern); fftw_free(DFTofText); fftw_free(pTimesT); fftw_free(pTimesT_PR); }
/*
  MZI-filtered FFT of a RAW B-scan.

  For every RAW A-line j (row of `in`), the samples in[j * X + mzi_indexes[i]],
  i = 0 .. mzi_length - 1, are picked, optionally multiplied by a Hanning
  window, and transformed with a real-to-halfcomplex FFTW plan. The first
  mzi_length / 2 frequency bins are written to the output buffers.

  INPUTS:
    X           - number of elements in each row of `in`
    Y           - number of rows (A-lines) to process
    mzi_length  - number of entries in mzi_indexes (FFT length)
    hann_flag   - non-zero: apply Hanning window; zero: no window
    dB_flag     - non-zero: intensity in dB (20 * log10); zero: linear
    mzi_indexes - sample positions to pick from each row
    in          - RAW B-scan

  OUTPUTS (each indexed as [j * (mzi_length / 2) + bin]):
    intensity - magnitude of each bin; the zero-frequency bin is written as
                the signed FFT value in linear mode
    phase     - atan2(Im, Re) of each bin, 0 for the zero-frequency bin
    Re, Im    - real and imaginary parts, Im = 0 for the zero-frequency bin

  RETURNS: EXIT_SUCCESS, or EXIT_FAILURE when a buffer or the plan could not
  be created (the outputs are then incomplete).

  NOTE: Re and Im are also handed to the FFTW planner as placeholder arrays;
  the transforms themselves run on private per-thread buffers.
*/
I8 mzi_fft(U32 X, U32 Y, U32 mzi_length, I8 hann_flag, I8 dB_flag,
           U32 *mzi_indexes, T1 *in, DBL *intensity, DBL *phase, DBL *Re,
           DBL *Im) {
  DBL *hann_win = static_cast<DBL *>(fftw_malloc(sizeof(DBL) * mzi_length));
  if (!hann_win) return EXIT_FAILURE;

  // create FFTW plan
  fftw_plan fft_p = fftw_plan_r2r_1d(mzi_length, Re, Im, FFTW_R2HC,
                                     FFTW_ESTIMATE);
  if (!fft_p) {
    fftw_free(hann_win);
    return EXIT_FAILURE;
  }

  U32 width = static_cast<U32>(mzi_length/2);

  // prepare Hanning window (all ones when the window is disabled)
  for (U32 i = 0; i < mzi_length; i++) {
    if (hann_flag)
      hann_win[i] = 0.5 * (1 - cos(kTwoPi * i / mzi_length));
    else
      hann_win[i] = 1.0;
  }

  I8 status = EXIT_SUCCESS;

  // parallel run by A-lines
  #pragma omp parallel default(shared)
  {
    DBL *tmp_fft_in = 0;
    DBL *tmp_fft_out = 0;

    // FFTW documents only the execute functions as thread-safe, so the
    // allocator calls are serialised. One buffer pair per thread also avoids
    // an allocation per A-line.
    #pragma omp critical (fftw_memory)
    {
      tmp_fft_in = static_cast<DBL *>(fftw_malloc(sizeof(DBL) * mzi_length));
      tmp_fft_out = static_cast<DBL *>(fftw_malloc(sizeof(DBL) * mzi_length));
      if (!tmp_fft_in || !tmp_fft_out) status = EXIT_FAILURE;
    }
    const bool have_buffers = tmp_fft_in && tmp_fft_out;

    I32 j;
    #pragma omp for
    for (j = 0; j < static_cast<I32>(Y); j++) {
      // a thread without buffers skips its share; status already says failure
      if (!have_buffers) continue;

      I32 pos = j * width;

      // MZI filter: pick up RAW A-line values for corresponding MZI indexes
      // and apply the window in the same pass
      for (U32 i = 0; i < mzi_length; i++) {
        const DBL sample = in[j * X + mzi_indexes[i]];
        tmp_fft_in[i] = sample * hann_win[i];
      }

      // perform FFT using FFTW3 library
      fftw_execute_r2r(fft_p, tmp_fft_in, tmp_fft_out);

      // ZERO components; fabs() so the magnitude can never be computed by an
      // integer abs() overload
      if (dB_flag)
        intensity[pos] = 20 * log10(fabs(tmp_fft_out[0]));
      else
        intensity[pos] = tmp_fft_out[0];
      Re[pos] = tmp_fft_out[0];
      phase[pos] = Im[pos] = 0.0;

      // construct intensity and phase information, Re and Im parts.
      // Halfcomplex layout: real part at [k], imaginary part at [length - k].
      for (U32 pos1 = 1, pos2 = mzi_length - 1; pos1 < width; pos1++, pos2--) {
        const DBL re = tmp_fft_out[pos1];
        const DBL im = tmp_fft_out[pos2];
        const DBL magnitude = sqrt(re * re + im * im);
        // intensity
        if (dB_flag)
          intensity[pos + pos1] = 20 * log10(magnitude);
        else
          intensity[pos + pos1] = magnitude;
        // phase
        phase[pos + pos1] = atan2(im, re);
        // Re part
        Re[pos + pos1] = re;
        // Im part
        Im[pos + pos1] = im;
      }
    }

    #pragma omp critical (fftw_memory)
    {
      if (tmp_fft_in) fftw_free(tmp_fft_in);
      if (tmp_fft_out) fftw_free(tmp_fft_out);
    }
  } // end of parallel code

  fftw_destroy_plan(fft_p);
  fftw_free(hann_win);
  return status;
}
/*
  Forward semi-naive projection for order m.

  Steps performed:
    1. multiply the 2*bw input samples by the quadrature weights
       (weights[2*bw + i] when m is odd, weights[i] when m is even),
    2. run *fplan on the weighted samples (the original comments call this
       the cosine transform) and rescale the output,
    3. for every degree i = m .. bw-1, take the dot product of the
       transformed data with the matching row of cos_pml_table and store it
       in result[i - m].

  data          - 2*bw input samples
  bw, m         - bandwidth and order
  result        - bw - m output values
  workspace     - scratch space, at least 4*bw doubles
                  (2*bw weighted samples followed by 2*bw transform outputs)
  cos_pml_table - table with the zero entries stripped out; the row for
                  degree i starts at NewTableOffset(m, i)
  weights       - quadrature weights, 4*bw doubles are addressed
  fplan         - FFTW plan executed with the new-array interface
*/
void SemiNaiveReduced(double *data,
		      int bw,
		      int m,
		      double *result,
		      double *workspace,
		      double *cos_pml_table,
		      double *weights,
		      fftw_plan *fplan )
{
  const int n = 2 * bw;
  double *weighted = workspace;
  double *cosine = workspace + n;
  /* odd orders use the second half of the weight array */
  const double *w = (m % 2) ? (weights + 2 * bw) : weights;
  double scale;
  int deg, k;

  /* for paranoia, zero out the result array */
  memset(result, 0, sizeof(double) * (bw - m));

  /* apply quadrature weights to the data */
  for (k = 0; k < n; ++k)
    weighted[k] = data[k] * w[k];

  /* transform the weighted signal */
  fftw_execute_r2r(*fplan, weighted, cosine);

  /* normalize: extra 1/sqrt(2) on the first coefficient, then a common
     factor on every coefficient */
  cosine[0] *= 0.707106781186547;
  scale = 1. / sqrt(2. * ((double) n));
  for (k = 0; k < n; ++k)
    cosine[k] *= scale;

  /* do the projections; the cos_pml_table has had all the zeroes stripped
     out, so only every second coefficient takes part, starting at offset 0
     or 1 depending on the parity of (deg - m) */
  for (deg = m; deg < bw; ++deg)
    {
      const int parity = (deg - m) % 2;
      const int half = deg / 2;
      const double *row = cos_pml_table + NewTableOffset(m, deg);
      const double *src = cosine + parity;
      double acc0 = 0.0, acc1 = 0.0, acc2 = 0.0, acc3 = 0.0;

      /* leftover terms first, so the unrolled loop runs in groups of four */
      k = 0;
      while (k < (half % 4))
	{
	  acc0 += src[2 * k] * row[k];
	  ++k;
	}

      /* four independent partial sums */
      while (k < half)
	{
	  acc0 += src[2 * k] * row[k];
	  acc1 += src[2 * (k + 1)] * row[k + 1];
	  acc2 += src[2 * (k + 2)] * row[k + 2];
	  acc3 += src[2 * (k + 3)] * row[k + 3];
	  k += 4;
	}

      /* the row has one more entry unless m is odd and (deg - m) is odd */
      if (parity == 0 || (m % 2) == 0)
	acc0 += src[2 * half] * row[half];

      result[deg - m] = acc0 + acc1 + acc2 + acc3;
    }
}
/*
  easy spline interpolation + FFT main function

  PURPOSE:
    calculate FFT (using fftw_plan_r2r_1d() function call from FFTW library)
    for RAW B-scan [1] converted into linear wavenumber space (k-space) from
    linear wavelength space using spline interpolation [2]. NOTE! For spectral
    domain optical coherence tomography (SD-OCT) the spectrum can be shorter
    than the RAW A-line; thus, define the range that selects only the spectrum
    part of each RAW A-line with the start_index and end_index parameters.

  INPUTS:
    X - number of elements in each row (RAW A-line size)
    Y - number of rows (# of RAW A-lines)
    start_index - first index for spectrum (left RAW A-line cut-off)
    end_index - one past the last index for spectrum (right RAW A-line
    cut-off); end_index - start_index samples are used and must be >= 2
    start_wavelength - start of wavelength range for laser
    end_wavelength - end of wavelength range for laser
    hann_flag - flag for Hanning window [3]
    dB_flag - flag for scale: linear or dB (20log())
    in - pointer to buffer with RAW B-scan before FFT (size: X * Y)

  OUTPUTS:
    intensity - pointer to buffer containing intensities, structural B-scan
    (size: ((end_index - start_index) / 2) * Y)
    phase - pointer to buffer containing phases, phase B-scan
    (size: ((end_index - start_index) / 2) * Y)
    Re - pointer to buffer containing real part of FFT
    (size: ((end_index - start_index) / 2) * Y)
    Im - pointer to buffer containing imaginary part of FFT
    (size: ((end_index - start_index) / 2) * Y)

  RETURNS:
    EXIT_SUCCESS, or EXIT_FAILURE when the index range holds fewer than two
    samples or when a buffer or the FFTW plan could not be created (the
    outputs are then incomplete).

  REMARKS:
    this function is experimental, to test the spline interpolation + FFT for
    a spectrum linear in wavelength. Use the spline_FFT.cpp file to perform
    spline interpolation + FFT for any kind of spectrum.
    Re and Im are also handed to the FFTW planner as placeholder arrays; the
    transforms themselves run on private per-thread buffers.

  REFERENCES:
    [1] http://www.fftw.org/fftw3_doc/Real_002dto_002dReal-Transforms.html
    [2] http://en.wikipedia.org/wiki/Spline_interpolation#Cubic_spline_interpolation
    [3] http://en.wikipedia.org/wiki/Hann_function
*/
I8 OL_easy_spline_fft(U32 X, U32 Y, U32 start_index, U32 end_index,
                      DBL start_wavelength, DBL end_wavelength, I8 hann_flag,
                      I8 dB_flag, DBL *in, DBL *intensity, DBL *phase,
                      DBL *Re, DBL *Im) {
  // fewer than two samples: the unsigned difference would wrap around or the
  // step computations below would divide by zero
  if (end_index <= start_index || end_index - start_index < 2)
    return EXIT_FAILURE;

  U32 size = end_index - start_index;
  DBL *hann_win = static_cast<DBL *>(fftw_malloc(sizeof(DBL) * size));
  DBL *XX = static_cast<DBL *>(fftw_malloc(sizeof(DBL) * size));
  DBL *XXX = static_cast<DBL *>(fftw_malloc(sizeof(DBL) * size));

  // create FFTW plan (only when the shared buffers exist)
  fftw_plan fft_p = 0;
  if (hann_win && XX && XXX)
    fft_p = fftw_plan_r2r_1d(size, Re, Im, FFTW_R2HC, FFTW_ESTIMATE);
  if (!fft_p) {
    if (hann_win) fftw_free(hann_win);
    if (XX) fftw_free(XX);
    if (XXX) fftw_free(XXX);
    return EXIT_FAILURE;
  }

  U32 width = static_cast<U32>(size/2);
  // linear wavelength step
  DBL wavelength_step = (end_wavelength - start_wavelength) / (size - 1);
  DBL start_wavenumber = 1 / start_wavelength;
  // linear wavenumber step
  DBL wavenumber_step = (start_wavenumber - (1 / end_wavelength)) / (size - 1);

  // prepare Hanning window and linear and non-linear wavelength vectors
  for (U32 i = 0; i < size; i++) {
    if (hann_flag)
      hann_win[i] = 0.5 * (1 - cos(kTwoPi * i / size));
    else
      hann_win[i] = 1.0;
    // linear in wavelength (where the RAW samples were taken)
    XX[i] = wavelength_step * i + start_wavelength;
    // non-linear in wavelength, i.e. linear in wavenumber (where the spline
    // is evaluated)
    XXX[i] = 1 /(start_wavenumber - wavenumber_step * i);
  }

  I8 status = EXIT_SUCCESS;

  // parallel run by A-lines
  #pragma omp parallel default(shared)
  {
    DBL *tmp_fft_in = 0;
    DBL *tmp_fft_out = 0;
    DBL *b = 0;
    DBL *c = 0;
    DBL *d = 0;

    // FFTW documents only the execute functions as thread-safe, so the
    // allocator calls are serialised. One buffer set per thread also avoids
    // five allocations per A-line.
    #pragma omp critical (fftw_memory)
    {
      tmp_fft_in = static_cast<DBL *>(fftw_malloc(sizeof(DBL) * size));
      tmp_fft_out = static_cast<DBL *>(fftw_malloc(sizeof(DBL) * size));
      b = static_cast<DBL *>(fftw_malloc(sizeof(DBL) * size));
      c = static_cast<DBL *>(fftw_malloc(sizeof(DBL) * size));
      d = static_cast<DBL *>(fftw_malloc(sizeof(DBL) * size));
      if (!tmp_fft_in || !tmp_fft_out || !b || !c || !d)
        status = EXIT_FAILURE;
    }
    const bool have_buffers = tmp_fft_in && tmp_fft_out && b && c && d;

    I32 j;
    #pragma omp for
    for (j = 0; j < static_cast<I32>(Y); j++) {
      // a thread without buffers skips its share; status already says failure
      if (!have_buffers) continue;

      U32 pos = j * width;

      // spline interpolation; tmp_fft_out temporarily holds the RAW samples
      // until the FFT overwrites it
      copy(in + j * X + start_index, in + j * X + end_index, tmp_fft_out);
      cubic_nak(size, XX, tmp_fft_out, b, c, d);

      // apply hanning window or just get values
      for (U32 i = 0; i < size; i++)
        tmp_fft_in[i] = hann_win[i] * spline_eval(size, XX, tmp_fft_out, b,
                                                  c, d, XXX[i]);

      // perform FFT using FFTW3 library
      fftw_execute_r2r(fft_p, tmp_fft_in, tmp_fft_out);

      // ZERO components; fabs() so the magnitude can never be computed by an
      // integer abs() overload
      if (dB_flag)
        intensity[pos] = 20 * log10(fabs(tmp_fft_out[0]));
      else
        intensity[pos] = tmp_fft_out[0];
      Re[pos] = tmp_fft_out[0];
      phase[pos] = Im[pos] = 0.0;

      // construct intensity and phase information, Re and Im parts.
      // Halfcomplex layout: real part at [k], imaginary part at [size - k].
      for (U32 pos1 = 1, pos2 = size - 1; pos1 < width; pos1++, pos2--) {
        const DBL re = tmp_fft_out[pos1];
        const DBL im = tmp_fft_out[pos2];
        const DBL magnitude = sqrt(re * re + im * im);
        // intensity
        if (dB_flag)
          intensity[pos + pos1] = 20 * log10(magnitude);
        else
          intensity[pos + pos1] = magnitude;
        // phase
        phase[pos + pos1] = atan2(im, re);
        // Re part
        Re[pos + pos1] = re;
        // Im part
        Im[pos + pos1] = im;
      }
    }

    #pragma omp critical (fftw_memory)
    {
      if (tmp_fft_in) fftw_free(tmp_fft_in);
      if (tmp_fft_out) fftw_free(tmp_fft_out);
      if (b) fftw_free(b);
      if (c) fftw_free(c);
      if (d) fftw_free(d);
    }
  } // end of parallel code

  fftw_destroy_plan(fft_p);
  fftw_free(hann_win);
  fftw_free(XX);
  fftw_free(XXX);
  return status;
}
/*
  Inverse semi-naive transform for order m.

  Steps performed:
    1. build the coefficient array fcos[0 .. bw-1] (called the cosine series
       in the original comments) as dot products of the input coefficients
       with consecutive rows of trans_cos_pml_table,
    2. scale the series and run *fplan on it (described as an inverse DCT in
       the original comments), writing 2*bw samples to result,
    3. when m is odd, multiply the samples by sin_values.

  coeffs              - input coefficients for order m
  bw, m               - bandwidth and order
  result              - 2*bw output samples
  trans_cos_pml_table - transposed table with the zero entries stripped out;
                        row i holds Transpose_RowSize(i, m, bw) values and the
                        rows are stored back to back
  sin_values          - 2*bw multipliers, used only when m is odd
  workspace           - scratch space, at least 2*bw doubles
  fplan               - FFTW plan executed with the new-array interface
*/
void InvSemiNaiveReduced(double *coeffs,
			 int bw,
			 int m,
			 double *result,
			 double *trans_cos_pml_table,
			 double *sin_values,
			 double *workspace,
			 fftw_plan *fplan )
{
  double *cosine = workspace;
  const double *row = trans_cos_pml_table;
  const int m_mod2 = m % 2;
  const int samples = 2 * bw;
  double scale;
  int deg, k;

  /* for paranoia, zero out arrays */
  memset(cosine, 0, sizeof(double) * 2 * bw);
  memset(result, 0, sizeof(double) * 2 * bw);

  /* main loop - compute each value of the series. All zeroes have been
     stripped out of the table, so each row pairs with every second input
     coefficient, starting at an offset that depends on deg and m. */
  for (deg = 0; deg < bw; ++deg)
    {
      const double *src;
      double sum0 = 0.0, sum1 = 0.0, sum2 = 0.0, sum3 = 0.0;
      int len, lead;

      /* for odd m the last entry is set to zero directly */
      if (deg == (bw - 1) && m_mod2)
	{
	  cosine[bw - 1] = 0.0;
	  break;
	}

      len = Transpose_RowSize(deg, m, bw);

      if (deg > m)
	src = coeffs + (deg - m) + m_mod2;
      else
	src = coeffs + (deg % 2);

      /* leftover terms first, so the unrolled loop runs in groups of four */
      lead = len % 4;
      for (k = 0; k < lead; ++k)
	sum0 += src[2 * k] * row[k];

      /* four independent partial sums */
      while (k < len)
	{
	  sum0 += src[2 * k] * row[k];
	  sum1 += src[2 * (k + 1)] * row[k + 1];
	  sum2 += src[2 * (k + 2)] * row[k + 2];
	  sum3 += src[2 * (k + 3)] * row[k + 3];
	  k += 4;
	}

      cosine[deg] = sum0 + sum1 + sum2 + sum3;

      /* rows are stored back to back */
      row += len;
    }

  /* scale coefficients prior to running the plan: the first entry gets its
     own factor, every other entry the common one */
  cosine[0] /= sqrt(2. * ((double) bw));
  scale = 0.5 / sqrt((double) bw);
  for (k = 1; k < samples; ++k)
    cosine[k] *= scale;

  /* now run the transform (new-array execute interface) */
  fftw_execute_r2r(*fplan, cosine, result);

  /* if m is odd, then need to multiply by the sin_values */
  if (m_mod2)
    {
      for (k = 0; k < samples; ++k)
	result[k] *= sin_values[k];
    }
}