/*
 * Accumulates FFT-based convolution results into matches[0 .. n-m].
 *
 * The text t is processed in overlapping blocks of transformSize values.
 * Each block is transformed (real-to-halfcomplex), multiplied point-wise
 * with the transform of p, and transformed back. For every offset `pos`
 * in [0, n-m] the rounded value of the product at index (m-1 + pos - start)
 * of the block beginning at `start` is ADDED to matches[pos]
 * (matches is not cleared here).
 *
 * t             - text values; only t[0 .. n-1] is read
 * p             - pattern values; transformSize doubles are read from it
 *                 (presumably the reversed, zero-padded pattern - confirm
 *                 against the caller)
 * n, m          - number of text values / pattern length
 * transformSize - length of every FFT; must be >= m
 * matches       - accumulator with at least n-m+1 entries
 * forward       - cached R2HC plan; created here when *forward is NULL
 * inverse       - cached HC2R plan; created here when *inverse is NULL
 *
 * The plans are never destroyed by this function. Because they are reused
 * through the new-array execute interface, later calls must pass the same
 * transformSize and buffers that satisfy FFTW's alignment rules.
 */
inline void computeNumMatchesWithFFT(double *t, double *p,int n, int m,
		int transformSize,int *matches, fftw_plan *forward, fftw_plan *inverse){

   printf("USING FFT\n");

   printf("n: %d m: %d\n",n, m);

	//TODO: Should probably move these out of the function. No point allocating
	//and de-allocating the memory over and over for each character
	double *textSubString = (double *) fftw_malloc(sizeof(double) * transformSize);
	double *DFTofPattern = (double *) fftw_malloc(sizeof(double) * transformSize);
	double *DFTofText = (double *) fftw_malloc(sizeof(double) * transformSize);
	double *pTimesT = (double *) fftw_malloc(sizeof(double) * transformSize);
	double *pTimesT_PR = (double *) fftw_malloc(sizeof(double) * transformSize);
	int i;

	//Bail out cleanly instead of dereferencing a failed allocation
	if(textSubString==NULL || DFTofPattern==NULL || DFTofText==NULL
			|| pTimesT==NULL || pTimesT_PR==NULL){
		fprintf(stderr,"computeNumMatchesWithFFT: out of memory\n");
		if(textSubString!=NULL) fftw_free(textSubString);
		if(DFTofPattern!=NULL) fftw_free(DFTofPattern);
		if(DFTofText!=NULL) fftw_free(DFTofText);
		if(pTimesT!=NULL) fftw_free(pTimesT);
		if(pTimesT_PR!=NULL) fftw_free(pTimesT_PR);
		return;
	}

	//Transform the pattern once; the plan is created lazily and cached
	if(*forward==NULL){
		*forward = fftw_plan_r2r_1d(transformSize,p,DFTofPattern,FFTW_R2HC,FFTW_ESTIMATE);
		fftw_execute(*forward);
	}else{
		fftw_execute_r2r(*forward,p,DFTofPattern);
	}

	//Each block yields this many wrap-around-free alignments. A stride that
	//is not positive (transformSize < m) would never advance, so skip the loop.
	const int step = transformSize - m + 1;
	int start = 0;

	//Perform transforms on sub-strings of the text values
	while(step > 0 && m > 0 && start <= n-m){
		//Copy only the text that actually exists and zero-fill the rest;
		//the last block usually extends past t[n-1]. The padded tail only
		//influences alignments beyond n-m, which are discarded below.
		int available = n - start;
		if(available > transformSize){
			available = transformSize;
		}
		memcpy(textSubString,t+start,sizeof(double)*available);
		for(i=available;i<transformSize;i++){
			textSubString[i] = 0.0;
		}
		fftw_execute_r2r(*forward,textSubString,DFTofText);

		/* Multiply the point representations.
		   Halfcomplex layout: index i holds the real part and index
		   transformSize-i the imaginary part of frequency bin i. */
		pTimesT_PR[0] = DFTofPattern[0] * DFTofText[0];

		THE_COUNT++;

		for(i=1;2*i<transformSize;i++){
			pTimesT_PR[i] = DFTofPattern[i] * DFTofText[i] - DFTofPattern[transformSize-i] * DFTofText[transformSize-i];
			pTimesT_PR[transformSize-i] = DFTofPattern[transformSize-i] * DFTofText[i] + DFTofPattern[i] * DFTofText[transformSize-i];
		}
		if(transformSize % 2==0){
			//Even length: the middle bin is purely real
			pTimesT_PR[transformSize/2] = DFTofPattern[transformSize/2] * DFTofText[transformSize/2];
		}

		/* Convert back to a coefficient representation */

		//On first iteration, need to create the inverse plan
		if(*inverse==NULL){
			*inverse = fftw_plan_r2r_1d(transformSize,pTimesT_PR,pTimesT,FFTW_HC2R,FFTW_ESTIMATE);
			fftw_execute(*inverse);
		}else{
			fftw_execute_r2r(*inverse,pTimesT_PR,pTimesT);
		}

		//FFTW transforms are unnormalised, hence the division by transformSize
		for(i=0;i<step && (i+start)<=n-m;i++){
			//plus 0.5 to allow rounding via truncation
			matches[i+start] += (int)((pTimesT[m+i-1]/transformSize)+0.5);
		}

		start += step;
	}
	fftw_free(textSubString);
	fftw_free(DFTofPattern);
	fftw_free(DFTofText);
	fftw_free(pTimesT);
	fftw_free(pTimesT_PR);
}
示例#2
0
// Computes a real-to-halfcomplex FFT for each of the Y rows (A-lines) of `in`.
//
// For every row j the samples in[j * X + mzi_indexes[i]], i in
// [0, mzi_length), are gathered, multiplied by a Hann window (or by 1.0 when
// hann_flag is zero) and transformed. The first mzi_length / 2 frequency bins
// are written to row j of the outputs, i.e. at offset j * (mzi_length / 2):
//   intensity - magnitude, or 20*log10(magnitude) when dB_flag is non-zero
//               (bin 0 stores the signed DC value in the linear case)
//   phase     - atan2(Im, Re), 0 for bin 0
//   Re, Im    - real and imaginary parts, Im is 0 for bin 0
// Each output therefore needs at least (mzi_length / 2) * Y elements.
//
// Returns EXIT_SUCCESS, or EXIT_FAILURE when a work buffer or the FFTW plan
// could not be created (rows whose buffers could not be allocated are left
// untouched).
I8 mzi_fft(U32 X, U32 Y, U32 mzi_length, I8 hann_flag, I8 dB_flag,
           U32 *mzi_indexes, T1 *in, DBL *intensity, DBL *phase, DBL *Re,
           DBL *Im) {
  DBL *hann_win = static_cast<DBL *>(fftw_malloc(sizeof(DBL) * mzi_length));
  if (hann_win == NULL)
    return EXIT_FAILURE;
  // create FFTW plan; with FFTW_ESTIMATE the Re/Im arrays only serve as
  // placeholders, the transform itself runs on per-row buffers
  fftw_plan fft_p = fftw_plan_r2r_1d(mzi_length, Re, Im, FFTW_R2HC,
                                     FFTW_ESTIMATE);
  if (fft_p == NULL) {
    fftw_free(hann_win);
    return EXIT_FAILURE;
  }
  const U32 width = static_cast<U32>(mzi_length / 2);
  I8 status = EXIT_SUCCESS;
  I32 j;
  // prepare Hanning window (all ones when windowing is disabled)
  for (U32 i = 0; i < mzi_length; i++)
    hann_win[i] = hann_flag ? 0.5 * (1 - cos(kTwoPi * i / mzi_length)) : 1.0;
  // parallel run by A-lines
  #pragma omp parallel for default(shared) private(j)
  for (j = 0; j < static_cast<I32>(Y); j++) {
    DBL *tmp_fft_in = static_cast<DBL *>(fftw_malloc(sizeof(DBL) * mzi_length));
    DBL *tmp_fft_out =static_cast<DBL *>(fftw_malloc(sizeof(DBL) * mzi_length));
    if (tmp_fft_in == NULL || tmp_fft_out == NULL) {
      // cannot process this A-line: release what we got and flag the failure
      if (tmp_fft_in != NULL) fftw_free(tmp_fft_in);
      if (tmp_fft_out != NULL) fftw_free(tmp_fft_out);
      #pragma omp critical
      status = EXIT_FAILURE;
      continue;
    }
    const U32 pos = static_cast<U32>(j) * width;
    // MZI filter: pick up RAW A-line values for corresponding MZI indexes
    // and apply the window (which doubles as the cast to DBL)
    for (U32 i = 0; i < mzi_length; i++)
      tmp_fft_in[i] = static_cast<DBL>(in[j * X + mzi_indexes[i]]) * hann_win[i];
    // perform FFT using FFTW3 library
    fftw_execute_r2r(fft_p, tmp_fft_in, tmp_fft_out);
    // ZERO components; fabs (not abs) so the value is never truncated to int
    const DBL dc = tmp_fft_out[0];
    intensity[pos] = dB_flag ? 20 * log10(fabs(dc)) : dc;
    Re[pos] = dc;
    phase[pos] = Im[pos] = 0.0;
    // construct intensity and phase information, Re and Im parts.
    // Halfcomplex layout: real part at pos1, imaginary part at
    // mzi_length - pos1.
    for (U32 pos1 = 1, pos2 = mzi_length - 1; pos1 < width; pos1++, pos2--) {
      const DBL re = tmp_fft_out[pos1];
      const DBL im = tmp_fft_out[pos2];
      const DBL magnitude = sqrt(re * re + im * im);
      intensity[pos + pos1] = dB_flag ? 20 * log10(magnitude) : magnitude;
      phase[pos + pos1] = atan2(im, re);
      Re[pos + pos1] = re;
      Im[pos + pos1] = im;
    }
    fftw_free(tmp_fft_in);
    fftw_free(tmp_fft_out);
  }  // end of parallel code
  fftw_destroy_plan(fft_p);
  fftw_free(hann_win);
  return status;
}
示例#3
0
/*
  SemiNaiveReduced: weights the 2*bw input samples, runs them through the
  supplied FFTW real-to-real plan, normalises the output and projects it
  onto the rows of cos_pml_table for degrees m .. bw-1.

  data          - 2*bw input samples
  bw, m         - bandwidth and order; bw-m results are produced
  result        - output, bw-m values (result[l-m] for degree l)
  workspace     - scratch space, at least 4*bw doubles
                  (2*bw weighted samples followed by 2*bw transform values)
  cos_pml_table - table with the zeroes stripped out; the row for degree l
                  starts at NewTableOffset(m,l)
  weights       - quadrature weights; entries [0, 2*bw) are used for even m,
                  entries [2*bw, 4*bw) for odd m
  fplan         - plan executed on (weighted samples -> transform values);
                  presumably a DCT plan of length 2*bw - confirm with caller
*/
void SemiNaiveReduced(double *data, 
		      int bw, 
		      int m, 
		      double *result,
		      double *workspace,
		      double *cos_pml_table, 
		      double *weights,
		      fftw_plan *fplan )
{
  const int npts = 2*bw;
  double *wdata = workspace;
  double *cosv = workspace + (2*bw);
  double *wts = (m % 2) ? (weights + 2*bw) : weights;
  double scale;
  int k, deg, parity;

  /* every entry is overwritten below; cleared up front as a precaution */
  memset( result, 0, sizeof(double)*(bw-m));

  /* apply the quadrature weights that belong to the parity of m */
  for ( k = 0 ; k < npts ; ++k )
    wdata[k] = data[k] * wts[k];

  /* transform the weighted signal */
  fftw_execute_r2r( *fplan, wdata, cosv );

  /* normalise: the first coefficient gets an extra 1/sqrt(2) */
  cosv[0] *= 0.707106781186547 ;
  scale = 1./sqrt(2. * ((double) npts ) );
  for ( k = 0 ; k < npts ; ++k )
    cosv[k] *= scale ;

  /*
    projections: only every second transform value contributes, starting
    at index `parity`, which flips with each degree. The sum is kept in
    four partial accumulators (4-way unrolled).
  */
  parity = 0 ;
  for ( deg = m ; deg < bw ; ++deg )
    {
      double *row = cos_pml_table + NewTableOffset(m,deg);
      double *src = cosv + parity ;
      const int half = deg/2 ;
      const int head = half % 4 ;
      double acc0 = 0.0, acc1 = 0.0, acc2 = 0.0, acc3 = 0.0 ;
      int j = 0 ;

      /* leading (half % 4) terms so the unrolled part runs in fours */
      while ( j < head )
	{
	  acc0 += src[2*j] * row[j];
	  ++j ;
	}

      while ( j < half )
	{
	  acc0 += src[2*j] * row[j];
	  acc1 += src[2*(j+1)] * row[j+1];
	  acc2 += src[2*(j+2)] * row[j+2];
	  acc3 += src[2*(j+3)] * row[j+3];
	  j += 4 ;
	}

      /* the row has one extra entry unless m is odd and deg-m is odd */
      if ( ((m % 2) == 0) || (((deg-m) % 2) == 0) )
	acc0 += src[2*half] * row[half];

      result[deg-m] = acc0 + acc1 + acc2 + acc3 ;

      parity = 1 - parity ;
    }
}
示例#4
0
/* easy spline interpolation + FFT main function
  PURPOSE:
    calculate the FFT (using a fftw_plan_r2r_1d() plan from the FFTW library)
    of a RAW B-scan [1] after resampling it from linear wavelength space to
    linear wavenumber space (k-space) with cubic spline interpolation [2].
    NOTE! For spectral domain optical coherence tomography (SD-OCT) the
    spectrum can be shorter than the RAW A-line; use start_index and end_index
    to select only the spectrum part of each RAW A-line.
  
  INPUTS:
    X - number of elements in each row (RAW A-line size)
    Y - number of rows (# of RAW A-lines)
    start_index - first index for spectrum (left RAW A-line cut-off)
    end_index - index one past the last spectrum sample (right RAW A-line
    cut-off); end_index - start_index samples are used
    start_wavelength - start of wavelength range for laser
    end_wavelength - end of wavelength range for laser
    hann_flag - flag for Hanning window [3]
    dB_flag - flag for scale: linear or dB (20log())
    in - pointer to buffer with RAW B-scan before FFT (size: X * Y)
  
  OUTPUTS:
    intensity - pointer to buffer receiving intensities, structural B-scan
    (size: ((end_index - start_index) / 2) * Y)
    phase - pointer to buffer receiving phases, phase B-scan
    (size: ((end_index - start_index) / 2) * Y)
    Re - pointer to buffer receiving the real part of the FFT
    (size: ((end_index - start_index) / 2) * Y)
    Im - pointer to buffer receiving the imaginary part of the FFT
    (size: ((end_index - start_index) / 2) * Y)
  
  RETURNS:
    EXIT_SUCCESS, or EXIT_FAILURE when a work buffer or the FFTW plan could
    not be created (A-lines whose buffers could not be allocated are left
    untouched).
  
  REMARKS:
    this function is experimental to test the spline interpolation + FFT for
    spectrum linear in wavelengths. Use spline_FFT.cpp file to perform spline
    interpolation + FFT for any kind of spectrum.
  
  REFERENCES:
    [1] http://www.fftw.org/fftw3_doc/Real_002dto_002dReal-Transforms.html
    [2] http://en.wikipedia.org/wiki/Spline_interpolation#Cubic_spline_interpolation
    [3] http://en.wikipedia.org/wiki/Hann_function 
*/
I8 OL_easy_spline_fft(U32 X, U32 Y, U32 start_index, U32 end_index,
                      DBL start_wavelength, DBL end_wavelength, I8 hann_flag,
                      I8 dB_flag, DBL *in, DBL *intensity, DBL *phase, DBL *Re,
                      DBL *Im) {
  const U32 size = end_index - start_index;
  DBL *hann_win = static_cast<DBL *>(fftw_malloc(sizeof(DBL) * size));
  DBL *XX = static_cast<DBL *>(fftw_malloc(sizeof(DBL) * size));
  DBL *XXX = static_cast<DBL *>(fftw_malloc(sizeof(DBL) * size));
  // create FFTW plan; with FFTW_ESTIMATE the Re/Im arrays only serve as
  // placeholders, the transform itself runs on per-A-line buffers
  fftw_plan fft_p = NULL;
  if (hann_win != NULL && XX != NULL && XXX != NULL)
    fft_p = fftw_plan_r2r_1d(size, Re, Im, FFTW_R2HC, FFTW_ESTIMATE);
  if (fft_p == NULL) {
    // an allocation or the planner failed: release whatever was obtained
    if (hann_win != NULL) fftw_free(hann_win);
    if (XX != NULL) fftw_free(XX);
    if (XXX != NULL) fftw_free(XXX);
    return EXIT_FAILURE;
  }
  const U32 width = static_cast<U32>(size / 2);
  // linear wavelength step
  const DBL wavelength_step = (end_wavelength - start_wavelength) / (size - 1);
  const DBL start_wavenumber = 1 / start_wavelength;
  // linear wavenumber step
  const DBL wavenumber_step =
      (start_wavenumber - (1 / end_wavelength)) / (size - 1);
  I8 status = EXIT_SUCCESS;
  I32 j;
  // prepare Hanning window and linear and non-linear wavelength vectors
  for (U32 i = 0; i < size; i++) {
    hann_win[i] = hann_flag ? 0.5 * (1 - cos(kTwoPi * i / size)) : 1.0;
    // linear in wavelength: positions of the measured samples
    XX[i] = wavelength_step * i + start_wavelength;
    // non-linear in wavelength (linear in wavenumber): resampling positions
    XXX[i] = 1 /(start_wavenumber - wavenumber_step * i);
  }
  // parallel run by A-lines
  #pragma omp parallel for default(shared) private(j)
  for (j = 0; j < static_cast<I32>(Y); j++) {
    DBL *tmp_fft_in = static_cast<DBL *>(fftw_malloc(sizeof(DBL) * size));
    DBL *tmp_fft_out = static_cast<DBL *>(fftw_malloc(sizeof(DBL) * size));
    DBL *b = static_cast<DBL *>(fftw_malloc(sizeof(DBL) * size));
    DBL *c = static_cast<DBL *>(fftw_malloc(sizeof(DBL) * size));
    DBL *d = static_cast<DBL *>(fftw_malloc(sizeof(DBL) * size));
    if (tmp_fft_in == NULL || tmp_fft_out == NULL || b == NULL || c == NULL ||
        d == NULL) {
      // cannot process this A-line: release what we got and flag the failure
      if (tmp_fft_in != NULL) fftw_free(tmp_fft_in);
      if (tmp_fft_out != NULL) fftw_free(tmp_fft_out);
      if (b != NULL) fftw_free(b);
      if (c != NULL) fftw_free(c);
      if (d != NULL) fftw_free(d);
      #pragma omp critical
      status = EXIT_FAILURE;
      continue;
    }
    const U32 pos = j * width;
    // spline interpolation: tmp_fft_out temporarily holds the raw spectrum
    copy(in + j * X + start_index, in + j * X + end_index, tmp_fft_out);
    cubic_nak(size, XX, tmp_fft_out, b, c, d);
    // apply hanning window or just get values
    for (U32 i = 0; i < size; i++)
      tmp_fft_in[i] =
          hann_win[i] * spline_eval(size, XX, tmp_fft_out, b, c, d, XXX[i]);
    // perform FFT using FFTW3 library
    fftw_execute_r2r(fft_p, tmp_fft_in, tmp_fft_out);
    // ZERO components; fabs (not abs) so the value is never truncated to int
    const DBL dc = tmp_fft_out[0];
    intensity[pos] = dB_flag ? 20 * log10(fabs(dc)) : dc;
    Re[pos] = dc;
    phase[pos] = Im[pos] = 0.0;
    // construct intensity and phase information, Re and Im parts.
    // Halfcomplex layout: real part at pos1, imaginary part at size - pos1.
    for (U32 pos1 = 1, pos2 = size - 1; pos1 < width; pos1++, pos2--) {
      const DBL re = tmp_fft_out[pos1];
      const DBL im = tmp_fft_out[pos2];
      const DBL magnitude = sqrt(re * re + im * im);
      intensity[pos + pos1] = dB_flag ? 20 * log10(magnitude) : magnitude;
      phase[pos + pos1] = atan2(im, re);
      Re[pos + pos1] = re;
      Im[pos + pos1] = im;
    }
    fftw_free(tmp_fft_in);
    fftw_free(tmp_fft_out);
    fftw_free(b);
    fftw_free(c);
    fftw_free(d);
  }  // end of parallel code
  fftw_destroy_plan(fft_p);
  fftw_free(hann_win);
  fftw_free(XX);
  fftw_free(XXX);
  return status;
}
示例#5
0
/*
  InvSemiNaiveReduced: builds a cosine series of length bw from the
  coefficients and the transposed table, scales it, and runs it through
  the supplied FFTW real-to-real plan; for odd m the output is multiplied
  by sin_values.

  coeffs              - input coefficients
  bw, m               - bandwidth and order
  result              - output, 2*bw values
  trans_cos_pml_table - transposed table with the zeroes stripped out;
                        rows are stored back to back, row i has
                        Transpose_RowSize(i, m, bw) entries
  sin_values          - 2*bw factors applied to the output when m is odd
  workspace           - scratch space, at least 2*bw doubles
  fplan               - plan executed on (cosine series -> result);
                        presumably an inverse DCT of length 2*bw -
                        confirm with caller
*/
void InvSemiNaiveReduced(double *coeffs,
			 int bw, 
			 int m, 
			 double *result, 
			 double *trans_cos_pml_table, 
			 double *sin_values,
			 double *workspace,
			 fftw_plan *fplan )
{
  double *cosSeries = workspace ;
  double *row = trans_cos_pml_table ;
  double scale ;
  int deg, k ;

  /* start from zeroed arrays; entries bw .. 2*bw-1 of the series stay zero */
  memset( cosSeries, 0, sizeof(double) * 2 * bw );
  memset( result, 0, sizeof(double) * 2 * bw );

  /*
    one dot product per series entry. Zeroes have been stripped from the
    table, so each row pairs with every second coefficient starting at an
    offset that depends on deg and m.
  */
  for ( deg = 0 ; deg < bw ; ++deg )
    {
      double *src ;
      double acc0 = 0.0, acc1 = 0.0, acc2 = 0.0, acc3 = 0.0 ;
      int len, head, j ;

      /* for odd m the last entry is zero by construction: stop early */
      if ( (deg == (bw-1)) && (m % 2) )
	{
	  cosSeries[bw-1] = 0.0 ;
	  break ;
	}

      len = Transpose_RowSize(deg, m, bw);
      src = (deg > m) ? (coeffs + (deg - m) + (m % 2))
		      : (coeffs + (deg % 2)) ;

      /* leading (len % 4) terms, then the rest four at a time */
      head = len % 4 ;
      j = 0 ;
      while ( j < head )
	{
	  acc0 += src[2*j] * row[j];
	  ++j ;
	}
      while ( j < len )
	{
	  acc0 += src[2*j] * row[j];
	  acc1 += src[2*(j+1)] * row[j+1];
	  acc2 += src[2*(j+2)] * row[j+2];
	  acc3 += src[2*(j+3)] * row[j+3];
	  j += 4 ;
	}
      cosSeries[deg] = acc0 + acc1 + acc2 + acc3 ;

      /* rows are contiguous: step to the next one */
      row += len ;
    }

  /*
    scale the series before the transform; the first entry uses its own
    factor
  */
  scale = 0.5 / sqrt((double) bw) ;
  for ( k = 1 ; k < 2*bw ; ++k )
    cosSeries[k] *= scale ;
  cosSeries[0] /= sqrt(2. * ((double) bw));

  /* evaluate the series with the caller-supplied plan */
  fftw_execute_r2r( *fplan, cosSeries, result );

  /* if m is odd, multiply by the supplied sine values */
  if ( m % 2 )
    {
      for ( k = 0 ; k < (2*bw) ; ++k )
	result[k] *= sin_values[k];
    }
}