Beispiel #1
0
/*
 * Build a plan for a size-n transform in direction dir and time it.
 *
 * A non-zero `specific` selects fftw_create_plan_specific(), which is
 * handed the actual in/out buffers; otherwise fftw_create_plan() is used.
 * When the global `paranoid` is set and FFTW_IN_PLACE is not among `flags`,
 * the plan is additionally run through test_ergun().  Planner, validation
 * and per-transform timings are reported through WHEN_VERBOSE.
 */
void test_speed_aux(int n, fftw_direction dir, int flags, int specific)
{
     /* Every plan in this test is created with the same combined flags. */
     const int plan_flags = speed_flag | flags | wisdom_flag | no_vector_flag;
     fftw_complex *in, *out;
     fftw_plan plan;
     fftw_time tick, tock;
     double t;

     in = (fftw_complex *) fftw_malloc(n * howmany_fields
                                       * sizeof(fftw_complex));
     out = (fftw_complex *) fftw_malloc(n * howmany_fields
                                        * sizeof(fftw_complex));

     /* Time the planner itself. */
     tick = fftw_get_time();
     if (specific)
          plan = fftw_create_plan_specific(n, dir, plan_flags,
                                           in, howmany_fields,
                                           out, howmany_fields);
     else
          plan = fftw_create_plan(n, dir, plan_flags);
     tock = fftw_get_time();

     CHECK(plan != NULL, "can't create plan");

     t = fftw_time_to_sec(fftw_time_diff(tock, tick));
     WHEN_VERBOSE(2, printf("time for planner: %f s\n", t));

     WHEN_VERBOSE(2, fftw_print_plan(plan));

     /* Optional self-check; skipped for in-place plans. */
     if (paranoid && (flags & FFTW_IN_PLACE) == 0) {
          tick = fftw_get_time();
          test_ergun(n, dir, plan);
          tock = fftw_get_time();
          t = fftw_time_to_sec(fftw_time_diff(tock, tick));
          WHEN_VERBOSE(2, printf("time for validation: %f s\n", t));
     }

     /* The macro stores the measured time for one call into t. */
     FFTW_TIME_FFT(fftw(plan, howmany_fields,
                        in, howmany_fields, 1, out, howmany_fields, 1),
                   in, n * howmany_fields, t);

     fftw_destroy_plan(plan);

     /*
      * The two time strings are printed by separate printf calls, exactly
      * as in the original sequence of statements.
      */
     WHEN_VERBOSE(1, printf("time for one fft: %s", smart_sprint_time(t)));
     WHEN_VERBOSE(1, printf(" (%s/point)\n", smart_sprint_time(t / n)));
     WHEN_VERBOSE(1, printf("\"mflops\" = 5 (n log2 n) / (t in microseconds)"
                            " = %f\n", howmany_fields * mflops(t, n)));

     fftw_free(in);
     fftw_free(out);

     WHEN_VERBOSE(1, printf("\n"));
}
Beispiel #2
0
/*
 * Report the floating-point throughput needed to run N-point complex FFTs
 * continuously on a stream sampled at FS Hz and, unless estimation is
 * requested, time that workload on the current machine.
 *
 * Command line:  N [FS [WINDOW [EST]]]
 *   N      - FFT length; a trailing k/K or m/M multiplies by 2^10 / 2^20
 *   FS     - sample rate in Hz; optional suffix k, M, G, T (or m = milli)
 *   WINDOW - window overlap, used as a fraction in [0, 1)
 *   EST    - non-zero: plan with FFTW_ESTIMATE and skip the timing loop
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE on invalid arguments or when FFTW
 * cannot allocate the buffers or create the plan.
 */
int main(int argc, char *argv[], char *envp[])
{
	fftw_complex *in = NULL, *out = NULL;
	fftw_plan p = NULL;
	size_t N = 1024;
	double fs_hz, FS = 1e6;	// 1MHz sample rate
	double WIN = 0.0;	// 0% window overlap
	double nadd,nmul,nfma,nflops,ntotal,factor;
	const char *units, *fs_units;
	bool EST = false;
	char *endp = NULL;
	cpuid_info_t cpu;
	const char *mp_suffix = "";
	int rc = EXIT_FAILURE;

	(void)envp;	// accepted for compatibility, not used

	if(argc>1 && ( *argv[1]=='?' || *argv[1]=='-' ) ) usage(argv[0]);

	// Each suffix is examined only right after its own argument was parsed;
	// otherwise the suffix left over from N would also be applied to FS.
	errno = 0;
	if(argc>1)
	{
		N = strtoul(argv[1],&endp,0);
		if(errno) perror("N");
		errno = 0;
		if(endp)
		{
			// tolower() requires a value representable as unsigned char
			int suffix = tolower((unsigned char)*endp);
			     if(suffix=='k') N *= 1024;
			else if(suffix=='m') N *= 1024*1024;
		}
	}
	if(argc>2)
	{
		endp = NULL;
		FS = strtod(argv[2],&endp);
		if(errno) perror("FS(Hz)");
		errno = 0;
		if(endp)
		{
			switch(*endp)	// common use would be to qualify with a 2nd character
			{
			case 'k': FS *= 1e3;  break;
			case 'M': FS *= 1e6;  break;
			case 'G': FS *= 1e9;  break;
			case 'T': FS *= 1e12; break;	// I'm dreaming of the day...
			case 'm': FS /= 1e3;  break;	// ok, kind of silly
			default:              break;
			}
		}
	}
	if(argc>3)
	{
		WIN = strtod(argv[3],NULL);
		if(errno) perror("WINDOW(%)");
		errno = 0;
	}
	if(argc>4) EST = atoi(argv[4])?true:false;

	// fftw_plan_dft_1d() takes the length as an int, and N is used as a divisor below.
	if( (int)N <= 0 || (size_t)(int)N != N )
	{
		fprintf(stderr, "N must be a positive value that fits in an int\n");
		return EXIT_FAILURE;
	}
	// The overlap factor divides by (1 - WIN); written this way to reject NaN too.
	if( !(WIN >= 0.0 && WIN < 1.0) )
	{
		fprintf(stderr, "WINDOW overlap must be a fraction in [0, 1), e.g. 0.5 for 50%%\n");
		return EXIT_FAILURE;
	}

	cpuid_get_info( &cpu );
	fs_hz = FS;	// keep the unscaled rate; FS is rescaled for display below

	// http://www.fftw.org/fftw3_doc/Usage-of-Multi_002dthreaded-FFTW.html#Usage-of-Multi_002dthreaded-FFTW
#if defined(_OPENMP) // || defined(_POSIX_THREADS)
	if( cpu.threads > 1 )
	{
		fftw_init_threads();
		fftw_plan_with_nthreads(cpu.threads);
		mp_suffix = "-omp";
	}
#endif

	printf("FFT("__SIZE_T_SPECIFIER", %s) :\n",N,EST?"estimated":"measured");
	in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N);
	out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N);
	if( !in || !out )
	{
		fprintf(stderr, "fftw_malloc failed\n");
		goto cleanup;
	}
	p = fftw_plan_dft_1d((int)N, in, out, FFTW_FORWARD, EST?FFTW_ESTIMATE:FFTW_MEASURE);
	if( !p )
	{
		fprintf(stderr, "fftw_plan_dft_1d failed\n");
		goto cleanup;
	}
	fftw_flops(p,&nadd,&nmul,&nfma);
	fftw_print_plan(p);
	// a fused multiply-add counts once when the CPU has FMA hardware, twice otherwise
	nflops = ntotal = nadd+nmul+((cpu.exts.XOP||cpu.exts.FMA3||cpu.exts.FMA4)?nfma:2*nfma);

	printf("\nFLOPS: add=%.0f mul=%.0f fma=%.0f total=%.0f Flops/frame\n",
		nadd, nmul, nfma, ntotal);
	factor  = (1.0 + ( 1.0 / ( 1.0 - WIN ))) / 2.0 ;	// additional FFTs required due to overlap
	ntotal *= (FS * factor) / N;						// FFTs = FS/N times the factor due to overlap
	     if( ntotal > 5e17) { ntotal /= 1e18;units = "ExaFlops"; }
	else if( ntotal > 5e14) { ntotal /= 1e15;units = "PFlops"; }
	else if( ntotal > 5e11) { ntotal /= 1e12;units = "TFlops"; }
	else if( ntotal > 5e8 ) { ntotal /= 1e9; units = "GFlops"; }
	else if( ntotal > 5e5 ) { ntotal /= 1e6; units = "MFlops"; }
	else if( ntotal > 5e2 ) { ntotal /= 1e3; units = "KFlops"; }
	else                    {                units = "Flops";  }
	     if( FS > 5e11) { FS /= 1e12;fs_units = "THz"; }
	else if( FS > 5e8 ) { FS /= 1e9; fs_units = "GHz"; }
	else if( FS > 5e5 ) { FS /= 1e6; fs_units = "MHz"; }
	else if( FS > 5e2 ) { FS /= 1e3; fs_units = "KHz"; }
	else                {            fs_units = "Hz";  }
	printf("FS=%.2f%s, %.2f%% overlap, %.2f %s (%s method)\n",
		FS, fs_units, WIN*100.0, ntotal, units, EST?"by estimate":"by measure");

#ifndef FFTW_DLL
	// TODO: doesn't work for MSVC build
	printf("FFTw Version = %s%s\n", fftw_version, mp_suffix);
#else
	printf("FFTw Version = TBD%s\n", mp_suffix);
#endif
	printf("Current CPU = %s\n", cpu.name.str);
	printf("CPU Threads = %d\n", cpu.threads);

	// Execute the number of FFTs needed for one second of input and time them.
	if( !EST )
	{
		clock_t start, stop;
		double  elapsed, fps;
		double *samples = (double *)in;
		size_t  jj;
		int ii, ffts = 1 * (int)((fs_hz * factor) / N);

		// Planning with FFTW_MEASURE may overwrite the buffers, so give the
		// timed transforms a defined input (each complex sample is two doubles).
		for(jj=0;jj<2*N;jj++) samples[jj] = 0.0;

		// the total amount of work necessary to go through 1sec of input data...
		start = clock();
		for(ii=0;ii<ffts;ii++) fftw_execute(p); /* repeat as needed */
		stop = clock();

		elapsed = ((double)stop - (double)start) / (double)CLOCKS_PER_SEC;

		// clock() may not resolve a very short run; avoid dividing by zero
		fps = (elapsed > 0.0 && ffts > 0) ? nflops * (double)ffts/elapsed : 0.0;
		     if( fps > 5e17) { fps /= 1e18;units = "ExaFlops"; }
		else if( fps > 5e14) { fps /= 1e15;units = "PFlops"; }
		else if( fps > 5e11) { fps /= 1e12;units = "TFlops"; }
		else if( fps > 5e8 ) { fps /= 1e9; units = "GFlops"; }
		else if( fps > 5e5 ) { fps /= 1e6; units = "MFlops"; }
		else if( fps > 5e2 ) { fps /= 1e3; units = "KFlops"; }
		else                 {             units = "Flops";  }

		printf("%d FFTs in %.3f sec (%.2f %s)\n", ffts, elapsed, fps, units);
		if( elapsed > 1.1 ) printf("*** this CPU/configuration will not meet your specification ***\n");
		else if( elapsed > 0.9 ) printf("*** this configuration is close to full utilization on this CPU ***\n");
	}

	rc = EXIT_SUCCESS;

cleanup:
	if( p ) fftw_destroy_plan(p);
	if( in ) fftw_free(in);
	if( out ) fftw_free(out);
#if defined(_OPENMP) // || defined(_POSIX_THREADS)
	if( cpu.threads > 1 ) fftw_cleanup_threads();
#endif
	return rc;
}
Beispiel #3
0
/*--------------------------------------------------------------------------*/
/* Print plan p by forwarding to fftw_print_plan() ("S" stands for screen). */
void _fftwS(fftw_plan p)
{
  fftw_print_plan(p);
}
Beispiel #4
0
/*
 * Check an in-place transform against a plan that is already trusted.
 *
 * `howmany` random inputs of length n are stored twice: with stride
 * `istride` in in1, which a freshly created FFTW_IN_PLACE plan transforms
 * in place, and contiguously in in2, which validated_plan transforms into
 * out2.  The slots of in1 that lie between strided elements are filled
 * with known values and must still hold them after the transform.  The two
 * results are then compared against TOLERANCE.
 *
 * A non-zero `specific` creates the plan with fftw_create_plan_specific().
 */
void test_in_place(int n, int istride, int howmany, fftw_direction dir,
		   fftw_plan validated_plan, int specific)
{
     fftw_complex *in1, *in2, *out2;
     fftw_plan plan;
     int k, gap;
     const int total = n * howmany;	/* number of complex samples */
     int flags = measure_flag | wisdom_flag | FFTW_IN_PLACE;

     /* randomly exercise the thread-safe planner option as well */
     if (coinflip())
          flags |= FFTW_THREADSAFE;

     in1 = (fftw_complex *) fftw_malloc(istride * n * sizeof(fftw_complex) * howmany);
     in2 = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex) * howmany);
     out2 = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex) * howmany);

     if (specific)
          plan = fftw_create_plan_specific(n, dir, flags,
                                           in1, istride,
                                           (fftw_complex *) NULL, 0);
     else
          plan = fftw_create_plan(n, dir, flags);

     /* same random sample into the strided and the contiguous copy */
     for (k = 0; k < total; ++k) {
          c_re(in1[k * istride]) = c_re(in2[k]) = DRAND();
          c_im(in1[k * istride]) = c_im(in2[k]) = DRAND();
     }

     /*
      * mark the slots between strided elements with recognizable values
      * so that any stray write by fftw can be detected afterwards
      */
     for (gap = 1; gap < istride; ++gap) {
          for (k = 0; k < total; ++k) {
               c_re(in1[k * istride + gap]) = k * istride + gap;
               c_im(in1[k * istride + gap]) = k * istride - gap;
          }
     }

     CHECK(plan != NULL, "can't create plan");
     WHEN_VERBOSE(2, fftw_print_plan(plan));

     /*
      * transform in place; the single-transform entry point is only
      * eligible for one contiguous transform, and then chosen at random
      */
     if (howmany == 1 && istride == 1 && !coinflip())
          fftw_one(plan, in1, NULL);
     else
          fftw(plan, howmany, in1, istride, n * istride,
               (fftw_complex *) NULL, 0, 0);

     fftw_destroy_plan(plan);

     /* the marker values must be untouched */
     for (gap = 1; gap < istride; ++gap) {
          for (k = 0; k < total; ++k) {
               CHECK(c_re(in1[k * istride + gap]) == k * istride + gap &&
                     c_im(in1[k * istride + gap]) == k * istride - gap,
                     "input has been overwritten");
          }
     }

     /* reference result, one transform at a time */
     for (k = 0; k < howmany; ++k) {
          const int offset = n * k;

          fftw(validated_plan, 1, in2 + offset, 1, n, out2 + offset, 1, n);
     }

     CHECK(compute_error_complex(in1, istride, out2, 1, total) < TOLERANCE,
           "test_in_place: wrong answer");
     WHEN_VERBOSE(2, printf("OK\n"));

     fftw_free(in1);
     fftw_free(in2);
     fftw_free(out2);
}
Beispiel #5
0
/*
 * Hook invoked with a tentative plan p: report it through WHEN_VERBOSE and
 * run test_ergun() on it, using the size and direction stored in the plan.
 */
static void fftw_plan_hook_function(fftw_plan p)
{
     int size;
     fftw_direction direction;

     WHEN_VERBOSE(3, printf("Validating tentative plan\n"));
     WHEN_VERBOSE(3, fftw_print_plan(p));

     size = p->n;
     direction = p->dir;
     test_ergun(size, direction, p);
}
Beispiel #6
0
/*
 * Report the floating-point throughput needed to run N-point complex FFTs
 * continuously on a stream sampled at FS Hz, based on FFTW's operation
 * counts for the chosen plan.
 *
 * Command line:  N [FS [WINDOW [EST]]]
 *   N      - FFT length; a trailing k/K or m/M multiplies by 2^10 / 2^20
 *   FS     - sample rate in Hz; optional suffix k, M, G, T (or m = milli)
 *   WINDOW - window overlap, used as a fraction in [0, 1)
 *   EST    - non-zero: plan with FFTW_ESTIMATE instead of FFTW_MEASURE
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE on invalid arguments or when FFTW
 * cannot allocate the buffers or create the plan.
 */
int main(int argc, char *argv[], char *envp[])
{
	fftw_complex *in = NULL, *out = NULL;
	fftw_plan p = NULL;
	size_t N = 1024;
	double FS = 1e6;	// 1MHz sample rate
	double WIN = 0.0;	// 0% window overlap
	double nadd,nmul,nfma,ntotal,factor;
	const char *units, *fs_units;
	bool EST = false;
	char *endp = NULL;
	cpuid_info_t cpu;
	int rc = EXIT_FAILURE;

	(void)envp;	// accepted for compatibility, not used

	if(argc>1 && ( *argv[1]=='?' || *argv[1]=='-' ) ) usage(argv[0]);

	// Each suffix is examined only right after its own argument was parsed;
	// otherwise the suffix left over from N would also be applied to FS.
	errno = 0;
	if(argc>1)
	{
		N = strtoul(argv[1],&endp,0);
		if(errno) perror("N");
		errno = 0;
		if(endp)
		{
			// tolower() requires a value representable as unsigned char
			int suffix = tolower((unsigned char)*endp);
			     if(suffix=='k') N *= 1024;
			else if(suffix=='m') N *= 1024*1024;
		}
	}
	if(argc>2)
	{
		endp = NULL;
		FS = strtod(argv[2],&endp);
		if(errno) perror("FS(Hz)");
		errno = 0;
		if(endp)
		{
			switch(*endp)	// common use would be to qualify with a 2nd character
			{
			case 'k': FS *= 1e3;  break;
			case 'M': FS *= 1e6;  break;
			case 'G': FS *= 1e9;  break;
			case 'T': FS *= 1e12; break;	// I'm dreaming of the day...
			case 'm': FS /= 1e3;  break;	// ok, kind of silly
			default:              break;
			}
		}
	}
	if(argc>3)
	{
		WIN = strtod(argv[3],NULL);
		if(errno) perror("WINDOW(%)");
		errno = 0;
	}
	if(argc>4) EST = atoi(argv[4])?true:false;

	// fftw_plan_dft_1d() takes the length as an int, and N is used as a divisor below.
	if( (int)N <= 0 || (size_t)(int)N != N )
	{
		fprintf(stderr, "N must be a positive value that fits in an int\n");
		return EXIT_FAILURE;
	}
	// The overlap factor divides by (1 - WIN); written this way to reject NaN too.
	if( !(WIN >= 0.0 && WIN < 1.0) )
	{
		fprintf(stderr, "WINDOW overlap must be a fraction in [0, 1), e.g. 0.5 for 50%%\n");
		return EXIT_FAILURE;
	}

	cpuid_get_info( &cpu );

	// N was range-checked above, so the cast matches the conversion specifier exactly
	printf("FFT(%lu,%s):\n",(unsigned long)N,EST?"estimated":"measured");
	in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N);
	out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N);
	if( !in || !out )
	{
		fprintf(stderr, "fftw_malloc failed\n");
		goto cleanup;
	}
	p = fftw_plan_dft_1d((int)N, in, out, FFTW_FORWARD, EST?FFTW_ESTIMATE:FFTW_MEASURE);
	if( !p )
	{
		fprintf(stderr, "fftw_plan_dft_1d failed\n");
		goto cleanup;
	}
	fftw_flops(p,&nadd,&nmul,&nfma);
	fftw_print_plan(p);
	// a fused multiply-add counts once when the CPU has FMA hardware, twice otherwise
	ntotal = nadd+nmul+((cpu.exts.XOP||cpu.exts.FMA3||cpu.exts.FMA4)?nfma:2*nfma);

	printf("\nFLOPS: add=%.0f mul=%.0f fma=%.0f total=%.0f Flops/frame\n",
		nadd, nmul, nfma, ntotal);
	factor  = (1.0 + ( 1.0 / ( 1.0 - WIN ))) / 2.0 ;	// additional FFTs required due to overlap
	ntotal *= (FS * factor) / N;						// FFTs = FS/N times the factor due to overlap
	     if( ntotal > 5e17) { ntotal /= 1e18;units = "ExaFlops"; }
	else if( ntotal > 5e14) { ntotal /= 1e15;units = "PFlops"; }
	else if( ntotal > 5e11) { ntotal /= 1e12;units = "TFlops"; }
	else if( ntotal > 5e8 ) { ntotal /= 1e9; units = "GFlops"; }
	else if( ntotal > 5e5 ) { ntotal /= 1e6; units = "MFlops"; }
	else if( ntotal > 5e2 ) { ntotal /= 1e3; units = "KFlops"; }
	else                    {                units = "Flops";  }
	     if( FS > 5e11) { FS /= 1e12;fs_units = "THz"; }
	else if( FS > 5e8 ) { FS /= 1e9; fs_units = "GHz"; }
	else if( FS > 5e5 ) { FS /= 1e6; fs_units = "MHz"; }
	else if( FS > 5e2 ) { FS /= 1e3; fs_units = "KHz"; }
	else                {            fs_units = "Hz";  }
	printf("FS=%.2f%s, %.2f%% overlap, %.2f %s (%s method)\n",
		FS, fs_units, WIN*100.0, ntotal, units, EST?"by estimate":"by measure");

	printf("Current CPU = %s\n", cpu.name.str);
	printf("CPU Threads = %d\n", cpu.threads);

	// TODO: actually compute some representative FFTs, timing them and
	//       extrapolate the performance on *THIS* machine as configured

	rc = EXIT_SUCCESS;

cleanup:
	if( p ) fftw_destroy_plan(p);
	if( in ) fftw_free(in);
	if( out ) fftw_free(out);
	return rc;
}
Beispiel #7
0
/**
 * This method configures the estimator with the supplied parameters.
 * NOTE: an estimator should be configured only once.
 */
void SiEstimator::Configure( 
    int fftSize, 
    int winType, 
    int fast,
    int mbins,
    int sbins
  )
{
    int idx;

    // Get rid of any previous work space
    Cleanup();

    // Setup the resources based on approach chosen
    if( fast ) mUseFftw = 1;
    else       mUseFftw = 0;

    // Establish the common vectors and parameters
    mNfftPoints = fftSize;
    mInOut      = (double*)malloc( mNfftPoints * sizeof(double) );
    mWin        = (double*)malloc( mNfftPoints * sizeof(double) );
    mTmp        = (double*)malloc( 2*sizeof(double)*mNfftPoints );

    // Establish the window function
    if( 0==winType ){
        for(idx=0;idx<mNfftPoints;idx++){
            mWin[idx] = 1.0;
        }
        mCoherentGain = 1.0;
    }
    else if( 1==winType ){
       double x,a0,a1,a2,sum;
       a0 = 0.42659;
       a1 = 0.49656;
       a2 = 0.076849;
       sum = 0.0;
       for(idx=0;idx<mNfftPoints;idx++){
           x = (TWOPI * idx)/(mNfftPoints-1);
           mWin[idx] = a0 - a1*cos(x) + a2*cos(2*x);
           sum+=mWin[idx];
       }
       mCoherentGain = sum / mNfftPoints;
    }

    // Establish the appropriate processing vectors depending on approach
    if( mUseFftw ){
        mFftwOutput = (fftw_complex*)fftw_malloc( 
                                        sizeof(fftw_complex) * mNfftPoints);
        mFftwPlan   = fftw_plan_dft_r2c_1d(mNfftPoints,mInOut,mFftwOutput,0);
        if( mLog & M2_LOG_PLAN ){ 
            fftw_print_plan( mFftwPlan );
            printf("\n");
        }
    }
    else{
        mNrInOut = (double*)malloc( 2*sizeof(double)*mNfftPoints );
    }

    mMeasureBins = mbins;
    mSumBins     = sbins;
    mBinAve      = 0;
    // printf("CFG: mMeasureBins=%d, mSumBins=%d\n",mMeasureBins,mSumBins);
}