Example #1
0
/*
 * FFT throughput estimator built on FFTW.
 *
 * Positional arguments (all optional):
 *   argv[1]  N    FFT length; a trailing 'k' or 'm' (either case) multiplies
 *                 by 1024 or 1024*1024.
 *   argv[2]  FS   sample rate in Hz; a trailing k/M/G/T multiplies by powers
 *                 of 1000, a trailing 'm' divides by 1000.
 *   argv[3]  WIN  window overlap as a fraction in [0,1); it is printed as a
 *                 percentage.
 *   argv[4]  EST  non-zero plans with FFTW_ESTIMATE instead of FFTW_MEASURE.
 *
 * A first argument starting with '?' or '-' calls usage().
 *
 * The program plans a forward complex 1-D DFT of length N, prints the plan and
 * FFTW's operation counts, scales them to the rate needed for FS samples per
 * second, and (unless EST is set) times the number of FFTs that cover one
 * second of input.
 *
 * Invalid arguments and allocation/planning failures are reported on stderr
 * and terminate with EXIT_FAILURE.
 *
 * NOTE(review): standard C expects `int main`; the signature is left as-is so
 * this block keeps its original interface.
 */
void main(int argc, char *argv[], char *envp[])
{
	fftw_complex *in, *out;
	fftw_plan p;
	size_t N = 1024;
	double fs_hz, FS = 1e6;	// 1MHz sample rate
	double WIN = 0.0;	// 0% window overlap
	double nadd,nmul,nfma,nflops,ntotal,factor;
	const char *units, *fs_units;
	bool EST = false;
	char *endp;
	cpuid_info_t cpu;
	const char *mp_tag = "";
	// fftw_plan_dft_1d() takes the length as an int, so N must fit in one
	const size_t max_len = (size_t)(~0u >> 1);
#if defined(_OPENMP) // || defined(_POSIX_THREADS)
	bool threaded = false;	// true once fftw_init_threads() has succeeded
#endif

	(void)envp;

	// usage() presumably does not return -- confirm against its definition
	if(argc>1 && ( *argv[1]=='?' || *argv[1]=='-' ) ) usage(argv[0]);

	// Each argument is parsed with its own fresh endp so that a suffix left
	// over from a previous argument can never be applied to the next one.
	if(argc>1)
	{
		size_t scale = 1;
		int suffix;

		errno = 0;
		endp = NULL;
		N = strtoul(argv[1],&endp,0);
		if(errno) { perror("N"); exit(EXIT_FAILURE); }

		// <ctype.h> functions require an unsigned char value
		suffix = endp ? tolower((unsigned char)*endp) : 0;
		     if(suffix=='k') scale = 1024;
		else if(suffix=='m') scale = 1024*1024;

		// reject before multiplying so the product cannot wrap
		if(N > max_len / scale) N = 0;
		else                    N *= scale;
	}
	if(N == 0 || N > max_len || N > (size_t)-1 / sizeof(fftw_complex))
	{
		fprintf(stderr, "N: FFT length must be between 1 and %lu\n", (unsigned long)max_len);
		exit(EXIT_FAILURE);
	}

	if(argc>2)
	{
		errno = 0;
		endp = NULL;
		FS = strtod(argv[2],&endp);
		if(errno) { perror("FS(Hz)"); exit(EXIT_FAILURE); }

		     if(endp && *endp=='k') FS *= 1000.0;	// common use would be to qualify with a 2nd character
		else if(endp && *endp=='M') FS *= 1000.0*1000.0;
		else if(endp && *endp=='G') FS *= 1000.0*1000.0*1000.0;
		else if(endp && *endp=='T') FS *= 1000.0*1000.0*1000.0*1000.0;  // I'm dreaming of the day...
		else if(endp && *endp=='m') FS /= 1000.0;	// ok, kind of silly
	}
	if(!(FS > 0.0))	// also rejects NaN
	{
		fprintf(stderr, "FS(Hz): sample rate must be positive\n");
		exit(EXIT_FAILURE);
	}

	if(argc>3)
	{
		errno = 0;
		WIN = strtod(argv[3],NULL);
		if(errno) { perror("WINDOW(%)"); exit(EXIT_FAILURE); }
	}
	// 1.0 - WIN is used as a divisor below, so WIN must stay strictly below 1
	if(!(WIN >= 0.0 && WIN < 1.0))
	{
		fprintf(stderr, "WINDOW(%%): overlap must be a fraction >= 0 and < 1 (e.g. 0.5 for 50%%)\n");
		exit(EXIT_FAILURE);
	}

	if(argc>4) EST = atoi(argv[4])?true:false;

	cpuid_get_info( &cpu );
	fs_hz = FS;	// keep the unscaled rate; FS is rescaled for display below

	// http://www.fftw.org/fftw3_doc/Usage-of-Multi_002dthreaded-FFTW.html#Usage-of-Multi_002dthreaded-FFTW
#if defined(_OPENMP) // || defined(_POSIX_THREADS)
	// fftw_init_threads() returns zero on failure; fall back to single-threaded plans
	if( cpu.threads > 1 && fftw_init_threads() )
	{
		threaded = true;
		fftw_plan_with_nthreads(cpu.threads);
		mp_tag = "-omp";
	}
#endif

	printf("FFT("__SIZE_T_SPECIFIER", %s) :\n",N,EST?"estimated":"measured");
	in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N);
	out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N);
	p = NULL;
	if( in && out )
		p = fftw_plan_dft_1d((int)N, in, out, FFTW_FORWARD, EST?FFTW_ESTIMATE:FFTW_MEASURE);
	if( !p )
	{
		fprintf(stderr, "FFTW: unable to allocate buffers or create a plan for N=%lu\n", (unsigned long)N);
		if( in )  fftw_free(in);
		if( out ) fftw_free(out);
#if defined(_OPENMP) // || defined(_POSIX_THREADS)
		if( threaded ) fftw_cleanup_threads();
#endif
		exit(EXIT_FAILURE);
	}
	fftw_flops(p,&nadd,&nmul,&nfma);
	fftw_print_plan(p);
	// a fused multiply-add counts once when the CPU reports XOP/FMA3/FMA4, twice otherwise
	nflops = ntotal = nadd+nmul+((cpu.exts.XOP||cpu.exts.FMA3||cpu.exts.FMA4)?nfma:2*nfma);

	printf("\nFLOPS: add=%.0f mul=%.0f fma=%.0f total=%.0f Flops/frame\n",
		nadd, nmul, nfma, ntotal);
	// NOTE(review): with overlap fraction w the hop is N*(1-w), which would give
	// 1/(1-w) FFTs per N samples; this formula averages that with 1 -- confirm intent.
	factor  = (1.0 + ( 1.0 / ( 1.0 - WIN ))) / 2.0 ;	// additional FFTs required due to overlap
	ntotal *= (FS * factor) / N;						// FFTs = FS/N times the factor due to overlap
	     if( ntotal > 5e17) { ntotal /= 1e18;units = "ExaFlops"; }
	else if( ntotal > 5e14) { ntotal /= 1e15;units = "PFlops"; }
	else if( ntotal > 5e11) { ntotal /= 1e12;units = "TFlops"; }
	else if( ntotal > 5e8 ) { ntotal /= 1e9; units = "GFlops"; }
	else if( ntotal > 5e5 ) { ntotal /= 1e6; units = "MFlops"; }
	else if( ntotal > 5e2 ) { ntotal /= 1e3; units = "KFlops"; }
	else                    {                units = "Flops";  }
	     if( FS > 5e11) { FS /= 1e12;fs_units = "THz"; }
	else if( FS > 5e8 ) { FS /= 1e9; fs_units = "GHz"; }
	else if( FS > 5e5 ) { FS /= 1e6; fs_units = "MHz"; }
	else if( FS > 5e2 ) { FS /= 1e3; fs_units = "KHz"; }
	else                {            fs_units = "Hz";  }
	printf("FS=%.2f%s, %.2f%% overlap, %.2f %s (%s method)\n",
		FS, fs_units, WIN*100.0, ntotal, units, EST?"by estimate":"by measure");

#ifndef FFTW_DLL
	// TODO: doesn't work for MSVC build
	printf("FFTw Version = %s%s\n", fftw_version, mp_tag);
#else
	printf("FFTw Version = TBD%s\n", mp_tag);
#endif
	printf("Current CPU = %s\n", cpu.name.str);
	printf("CPU Threads = %d\n", cpu.threads);

	// Time the FFTs needed to cover one second of input and extrapolate the
	// performance on *THIS* machine as configured.
	if( !EST )
	{
		clock_t start, stop;
		double  elapsed, fps, frames;
		double *raw = (double *)in;	// an fftw_complex is laid out as two doubles
		size_t  k;
		int ii, ffts;

		// FFTW_MEASURE planning overwrites the arrays, so the input is only
		// initialised here, after the plan exists and before it is executed.
		for(k=0;k<2*N;k++) raw[k] = 0.0;

		// the total amount of work necessary to go through 1sec of input data...
		frames = (fs_hz * factor) / N;
		// clamp so the conversion to int stays in range
		ffts = (frames < (double)max_len) ? (int)frames : (int)max_len;

		// NOTE(review): clock() reports processor time; with multi-threaded
		// plans that may differ from wall-clock time -- confirm the verdict
		// thresholds below are still meaningful in the -omp configuration.
		start = clock();
		for(ii=0;ii<ffts;ii++) fftw_execute(p); /* repeat as needed */
		stop = clock();

		elapsed = ((double)stop - (double)start) / (double)CLOCKS_PER_SEC;

		if( ffts > 0 && elapsed > 0.0 )
		{
			fps = nflops * (double)ffts/elapsed;
			     if( fps > 5e17) { fps /= 1e18;units = "ExaFlops"; }
			else if( fps > 5e14) { fps /= 1e15;units = "PFlops"; }
			else if( fps > 5e11) { fps /= 1e12;units = "TFlops"; }
			else if( fps > 5e8 ) { fps /= 1e9; units = "GFlops"; }
			else if( fps > 5e5 ) { fps /= 1e6; units = "MFlops"; }
			else if( fps > 5e2 ) { fps /= 1e3; units = "KFlops"; }
			else                 {             units = "Flops";  }

			printf("%d FFTs in %.3f sec (%.2f %s)\n", ffts, elapsed, fps, units);
		}
		else
		{
			// nothing ran, or it finished below the clock resolution: no rate to report
			printf("%d FFTs in %.3f sec (too fast to measure)\n", ffts, elapsed);
		}

		if( elapsed > 1.1 ) printf("*** this CPU/configuration will not meet your specification ***\n");
		else if( elapsed > 0.9 ) printf("*** this configuration is close to full utilization on this CPU ***\n");
	}

	fftw_destroy_plan(p);
	fftw_free(in); fftw_free(out);
#if defined(_OPENMP) // || defined(_POSIX_THREADS)
	if( threaded ) fftw_cleanup_threads();
#endif
}
Example #2
0
/*
 * FFT workload estimator built on FFTW (planning only, nothing is executed).
 *
 * Positional arguments (all optional):
 *   argv[1]  N    FFT length; a trailing 'k' or 'm' (either case) multiplies
 *                 by 1024 or 1024*1024.
 *   argv[2]  FS   sample rate in Hz; a trailing k/M/G/T multiplies by powers
 *                 of 1000, a trailing 'm' divides by 1000.
 *   argv[3]  WIN  window overlap as a fraction in [0,1); it is printed as a
 *                 percentage.
 *   argv[4]  EST  non-zero plans with FFTW_ESTIMATE instead of FFTW_MEASURE.
 *
 * A first argument starting with '?' or '-' calls usage().
 *
 * The program plans a forward complex 1-D DFT of length N, prints the plan and
 * FFTW's operation counts, and scales them to the rate needed for FS samples
 * per second.
 *
 * Invalid arguments and allocation/planning failures are reported on stderr
 * and terminate with EXIT_FAILURE.
 *
 * NOTE(review): standard C expects `int main`; the signature is left as-is so
 * this block keeps its original interface.
 */
void main(int argc, char *argv[], char *envp[])
{
	fftw_complex *in, *out;
	fftw_plan p;
	size_t N = 1024;
	double FS = 1e6;	// 1MHz sample rate
	double WIN = 0.0;	// 0% window overlap
	double nadd,nmul,nfma,ntotal,factor;
	const char *units, *fs_units;
	bool EST = false;
	char *endp;
	cpuid_info_t cpu;
	// fftw_plan_dft_1d() takes the length as an int, so N must fit in one
	const size_t max_len = (size_t)(~0u >> 1);

	(void)envp;

	// usage() presumably does not return -- confirm against its definition
	if(argc>1 && ( *argv[1]=='?' || *argv[1]=='-' ) ) usage(argv[0]);

	// Each argument is parsed with its own fresh endp so that a suffix left
	// over from a previous argument can never be applied to the next one.
	if(argc>1)
	{
		size_t scale = 1;
		int suffix;

		errno = 0;
		endp = NULL;
		N = strtoul(argv[1],&endp,0);
		if(errno) { perror("N"); exit(EXIT_FAILURE); }

		// <ctype.h> functions require an unsigned char value
		suffix = endp ? tolower((unsigned char)*endp) : 0;
		     if(suffix=='k') scale = 1024;
		else if(suffix=='m') scale = 1024*1024;

		// reject before multiplying so the product cannot wrap
		if(N > max_len / scale) N = 0;
		else                    N *= scale;
	}
	if(N == 0 || N > max_len || N > (size_t)-1 / sizeof(fftw_complex))
	{
		fprintf(stderr, "N: FFT length must be between 1 and %lu\n", (unsigned long)max_len);
		exit(EXIT_FAILURE);
	}

	if(argc>2)
	{
		errno = 0;
		endp = NULL;
		FS = strtod(argv[2],&endp);
		if(errno) { perror("FS(Hz)"); exit(EXIT_FAILURE); }

		     if(endp && *endp=='k') FS *= 1000.0;	// common use would be to qualify with a 2nd character
		else if(endp && *endp=='M') FS *= 1000.0*1000.0;
		else if(endp && *endp=='G') FS *= 1000.0*1000.0*1000.0;
		else if(endp && *endp=='T') FS *= 1000.0*1000.0*1000.0*1000.0;  // I'm dreaming of the day...
		else if(endp && *endp=='m') FS /= 1000.0;	// ok, kind of silly
	}
	if(!(FS > 0.0))	// also rejects NaN
	{
		fprintf(stderr, "FS(Hz): sample rate must be positive\n");
		exit(EXIT_FAILURE);
	}

	if(argc>3)
	{
		errno = 0;
		WIN = strtod(argv[3],NULL);
		if(errno) { perror("WINDOW(%)"); exit(EXIT_FAILURE); }
	}
	// 1.0 - WIN is used as a divisor below, so WIN must stay strictly below 1
	if(!(WIN >= 0.0 && WIN < 1.0))
	{
		fprintf(stderr, "WINDOW(%%): overlap must be a fraction >= 0 and < 1 (e.g. 0.5 for 50%%)\n");
		exit(EXIT_FAILURE);
	}

	if(argc>4) EST = atoi(argv[4])?true:false;

	cpuid_get_info( &cpu );

	// N was validated to fit in an int, so the cast to unsigned long is lossless
	printf("FFT(%lu,%s):\n",(unsigned long)N,EST?"estimated":"measured");
	in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N);
	out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N);
	p = NULL;
	if( in && out )
		p = fftw_plan_dft_1d((int)N, in, out, FFTW_FORWARD, EST?FFTW_ESTIMATE:FFTW_MEASURE);
	if( !p )
	{
		fprintf(stderr, "FFTW: unable to allocate buffers or create a plan for N=%lu\n", (unsigned long)N);
		if( in )  fftw_free(in);
		if( out ) fftw_free(out);
		exit(EXIT_FAILURE);
	}
	fftw_flops(p,&nadd,&nmul,&nfma);
	fftw_print_plan(p);
	// a fused multiply-add counts once when the CPU reports XOP/FMA3/FMA4, twice otherwise
	ntotal = nadd+nmul+((cpu.exts.XOP||cpu.exts.FMA3||cpu.exts.FMA4)?nfma:2*nfma);

	printf("\nFLOPS: add=%.0f mul=%.0f fma=%.0f total=%.0f Flops/frame\n",
		nadd, nmul, nfma, ntotal);
	// NOTE(review): with overlap fraction w the hop is N*(1-w), which would give
	// 1/(1-w) FFTs per N samples; this formula averages that with 1 -- confirm intent.
	factor  = (1.0 + ( 1.0 / ( 1.0 - WIN ))) / 2.0 ;	// additional FFTs required due to overlap
	ntotal *= (FS * factor) / N;						// FFTs = FS/N times the factor due to overlap
	     if( ntotal > 5e17) { ntotal /= 1e18;units = "ExaFlops"; }
	else if( ntotal > 5e14) { ntotal /= 1e15;units = "PFlops"; }
	else if( ntotal > 5e11) { ntotal /= 1e12;units = "TFlops"; }
	else if( ntotal > 5e8 ) { ntotal /= 1e9; units = "GFlops"; }
	else if( ntotal > 5e5 ) { ntotal /= 1e6; units = "MFlops"; }
	else if( ntotal > 5e2 ) { ntotal /= 1e3; units = "KFlops"; }
	else                    {                units = "Flops";  }
	     if( FS > 5e11) { FS /= 1e12;fs_units = "THz"; }
	else if( FS > 5e8 ) { FS /= 1e9; fs_units = "GHz"; }
	else if( FS > 5e5 ) { FS /= 1e6; fs_units = "MHz"; }
	else if( FS > 5e2 ) { FS /= 1e3; fs_units = "KHz"; }
	else                {            fs_units = "Hz";  }
	printf("FS=%.2f%s, %.2f%% overlap, %.2f %s (%s method)\n",
		FS, fs_units, WIN*100.0, ntotal, units, EST?"by estimate":"by measure");

	printf("Current CPU = %s\n", cpu.name.str);
	printf("CPU Threads = %d\n", cpu.threads);

	// TODO: actually compute some representative FFTs, timing them and
	//       extrapolate the performance on *THIS* machine as configured

	fftw_destroy_plan(p);
	fftw_free(in); fftw_free(out);
}