Ejemplo n.º 1
0
//--------------------------------------------------------------------------------
// Fetches the display-format string stored under the key
// "fmt_<options.exportFormat>_<which>" in the DISPLAY_STR section of 'config'
// and returns it with the two-character sequences "\n" and "\t" expanded.
//
//   * When 'which' is "file", buildDisplayStrings(options) is called first.
//   * A value containing "file:" has that marker removed; what is left is used
//     as a path and the file's contents become the format string.
//   * Otherwise a value containing "fmt_" is used as the key of another entry,
//     which is read in its place (one level of indirection only).
//
// If the referenced file is missing, the braces/brackets are unbalanced, or the
// resulting string is empty, a message is written to outErr and the process
// ends through exit(0).
SFString CSlurperApp::getFormatString(COptions& options, const SFString& which)
{
	if (which == "file")
		buildDisplayStrings(options);

	SFString key = "fmt_" + options.exportFormat + "_" + which;
	SFString fmt = config.GetProfileStringGH("DISPLAY_STR", key, EMPTY);
	SFString problem;

	if (fmt.Contains("file:"))
	{
		// The entry names a file that holds the real format string.
		SFString path = fmt.Substitute("file:",EMPTY);
		if (SFos::fileExists(path))
			fmt = asciiFileToString(path);
		else
			problem = SFString("Formatting file '") + path + "' for display string '" + key + "' not found. Quiting...\n";
	}
	else if (fmt.Contains("fmt_"))
	{
		// The entry is the name of another format string; read that one instead
		// and remember both names for any error message.
		SFString alias = fmt;
		fmt = config.GetProfileStringGH("DISPLAY_STR", alias, EMPTY);
		key += ":"+alias;
	}

	// Expand escaped newlines first, then escaped tabs.
	fmt = fmt.Substitute("\\n","\n");
	fmt = fmt.Substitute("\\t","\t");

	// Sanity checks: paired delimiters must balance and the string must not be empty.
	const bool balanced = countOf('{',fmt) == countOf('}',fmt) &&
	                      countOf('[',fmt) == countOf(']',fmt);
	if (!balanced)
	{
		problem = SFString("Mismatched brackets in display string '") + key + "': '" + fmt + "'. Quiting...\n";
	}
	else if (fmt.IsEmpty())
	{
		problem = SFString("Empty display string '") + key + "'. Quiting...\n";
	}

	if (!problem.IsEmpty())
	{
		outErr << problem;
		exit(0);
	}

	return fmt;
}
Ejemplo n.º 2
0
/// Reports whether the string looks like a plain decimal number: an optional
/// leading '-', at most one '.', and otherwise only decimal digits, with at
/// least one digit present.
///
/// Strings without any digit ("", "-", ".", "-.") are rejected. Exponents,
/// a leading '+', and surrounding whitespace are not accepted.
///
/// @return true when the contents match the description above.
bool str::isFloat() const
{
    const int ourLen = getLen();

    // More than one decimal point can never be a number.
    if(countOf(".") > 1)
        return false;

    bool sawDigit = false;

    // Skip a single leading minus sign; only look at mpStr[0] when it exists.
    for(int i = (ourLen > 0 && mpStr[0] == '-') ? 1 : 0; i < ourLen; i++)
    {
        const char thisChar = mpStr[i];
        if(thisChar == '.')
            continue;

        // isdigit() is undefined for negative values other than EOF, so the
        // character must be converted to unsigned char first.
        if(! isdigit(static_cast<unsigned char>(thisChar)))
            return false;

        sawDigit = true;
    }

    return sawDigit;
}
Ejemplo n.º 3
0
/*
 * Repeatedly sweeps the Height x Width grid and calls evaluate() on every cell
 * whose entry in cellsTypes is 'f'. When evaluate() returns 1 the value is
 * stored in 'evaluated' and the cell's type becomes 'e'; any other return value
 * leaves the cell for a later sweep. Sweeping continues until countOf() reports
 * that no 'f' cells remain.
 *
 * If a complete sweep resolves nothing, the loop stops and the unresolved
 * cells keep type 'f' so the caller can detect them. Without this guard a cell
 * that can never be evaluated would make the function spin forever.
 * NOTE(review): this assumes a failed evaluate() call leaves no state behind
 * that could make a later identical call succeed -- confirm against evaluate().
 */
void evaluateAllCells(){
    int remainingFormulas;
    do{
        int i,j;
        int progressed=0;   /* set once any cell is resolved in this sweep */
        for(i=0;i<Height;i++){
            for(j=0;j<Width;j++){
                if(  *(*(cellsTypes+i)+j) == 'f' ){
                    int evaluatedValue;
                    int success = evaluate((*(inputTable+i))+j,&evaluatedValue);
                    if(success!=1)
                        continue;   /* not ready yet; retry on the next sweep */
                    *(*(evaluated+i)+j)=evaluatedValue;
                    *(*(cellsTypes+i)+j)='e';
                    progressed=1;
                  /*  printf("\n[%d][%d]=%d\n",i,j,evaluatedValue); */
                }
            }
        }
        remainingFormulas=countOf(cellsTypes,'f');

        /* Nothing changed, so another sweep would see exactly the same grid. */
        if(!progressed)
            break;

    }while(remainingFormulas>0);
}
Ejemplo n.º 4
0
//	Formats an integral value as decimal text with a ',' inserted between every
//	group of three digits, counted from the right (e.g. 1234567 -> "1,234,567").
//	Negative values are written with a leading '-'.
//
//	T is the (integral) type of the value; it is declared outside this block.
//	The text is built right-to-left in a local buffer of 8*sizeof(T) characters,
//	which is large enough for the digits, separators, sign and terminator of any
//	built-in integer type of that size.
//
//	Returns the formatted text as a std::basic_string<TCHAR>.
std::basic_string<TCHAR> commatize (T number)
{
	//	Local (not static) so that concurrent or re-entrant calls do not share storage
	TCHAR scratch [8*sizeof(T)];

	TCHAR * ptr = scratch + countOf( scratch );
	*(--ptr) = 0;

	//	Remember the sign up front; the loop below consumes 'number'
	const bool negative = number < 0;

	for (int digits = 3; ; )
	{
		//	For negative values the remainder is <= 0; take its magnitude digit by
		//	digit so the most negative value needs no (overflowing) negation
		int digit = int (number % 10);
		if (digit < 0)
			digit = -digit;

		*(--ptr) = '0' + digit;
		number /= 10;
		if (0 == number)
			break;
		if (--digits <= 0)
		{
			*(--ptr) = ',';
			digits = 3;
		}
	}

	if (negative)
		*(--ptr) = '-';

	return std::basic_string<TCHAR> (ptr);
}
Ejemplo n.º 5
0
int main() {
    Solution s;
    { 
        int A[] = {0};
        s.sortColors(A, 0);
        printResult(A, 0);
    }
    {
        int A[] = { 0 };
        s.sortColors(A, countOf(A));
        printResult(A, countOf(A));
    }
    {
        int A[] = { 0, 1 };
        s.sortColors(A, countOf(A));
        printResult(A, countOf(A));
    }
    {
        int A[] = { 0, 2 };
        s.sortColors(A, countOf(A));
        printResult(A, countOf(A));
    }
    {
        int A[] = { 1, 2 };
        s.sortColors(A, countOf(A));
        printResult(A, countOf(A));
    }
    {
        int A[] = { 0, 1, 2 };
        s.sortColors(A, countOf(A));
        printResult(A, countOf(A));
    }
    {
        int A[] = { 0, 1, 2 };
        s.sortColors(A, countOf(A));
        printResult(A, countOf(A));
    }
    {
        int A[] = { 0, 2, 2 };
        s.sortColors(A, countOf(A));
        printResult(A, countOf(A));
    }
    {
        int A[] = { 2, 0, 2 };
        s.sortColors(A, countOf(A));
        printResult(A, countOf(A));
    }
    {
        int A[] = { 2, 2, 0 };
        s.sortColors(A, countOf(A));
        printResult(A, countOf(A));
    }
    {
        int A[] = { 2, 1, 2, 0 };
        s.sortColors(A, countOf(A));
        printResult(A, countOf(A));
    }
    {
        int A[] = { 2, 1, 2, 0, 1 };
        s.sortColors(A, countOf(A));
        printResult(A, countOf(A));
    }
    {
        int A[] = { 2, 1, 2, 0, 1, 0, 2 };
        s.sortColors(A, countOf(A));
        printResult(A, countOf(A));
    }
    {
        int A[] = { 2, 0, 1, 1, 2, 1, 2, 1, 2, 0, 1, 0, 2 };
        s.sortColors(A, countOf(A));
        printResult(A, countOf(A));
    }
}
Ejemplo n.º 6
0
//	Entry point of the clFFT runtime test executable.
//
//	Parses this program's own command line options through the po:: interface, prints version and
//	OpenCL information unless suppressed, then builds a new argument vector and hands it to
//	::testing::InitGoogleTest() before running every test.
//
//	The parsed values are written to variables declared outside this function (tolerance,
//	number_of_random_tests, random_test_parameter_seed, device_type, device_gpu_list, verbose,
//	comparison_type).
//
//	Returns 1 when mutually-exclusive options are combined, 0 after printing help, and otherwise the
//	value of RUN_ALL_TESTS().
int main( int argc, char **argv )
{
	//	Define MEMORYREPORT on windows platforms to enable debug memory heap checking
#if defined( MEMORYREPORT ) && defined( _WIN32 )
	TCHAR logPath[ MAX_PATH ];
	::GetCurrentDirectory( MAX_PATH, logPath );
	::_tcscat_s( logPath, _T( "\\MemoryReport.txt") );

	//	We leak the handle to this file, on purpose, so that the ::_CrtSetReportFile() can output its memory
	//	statistics on app shutdown
	HANDLE hLogFile;
	hLogFile = ::CreateFile( logPath, GENERIC_WRITE,
		FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL );

	::_CrtSetReportMode( _CRT_ASSERT, _CRTDBG_MODE_FILE | _CRTDBG_MODE_WNDW | _CRTDBG_MODE_DEBUG );
	::_CrtSetReportMode( _CRT_ERROR, _CRTDBG_MODE_FILE | _CRTDBG_MODE_WNDW | _CRTDBG_MODE_DEBUG );
	::_CrtSetReportMode( _CRT_WARN, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG );

	::_CrtSetReportFile( _CRT_ASSERT, hLogFile );
	::_CrtSetReportFile( _CRT_ERROR, hLogFile );
	::_CrtSetReportFile( _CRT_WARN, hLogFile );

	int tmp = ::_CrtSetDbgFlag( _CRTDBG_REPORT_FLAG );
	tmp |= _CRTDBG_LEAK_CHECK_DF | _CRTDBG_ALLOC_MEM_DF | _CRTDBG_CHECK_ALWAYS_DF;
	::_CrtSetDbgFlag( tmp );

	//	By looking at the memory leak report that is generated by this debug heap, there is a number with
	//	{} brackets that indicates the incremental allocation number of that block.  If you wish to set
	//	a breakpoint on that allocation number, put it in the _CrtSetBreakAlloc() call below, and the heap
	//	will issue a bp on the request, allowing you to look at the call stack
	//	::_CrtSetBreakAlloc( 997 );

#endif /* MEMORYREPORT */

	// Declare the supported options.
	po::options_description desc( "clFFT Runtime Test command line options" );
	desc.add_options()
		( "help,h",				"produces this help message" )
		( "verbose,v",			"print out detailed information for the tests" )
		( "noVersion",     "Don't print version information from the clFFT library" )
		( "noInfoCL",      "Don't print information from the OpenCL runtime" )
		( "cpu,c",         "Run tests on a CPU device" )
		( "gpu,g",         "Run tests on a GPU device (default)" )
		( "pointwise,p",         "Do a pointwise comparison to determine test correctness (default: use root mean square)" )
		( "tolerance,t",        po::value< float >( &tolerance )->default_value( 0.001f ),   "tolerance level to use when determining test pass/fail" )
		( "numRandom,r",        po::value< size_t >( &number_of_random_tests )->default_value( 2000 ),   "number of random tests to run" )
		( "seed",        po::value< time_t >( &random_test_parameter_seed )->default_value( time(NULL)%1308000000 ),
						"seed to use for the random test. defaults to time(NULL)" )
						// modulo lops off the first few digits of the time value to make the seed easier to type
						// even without these digits, the seed value won't wrap around until 2036 or later
		( "short,s",         "Run radix 2 tests; no random testing" )
		( "medium,m",         "Run all radices; no random testing" )
		;

	//	Parse the command line options, ignore unrecognized options and collect them into a vector of strings
	po::variables_map vm;
	po::parsed_options parsed = po::command_line_parser( argc, argv ).options( desc ).allow_unregistered( ).run( );
	po::store( parsed, vm );
	po::notify( vm );
	std::vector< std::string > to_pass_further = po::collect_unrecognized( parsed.options, po::include_positional );

	std::cout << std::endl;

	//	One bit per device option; (mutex & (mutex-1)) is non-zero only when more than one bit is set,
	//	i.e. when both "gpu" and "cpu" were given
	size_t mutex = ((vm.count( "gpu" ) > 0) ? 1 : 0)
		| ((vm.count( "cpu" ) > 0) ? 2 : 0);
	if ((mutex & (mutex-1)) != 0) {
		terr << _T("You have selected mutually-exclusive OpenCL device options:") << std::endl;
		if (vm.count ( "cpu" )  > 0) terr << _T("    cpu, c	Run tests on a CPU device" ) << std::endl;
		if (vm.count ( "gpu" )  > 0) terr << _T("    gpu, g	Run tests on a GPU device" ) << std::endl;
		return 1;
	}

	if( vm.count( "cpu" ) )
	{
		device_type = CL_DEVICE_TYPE_CPU;
	}

	if( vm.count( "gpu" ) )
	{
		device_type	= CL_DEVICE_TYPE_GPU;
		device_gpu_list = ~0;
	}

	//	Print version by default
	if( !vm.count( "noVersion" ) )
	{
		//	Column width for both version lines is taken from the first (longer) label
		const int indent = countOf( "clFFT client API version: " );
		tout << std::left << std::setw( indent ) << _T( "clFFT client API version: " )
			<< clfftVersionMajor << _T( "." )
			<< clfftVersionMinor << _T( "." )
			<< clfftVersionPatch << std::endl;

		cl_uint libMajor, libMinor, libPatch;
		clfftGetVersion( &libMajor, &libMinor, &libPatch );

		tout << std::left << std::setw( indent ) << _T( "clFFT runtime version: " )
			<< libMajor << _T( "." )
			<< libMinor << _T( "." )
			<< libPatch << std::endl << std::endl;
	}

	//	Print clInfo by default
	if( !vm.count( "noInfoCL" ) )
	{
		//	A temporary context is set up through initializeCL() and handed straight to cleanupCL();
		//	the returned device list is not used afterwards.
		//	NOTE(review): the final 'true' argument presumably asks initializeCL() to print - confirm
		cl_context tempContext = NULL;
		cl_command_queue tempQueue = NULL;
		cl_event tempEvent = NULL;
		std::vector< cl_device_id > device_id = ::initializeCL( device_type, device_gpu_list, tempContext, true );
		::cleanupCL( &tempContext, &tempQueue, 0, NULL, 0, NULL, &tempEvent );
	}

	//	Help is handled only after the version / clInfo output above
	if( vm.count( "help" ) )
	{
		std::cout << desc << std::endl;
		return 0;
	}

	if( vm.count( "verbose" ) )
	{
		verbose = true;
	}
	else
	{
		verbose = false;
	}

	if( vm.count( "short" ) && vm.count( "medium" ) )
	{
		terr << _T("Options 'short' and 'medium' are mutually-exclusive.  Please select only one.") << std::endl;
		return 1;
	}

	//	Create a new argc,argv to pass to InitGoogleTest
	//	First parameter of course is the name of this program
	std::vector< const char* > myArgv;

	//	Push back a pointer to the executable name
	if( argc > 0 )
		myArgv.push_back( *argv );

	//	Push into our new argv vector any parameter the user passed, except to filter their gtest_filter expressions
	std::string userFilter;
	for( int i = 1; i < argc; ++i )
	{
		if( vm.count( "short" ) || vm.count( "medium" ) )
		{
			std::string tmpStr( argv[ i ] );
			std::string::size_type pos = tmpStr.find( "gtest_filter" );
			if( pos == std::string::npos )
			{
				myArgv.push_back( argv[ i ] );
			}
			else
			{
				//  Capture the users filter, but only the regexp portion
				//  15 is the length of "--gtest_filter="; this assumes the argument starts with that prefix
				userFilter = argv[ i ];
				userFilter.erase( 0, 15 );
			}
		}
		else
		{
			myArgv.push_back( argv[ i ] );
		}
	}

	//	newFilter owns the text of the filter argument; myArgv only stores its c_str() pointer, so
	//	newFilter must not be modified again after it has been pushed.  "short" and "medium" cannot both
	//	be set here (rejected above), so at most one of the two blocks below runs.
	std::string newFilter;
	if( vm.count( "short" ) )
	{
		newFilter += "--gtest_filter=*accuracy_test_pow2*";
		if( userFilter.size( ) )
		{
			newFilter += ":";
			newFilter += userFilter;
		}
		myArgv.push_back( newFilter.c_str( ) );
	}

	if( vm.count( "medium" ) )
	{
		//	The user's filter (if any) comes first, followed by the "-*Random*" part
		newFilter += "--gtest_filter=";
		if( userFilter.size( ) )
		{
			newFilter += userFilter;
			newFilter += ":";
		}
		newFilter += "-*Random*";
		myArgv.push_back( newFilter.c_str( ) );
	}

	if( vm.count( "pointwise" ) )
	{
		comparison_type = pointwise_compare;
	}
	else
	{
		comparison_type = root_mean_square;
	}

	int myArgc	= static_cast< int >( myArgv.size( ) );

	std::cout << "Result comparison tolerance is " << tolerance << std::endl;

	//	NOTE(review): &myArgv[ 0 ] requires a non-empty vector, i.e. argc > 0
	::testing::InitGoogleTest( &myArgc, const_cast< char** >( &myArgv[ 0 ] ) );

	return RUN_ALL_TESTS();
}
Ejemplo n.º 7
0
int _tmain( int argc, _TCHAR* argv[] )
{
    size_t length = 0;
    size_t iDevice = 0;
    size_t numLoops = 0;
    bool defaultDevice = true;

    try
    {
        // Declare the supported options.
        po::options_description desc( "AMP Scan command line options" );
        desc.add_options()
        ( "help,h",			"produces this help message" )
        ( "version,v",		"Print queryable version information from the Bolt AMP library" )
        ( "ampInfo,i",		"Print queryable information of the AMP runtime" )
        ( "device,d",		po::value< size_t >( &iDevice ), "Choose specific AMP device, otherwise system default (AMP choose)" )
        ( "length,l",		po::value< size_t >( &length )->default_value( 4096 ), "Specify the length of scan array" )
        ( "profile,p",		po::value< size_t >( &numLoops )->default_value( 1 ), "Time and report Scan speed GB/s (default: profiling off)" )
        ;

        po::variables_map vm;
        po::store( po::parse_command_line( argc, argv, desc ), vm );
        po::notify( vm );

        if( vm.count( "version" ) )
        {
            //	TODO:  Query Bolt for its version information
            size_t libMajor, libMinor, libPatch;
            libMajor = 0;
            libMinor = 0;
            libPatch = 1;

            const int indent = countOf( "Bolt version: " );
            bolt::tout << std::left << std::setw( indent ) << _T( "Bolt version: " )
                       << libMajor << _T( "." )
                       << libMinor << _T( "." )
                       << libPatch << std::endl;
        }

        if( vm.count( "help" ) )
        {
            //	This needs to be 'cout' as program-options does not support wcout yet
            std::cout << desc << std::endl;
            return 0;
        }

        if( vm.count( "ampInfo" ) )
        {
            concurrency::accelerator default_acc;
            std::wcout << std::left;
            std::wcout << std::setw( colWidth ) << _T( "Default device: " ) << default_acc.description << std::endl;
            std::wcout << std::setw( colWidth ) << _T( "Default device path: " ) << default_acc.device_path << std::endl << std::endl;

            //std::for_each( allDevices.begin( ), allDevices.end( ), printAccelerator );
            std::vector< concurrency::accelerator > allDevices = concurrency::accelerator::get_all( );
            for( unsigned int i = 0; i < allDevices.size( ); ++i )
                printAccelerator( i, allDevices.at( i ) );

            return 0;
        }

        if( vm.count( "device" ) )
        {
            defaultDevice = false;
        }

    }
    catch( std::exception& e )
    {
        bolt::terr << _T( "Bolt AMP error reported:" ) << std::endl << e.what() << std::endl;
        return 1;
    }

//	bolt::control::getDefault( );
    std::vector< int > input( length, 1 );

    bolt::statTimer& myTimer = bolt::statTimer::getInstance( );
    myTimer.Reserve( 1, numLoops );

    size_t reduceId	= myTimer.getUniqueID( _T( "reduce" ), 0 );

    for( unsigned i = 0; i < numLoops; ++i )
    {
        myTimer.Start( reduceId );
        int res = bolt::amp::reduce( input.begin( ), input.end( ), 0 );
        myTimer.Stop( reduceId );
    }

    //	Remove all timings that are outside of 2 stddev (keep 65% of samples); we ignore outliers to get a more consistent result
    size_t pruned = myTimer.pruneOutliers( 1.0 );
    double scanTime = myTimer.getAverageTime( reduceId );
    double scanGB = ( input.size( ) * sizeof( int ) ) / (1024.0 * 1024.0 * 1024.0);

    bolt::tout << std::left;
    bolt::tout << std::setw( colWidth ) << _T( "Reduce profile: " ) << _T( "[" ) << numLoops-pruned << _T( "] samples" ) << std::endl;
    bolt::tout << std::setw( colWidth ) << _T( "    Size (GB): " ) << scanGB << std::endl;
    bolt::tout << std::setw( colWidth ) << _T( "    Time (s): " ) << scanTime << std::endl;
    bolt::tout << std::setw( colWidth ) << _T( "    Speed (GB/s): " ) << scanGB / scanTime << std::endl;
    bolt::tout << std::endl;

//	bolt::tout << myTimer;

    return 0;
}
Ejemplo n.º 8
0
//	Entry point of the clFFT command line client.
//
//	Parses the command line, validates the requested input/output layout combination, fills in any
//	strides the user left at 0 from the transform lengths, and then calls transform<float>() or
//	transform<double>() depending on the selected precision.
//
//	Returns 0 on success (also after "help" or "clinfo"), and 1 when mutually-exclusive device options
//	are given or a std::exception is caught.
int _tmain( int argc, _TCHAR* argv[] )
{
	//	This helps with mixing output of both wide and narrow characters to the screen
	std::ios::sync_with_stdio( false );

	//	Define MEMORYREPORT on windows platforms to enable debug memory heap checking
#if defined( MEMORYREPORT ) && defined( _WIN32 )
	TCHAR logPath[ MAX_PATH ];
	::GetCurrentDirectory( MAX_PATH, logPath );
	::_tcscat_s( logPath, _T( "\\MemoryReport.txt") );

	//	We leak the handle to this file, on purpose, so that the ::_CrtSetReportFile() can output its memory
	//	statistics on app shutdown
	HANDLE hLogFile;
	hLogFile = ::CreateFile( logPath, GENERIC_WRITE,
		FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL );

	::_CrtSetReportMode( _CRT_ASSERT, _CRTDBG_MODE_FILE | _CRTDBG_MODE_WNDW | _CRTDBG_MODE_DEBUG );
	::_CrtSetReportMode( _CRT_ERROR, _CRTDBG_MODE_FILE | _CRTDBG_MODE_WNDW | _CRTDBG_MODE_DEBUG );
	::_CrtSetReportMode( _CRT_WARN, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG );

	::_CrtSetReportFile( _CRT_ASSERT, hLogFile );
	::_CrtSetReportFile( _CRT_ERROR, hLogFile );
	::_CrtSetReportFile( _CRT_WARN, hLogFile );

	int tmp = ::_CrtSetDbgFlag( _CRTDBG_REPORT_FLAG );
	tmp |= _CRTDBG_LEAK_CHECK_DF | _CRTDBG_ALLOC_MEM_DF | _CRTDBG_CHECK_ALWAYS_DF;
	::_CrtSetDbgFlag( tmp );

	//	By looking at the memory leak report that is generated by this debug heap, there is a number with
	//	{} brackets that indicates the incremental allocation number of that block.  If you wish to set
	//	a breakpoint on that allocation number, put it in the _CrtSetBreakAlloc() call below, and the heap
	//	will issue a bp on the request, allowing you to look at the call stack
	//	::_CrtSetBreakAlloc( 1833 );

#endif /* MEMORYREPORT */

	//	OpenCL state 
	cl_device_type		deviceType	= CL_DEVICE_TYPE_ALL;
	cl_int				deviceId = 0;
	cl_int				platformId = 0;

	//	FFT state

	clfftResultLocation	place = CLFFT_INPLACE;
	clfftLayout	inLayout  = CLFFT_COMPLEX_INTERLEAVED;
	clfftLayout	outLayout = CLFFT_COMPLEX_INTERLEAVED;
	clfftPrecision precision = CLFFT_SINGLE;
	clfftDirection dir = CLFFT_FORWARD;
	//	lengths holds X, Y, Z; the stride arrays hold the X, Y, Z strides followed by the batch distance
	size_t lengths[ 3 ] = {1,1,1};
	size_t iStrides[ 4 ] = {0,0,0,0};
	size_t oStrides[ 4 ] = {0,0,0,0};
	cl_uint profile_count = 0;

	cl_uint command_queue_flags = 0;
	size_t batchSize = 1;


	//	Initialize flags for FFT library
	//	NOTE(review): this call sits outside the try block below, so anything OPENCL_V_THROW throws here
	//	is not handled by this function's catch - confirm that is intended
	std::auto_ptr< clfftSetupData > setupData( new clfftSetupData );
	OPENCL_V_THROW( clfftInitSetupData( setupData.get( ) ),
		"clfftInitSetupData failed" );

	try
	{
		// Declare the supported options.
		po::options_description desc( "clFFT client command line options" );
		desc.add_options()
			( "help,h",        "produces this help message" )
			( "version,v",     "Print queryable version information from the clFFT library" )
			( "clinfo,i",      "Print queryable information of all the OpenCL runtimes and devices" )
			( "printChosen",   "Print queryable information of the selected OpenCL runtime and device" )
			( "gpu,g",         "Force selection of OpenCL GPU devices only" )
			( "cpu,c",         "Force selection of OpenCL CPU devices only" )
			( "all,a",         "Force selection of all OpenCL devices (default)" )
			( "platform",      po::value< cl_int >( &platformId )->default_value( 0 ),   "Select a specific OpenCL platform id as it is reported by clinfo" )
			( "device",        po::value< cl_int >( &deviceId )->default_value( 0 ),   "Select a specific OpenCL device id as it is reported by clinfo" )
			( "outPlace,o",    "Out of place FFT transform (default: in place)" )
			( "double",		   "Double precision transform (default: single)" )
			( "inv",			"Backward transform (default: forward)" )
			( "dumpKernels,d", "FFT engine will dump generated OpenCL FFT kernels to disk (default: dump off)" )
			( "lenX,x",        po::value< size_t >( &lengths[ 0 ] )->default_value( 1024 ),   "Specify the length of the 1st dimension of a test array" )
			( "lenY,y",        po::value< size_t >( &lengths[ 1 ] )->default_value( 1 ),      "Specify the length of the 2nd dimension of a test array" )
			( "lenZ,z",        po::value< size_t >( &lengths[ 2 ] )->default_value( 1 ),      "Specify the length of the 3rd dimension of a test array" )
			( "isX",   po::value< size_t >( &iStrides[ 0 ] )->default_value( 1 ),						"Specify the input stride of the 1st dimension of a test array" )
			( "isY",   po::value< size_t >( &iStrides[ 1 ] )->default_value( 0 ),	"Specify the input stride of the 2nd dimension of a test array" )
			( "isZ",   po::value< size_t >( &iStrides[ 2 ] )->default_value( 0 ),	"Specify the input stride of the 3rd dimension of a test array" )
			( "iD", po::value< size_t >( &iStrides[ 3 ] )->default_value( 0 ), "input distance between subsequent sets of data when batch size > 1" )
			( "osX",   po::value< size_t >( &oStrides[ 0 ] )->default_value( 1 ),						"Specify the output stride of the 1st dimension of a test array" )
			( "osY",   po::value< size_t >( &oStrides[ 1 ] )->default_value( 0 ),	"Specify the output stride of the 2nd dimension of a test array" )
			( "osZ",   po::value< size_t >( &oStrides[ 2 ] )->default_value( 0 ),	"Specify the output stride of the 3rd dimension of a test array" )
			( "oD", po::value< size_t >( &oStrides[ 3 ] )->default_value( 0 ), "output distance between subsequent sets of data when batch size > 1" )
			( "batchSize,b",   po::value< size_t >( &batchSize )->default_value( 1 ), "If this value is greater than one, arrays will be used " )
			( "profile,p",     po::value< cl_uint >( &profile_count )->default_value( 1 ), "Time and report the kernel speed of the FFT (default: profiling off)" )
			( "inLayout",      po::value< clfftLayout >( &inLayout )->default_value( CLFFT_COMPLEX_INTERLEAVED ), "Layout of input data:\n1) interleaved\n2) planar\n3) hermitian interleaved\n4) hermitian planar\n5) real" )
			// NOTE(review): the help text on the next line says "input data" although the option sets the output layout
			( "outLayout",     po::value< clfftLayout >( &outLayout )->default_value( CLFFT_COMPLEX_INTERLEAVED ), "Layout of input data:\n1) interleaved\n2) planar\n3) hermitian interleaved\n4) hermitian planar\n5) real" )
			;

		po::variables_map vm;
		po::store( po::parse_command_line( argc, argv, desc ), vm );
		po::notify( vm );

		if( vm.count( "version" ) )
		{
			//	Column width for both version lines is taken from the first (longer) label
			const int indent = countOf( "clFFT client API version: " );
			tout << std::left << std::setw( indent ) << _T( "clFFT client API version: " )
				<< clfftVersionMajor << _T( "." )
				<< clfftVersionMinor << _T( "." )
				<< clfftVersionPatch << std::endl;

			cl_uint libMajor, libMinor, libPatch;
			clfftGetVersion( &libMajor, &libMinor, &libPatch );

			tout << std::left << std::setw( indent ) << _T( "clFFT runtime version: " )
				<< libMajor << _T( "." )
				<< libMinor << _T( "." )
				<< libPatch << std::endl << std::endl;
		}

		if( vm.count( "help" ) )
		{
			//	This needs to be 'cout' as program-options does not support wcout yet
			std::cout << desc << std::endl;
			return 0;
		}

		//	One bit per device option; (mutex & (mutex-1)) is non-zero only when more than one bit is set,
		//	i.e. when more than one of "gpu", "cpu" and "all" was given
		size_t mutex = ((vm.count( "gpu" ) > 0) ? 1 : 0)
			| ((vm.count( "cpu" ) > 0) ? 2 : 0)
			| ((vm.count( "all" ) > 0) ? 4 : 0);
		if ((mutex & (mutex-1)) != 0) {
			terr << _T("You have selected mutually-exclusive OpenCL device options:") << std::endl;
			if (vm.count ( "gpu" )  > 0) terr << _T("    gpu,g   Force selection of OpenCL GPU devices only" ) << std::endl;
			if (vm.count ( "cpu" )  > 0) terr << _T("    cpu,c   Force selection of OpenCL CPU devices only" ) << std::endl;
			if (vm.count ( "all" )  > 0) terr << _T("    all,a   Force selection of all OpenCL devices (default)" ) << std::endl;
			return 1;
		}

		if( vm.count( "gpu" ) )
		{
			deviceType	= CL_DEVICE_TYPE_GPU;
		}

		if( vm.count( "cpu" ) )
		{
			deviceType	= CL_DEVICE_TYPE_CPU;
		}

		if( vm.count( "all" ) )
		{
			deviceType	= CL_DEVICE_TYPE_ALL;
		}

		//	"clinfo" only lists the platforms/devices for the selected device type and then exits
		if( vm.count( "clinfo" ) )
		{
			std::vector< cl_platform_id > platformInfos;
			std::vector< std::vector< cl_device_id > > deviceInfos;
			discoverCLPlatforms( deviceType, platformInfos, deviceInfos );
			prettyPrintCLPlatforms(platformInfos, deviceInfos);
			return 0;
		}

		bool printInfo = false;
		if( vm.count( "printChosen" ) )
		{
			printInfo = true;
		}

		if( vm.count( "outPlace" ) )
		{
			place = CLFFT_OUTOFPLACE;
		}

		if( vm.count( "double" ) )
		{
			precision = CLFFT_DOUBLE;
		}

		if( vm.count( "inv" ) )
		{
			dir = CLFFT_BACKWARD;
		}

		//	Queue profiling is switched on only for profile counts above 1 (the option's default is 1)
		if( profile_count > 1 )
		{
			command_queue_flags |= CL_QUEUE_PROFILING_ENABLE;
		}

		if( vm.count( "dumpKernels" ) )
		{
			setupData->debugFlags	|= CLFFT_DUMP_PROGRAMS;
		}

		//	Numeric layout values as listed in the option help text:
		//	1) interleaved  2) planar  3) hermitian interleaved  4) hermitian planar  5) real
		int inL = (int)inLayout;
		int otL = (int)outLayout;

		// input output layout support matrix
		// indexed as [input layout - 1][output layout - 1]; a 1 marks a combination that is accepted below
		int ioLayoutSupport[5][5] =		{
										{ 1, 1, 0, 0, 1 },
										{ 1, 1, 0, 0, 1 },
										{ 0, 0, 0, 0, 1 },
										{ 0, 0, 0, 0, 1 },
										{ 1, 1, 1, 1, 0 },
										};

		//	Range-check both layouts before they are used as matrix indices
		if((inL < 1) || (inL > 5)) throw std::runtime_error( "Invalid Input layout format" );
		if((otL < 1) || (otL > 5)) throw std::runtime_error( "Invalid Output layout format" );

		if(ioLayoutSupport[inL-1][otL-1] == 0) throw std::runtime_error( "Invalid combination of Input/Output layout formats" );

		//	Throughout the stride setup below, a stride of 0 means "not given by the user": it is replaced
		//	by (length of the previous dimension) * (stride of the previous dimension).  Non-zero values
		//	are kept as they are.
		if( ((inL == 1) || (inL == 2)) && ((otL == 1) || (otL == 2)) ) // Complex-Complex cases
		{
			iStrides[1] = iStrides[1] ? iStrides[1] : lengths[0] * iStrides[0];
			iStrides[2] = iStrides[2] ? iStrides[2] : lengths[1] * iStrides[1];
			iStrides[3] = iStrides[3] ? iStrides[3] : lengths[2] * iStrides[2];



			if(place == CLFFT_INPLACE)
			{
				//	In place: the output strides are copied from the input strides
				oStrides[0] = iStrides[0];
				oStrides[1] = iStrides[1];
				oStrides[2] = iStrides[2];
				oStrides[3] = iStrides[3];
			}
			else
			{
				oStrides[1] = oStrides[1] ? oStrides[1] : lengths[0] * oStrides[0];
				oStrides[2] = oStrides[2] ? oStrides[2] : lengths[1] * oStrides[1];
				oStrides[3] = oStrides[3] ? oStrides[3] : lengths[2] * oStrides[2];
			}
		}
		else // Real-Complex and Complex-Real cases
		{
			//	rst points at the stride array of the real side, cst at the stride array of the other side
			size_t *rst, *cst;
			size_t N = lengths[0];
			size_t Nt = 1 + lengths[0]/2;
			bool iflag = false;
			//	rcFull: the non-real side uses layout 1 or 2 (interleaved / planar) rather than a hermitian one
			bool rcFull = (inL == 1) || (inL == 2) || (otL == 1) || (otL == 2);

			if(inLayout == CLFFT_REAL) { iflag = true; rst = iStrides; }
			else { rst = oStrides; } // either in or out should be REAL

			// Set either in or out strides whichever is real
			if(place == CLFFT_INPLACE)
			{
				//	In place: the default row stride of the real side is twice N (rcFull) or twice Nt elements
				if(rcFull)	{ rst[1] = rst[1] ? rst[1] :  N * 2 * rst[0]; }
				else		{ rst[1] = rst[1] ? rst[1] : Nt * 2 * rst[0]; }

				rst[2] = rst[2] ? rst[2] : lengths[1] * rst[1];
				rst[3] = rst[3] ? rst[3] : lengths[2] * rst[2];
			}
			else
			{
				rst[1] = rst[1] ? rst[1] : lengths[0] * rst[0];
				rst[2] = rst[2] ? rst[2] : lengths[1] * rst[1];
				rst[3] = rst[3] ? rst[3] : lengths[2] * rst[2];
			}

			// Set the remaining of in or out strides that is not real
			if(iflag) { cst = oStrides; }
			else	  { cst = iStrides; }

			//	Default row stride of the non-real side: N elements when rcFull, otherwise Nt
			if(rcFull)	{ cst[1] = cst[1] ? cst[1] :  N * cst[0]; }
			else		{ cst[1] = cst[1] ? cst[1] : Nt * cst[0]; }

			cst[2] = cst[2] ? cst[2] : lengths[1] * cst[1];
			cst[3] = cst[3] ? cst[3] : lengths[2] * cst[2];
		}

		//	Run the transform with the element type that matches the requested precision.
		//	setupData is a std::auto_ptr passed by value, so ownership moves into the call.
		if( precision == CLFFT_SINGLE )
			transform<float>( lengths, iStrides, oStrides, batchSize, inLayout, outLayout, place, precision, dir, deviceType, deviceId, platformId, printInfo, command_queue_flags, profile_count, setupData );
		else
			transform<double>( lengths, iStrides, oStrides, batchSize, inLayout, outLayout, place, precision, dir, deviceType, deviceId, platformId, printInfo, command_queue_flags, profile_count, setupData );
	}
	catch( std::exception& e )
	{
		//	Any std::exception from option parsing, validation or the transform ends the program with status 1
		terr << _T( "clFFT error condition reported:" ) << std::endl << e.what() << std::endl;
		return 1;
	}
	return 0;
}
Ejemplo n.º 9
0
//	Runs one self-checking "impulse" FFT through clFFT, prints timing information and a
//	PASS/FAIL verdict.
//
//	The input is chosen so that the expected output is known in advance:
//	  - complex and real inputs: every logical element (addressed through the input
//	    strides) is set to 1;
//	  - hermitian inputs: only the first element of every batch is set, to the number of
//	    FFT points.
//	After profile_count transforms have been enqueued, the result is read back and compared
//	against the matching pattern.
//
//	NOTE(review): T (the host scalar type) is not declared in this excerpt; presumably a
//	template header precedes this definition.
//
//	Parameters:
//	  lengths             X, Y, Z lengths; entries equal to 0 are overwritten with 1 (the
//	                      caller's array is modified).  Lengths > 1 in Y / Z select 2D / 3D.
//	  inStrides           four input strides; index 3 is used as the input batch distance.
//	  outStrides          four output strides; index 3 is used as the output batch distance.
//	  batch_size          number of transforms in the batch.
//	  in_layout           layout of the input buffer(s).
//	  out_layout          layout of the output buffer(s).
//	  place               in-place or out-of-place execution.
//	  precision, dir      forwarded to the clFFT plan / enqueue call.
//	  deviceType, deviceId, platformId, printInfo
//	                      forwarded to initializeCL.
//	  command_queue_flags forwarded to createOpenCLCommandQueue; the external timer's
//	                      results are printed only if CL_QUEUE_PROFILING_ENABLE is set.
//	  profile_count       how many times the transform is enqueued.
//	  setupData           passed to clfftSetup.
//
//	Returns 0 on completion (the PASS/FAIL result is only printed, not returned), or
//	CLFFT_INVALID_ARG_VALUE if a layout is not recognized by the late validation switches.
//	Throws std::runtime_error for unsupported layouts and for in-place layout combinations
//	that cannot share a buffer; OPENCL_V_THROW is presumably what reports OpenCL/clFFT failures.
int transform( size_t* lengths, const size_t *inStrides, const size_t *outStrides, size_t batch_size,
				clfftLayout in_layout, clfftLayout out_layout,
				clfftResultLocation place, clfftPrecision precision, clfftDirection dir,
				cl_device_type deviceType, cl_int deviceId, cl_int platformId, bool printInfo,
				cl_uint command_queue_flags, cl_uint profile_count,
				std::auto_ptr< clfftSetupData > setupData )
{
	//	Our command line does not specify what dimension FFT we wish to transform; we decode
	//	this from the lengths that the user specifies for X, Y, Z.  A length of one means that
	//	The user does not want that dimension.

	const size_t max_dimensions = 3;
	size_t strides[ 4 ];
	size_t o_strides[ 4 ];
	size_t fftVectorSize = 0;
	size_t fftVectorSizePadded = 0;
	size_t fftBatchSize = 0;
	size_t outfftVectorSize = 0;
	size_t outfftVectorSizePadded = 0;
	size_t outfftBatchSize = 0;
	size_t size_of_input_buffers_in_bytes = 0;
	size_t size_of_output_buffers_in_bytes = 0;
	cl_uint number_of_output_buffers = 0;
	clfftDim	dim = CLFFT_1D;
	cl_mem input_cl_mem_buffers [2] = { NULL, NULL };
	cl_mem output_cl_mem_buffers[2] = { NULL, NULL };
	std::vector< cl_device_id > device_id;
	cl_context context;
	cl_command_queue queue;
	cl_event outEvent = NULL;
	clfftPlanHandle plan_handle;

	//	Treat an unspecified (zero) length as "dimension not used"
	for (unsigned u = 0; u < max_dimensions; ++u) {
		if (0 != lengths[u])
			continue;
		lengths[u] = 1;
	}

	if( lengths[ 1 ] > 1 )
	{
		dim	= CLFFT_2D;
	}
	if( lengths[ 2 ] > 1 )
	{
		dim	= CLFFT_3D;
	}

	strides[ 0 ] = inStrides[0];
	strides[ 1 ] = inStrides[1];
	strides[ 2 ] = inStrides[2];
	strides[ 3 ] = inStrides[3];

	o_strides[ 0 ] = outStrides[0];
	o_strides[ 1 ] = outStrides[1];
	o_strides[ 2 ] = outStrides[2];
	o_strides[ 3 ] = outStrides[3];

	//	The "padded" size is the distance between batches; the batch size is the number of
	//	elements the whole buffer has to hold.
	fftVectorSize = lengths[0] * lengths[1] * lengths[2];
	fftVectorSizePadded = strides[3];
	fftBatchSize = fftVectorSizePadded * batch_size;

	if(place == CLFFT_INPLACE)
	{
		outfftVectorSize = fftVectorSize;
		outfftVectorSizePadded = fftVectorSizePadded;
		outfftBatchSize = fftBatchSize;
	}
	else
	{
		outfftVectorSize = lengths[0] * lengths[1] * lengths[2];
		outfftVectorSizePadded = o_strides[3];
		outfftBatchSize = outfftVectorSizePadded * batch_size;
	}


	// Real to complex case: input and output element counts differ even in-place,
	// so the output sizes always come from the output strides
	if( (in_layout == CLFFT_REAL) || (out_layout == CLFFT_REAL) )
	{
		fftVectorSizePadded = strides[3];
		fftBatchSize = fftVectorSizePadded * batch_size;

		outfftVectorSizePadded = o_strides[3];
		outfftBatchSize = outfftVectorSizePadded * batch_size;

		fftVectorSize = lengths[0] * lengths[1] * lengths[2];
		outfftVectorSize = fftVectorSize;

	}


	//	Planar layouts use two buffers of T, interleaved layouts one buffer of complex< T >
	switch( out_layout )
	{
	case CLFFT_COMPLEX_INTERLEAVED:
		number_of_output_buffers = 1;
		size_of_output_buffers_in_bytes = outfftBatchSize * sizeof( std::complex< T > );
		break;
	case CLFFT_COMPLEX_PLANAR:
		number_of_output_buffers = 2;
		size_of_output_buffers_in_bytes = outfftBatchSize * sizeof(T);
		break;
	case CLFFT_HERMITIAN_INTERLEAVED:
		number_of_output_buffers = 1;
		size_of_output_buffers_in_bytes = outfftBatchSize * sizeof( std::complex< T > );
		break;
	case CLFFT_HERMITIAN_PLANAR:
		number_of_output_buffers = 2;
		size_of_output_buffers_in_bytes = outfftBatchSize * sizeof(T);
		break;
	case CLFFT_REAL:
		number_of_output_buffers = 1;
		size_of_output_buffers_in_bytes = outfftBatchSize * sizeof(T);
		break;
	}

	// Fill the input buffers
	switch( in_layout )
	{
	case CLFFT_COMPLEX_INTERLEAVED:
		{
			//	This call creates our openCL context and sets up our devices; expected to throw on error
			size_of_input_buffers_in_bytes = fftBatchSize * sizeof( std::complex< T > );

			device_id = initializeCL( deviceType, deviceId, platformId, context, printInfo );
			createOpenCLCommandQueue( context,
				command_queue_flags, queue,
				device_id,
				size_of_input_buffers_in_bytes, 1, input_cl_mem_buffers,
				size_of_output_buffers_in_bytes, number_of_output_buffers, output_cl_mem_buffers);

			//	The sized constructor value-initializes every element to zero
			std::vector< std::complex< T > > input( fftBatchSize );

			// impulse test case
			for(size_t b = 0; b < batch_size; b++)
			{
				size_t p3 = b * strides[3];
				for(size_t k = 0; k < lengths[2]; k++)
				{
					size_t p2 = p3 + k * strides[2];
					for(size_t j = 0; j < lengths[1]; j++)
					{
						size_t p1 = p2 + j * strides[1];
						for(size_t i = 0; i < lengths[0]; i++)
						{
							size_t p0 = p1 + i * strides[0];
							input[p0] = 1;
						}
					}
				}
			}


			OPENCL_V_THROW( clEnqueueWriteBuffer( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &input[ 0 ],
				0, NULL, &outEvent ),
				"clEnqueueWriteBuffer failed" );

		}
		break;
	case CLFFT_COMPLEX_PLANAR:
		{
			//	This call creates our openCL context and sets up our devices; expected to throw on error
			size_of_input_buffers_in_bytes = fftBatchSize * sizeof( T );

			device_id = initializeCL( deviceType, deviceId, platformId, context, printInfo );
			createOpenCLCommandQueue( context,
				command_queue_flags, queue,
				device_id,
				size_of_input_buffers_in_bytes, 2, input_cl_mem_buffers,
				size_of_output_buffers_in_bytes, number_of_output_buffers, output_cl_mem_buffers);

			//	The sized constructor value-initializes every element to zero
			std::vector< T > real( fftBatchSize );
			std::vector< T > imag( fftBatchSize );

			// impulse test case
			for(size_t b = 0; b < batch_size; b++)
			{
				size_t p3 = b * strides[3];
				for(size_t k = 0; k < lengths[2]; k++)
				{
					size_t p2 = p3 + k * strides[2];
					for(size_t j = 0; j < lengths[1]; j++)
					{
						size_t p1 = p2 + j * strides[1];
						for(size_t i = 0; i < lengths[0]; i++)
						{
							size_t p0 = p1 + i * strides[0];
							real[p0] = 1;
						}
					}
				}
			}


			OPENCL_V_THROW( clEnqueueWriteBuffer( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &real[ 0 ],
				0, NULL, &outEvent ),
				"clEnqueueWriteBuffer failed" );
			OPENCL_V_THROW( clEnqueueWriteBuffer( queue, input_cl_mem_buffers[ 1 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &imag[ 0 ],
				0, NULL, &outEvent ),
				"clEnqueueWriteBuffer failed" );
		}
		break;
	case CLFFT_HERMITIAN_INTERLEAVED:
		{
			//	This call creates our openCL context and sets up our devices; expected to throw on error
			size_of_input_buffers_in_bytes = fftBatchSize * sizeof( std::complex< T > );

			device_id = initializeCL( deviceType, deviceId, platformId, context, printInfo );
			createOpenCLCommandQueue( context,
				command_queue_flags, queue,
				device_id,
				size_of_input_buffers_in_bytes, 1, input_cl_mem_buffers,
				size_of_output_buffers_in_bytes, number_of_output_buffers, output_cl_mem_buffers);

			//	The sized constructor value-initializes every element to zero
			std::vector< std::complex< T > > input( fftBatchSize );

			// impulse test case: only the first element of each batch is non-zero
			for(size_t b = 0; b < batch_size; b++)
			{
				size_t p3 = b * strides[3];
				input[p3] = static_cast<T>(outfftVectorSize);

			}


			OPENCL_V_THROW( clEnqueueWriteBuffer( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &input[ 0 ],
				0, NULL, &outEvent ),
				"clEnqueueWriteBuffer failed" );
		}
		break;
	case CLFFT_HERMITIAN_PLANAR:
		{
			//	This call creates our openCL context and sets up our devices; expected to throw on error
			size_of_input_buffers_in_bytes = fftBatchSize * sizeof( T );

			device_id = initializeCL( deviceType, deviceId, platformId, context, printInfo );
			createOpenCLCommandQueue( context,
				command_queue_flags, queue,
				device_id,
				size_of_input_buffers_in_bytes, 2, input_cl_mem_buffers,
				size_of_output_buffers_in_bytes, number_of_output_buffers, output_cl_mem_buffers);

			//	The sized constructor value-initializes every element to zero
			std::vector< T > real( fftBatchSize );
			std::vector< T > imag( fftBatchSize );

			// impulse test case: only the first element of each batch is non-zero
			for(size_t b = 0; b < batch_size; b++)
			{
				size_t p3 = b * strides[3];
				real[p3] = static_cast<T>(outfftVectorSize);
			}



			OPENCL_V_THROW( clEnqueueWriteBuffer( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &real[ 0 ],
				0, NULL, &outEvent ),
				"clEnqueueWriteBuffer failed" );
			OPENCL_V_THROW( clEnqueueWriteBuffer( queue, input_cl_mem_buffers[ 1 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &imag[ 0 ],
				0, NULL, &outEvent ),
				"clEnqueueWriteBuffer failed" );
		}
		break;
	case CLFFT_REAL:
		{
			//	This call creates our openCL context and sets up our devices; expected to throw on error
			size_of_input_buffers_in_bytes = fftBatchSize * sizeof( T );

			device_id = initializeCL( deviceType, deviceId, platformId, context, printInfo );
			createOpenCLCommandQueue( context,
				command_queue_flags, queue,
				device_id,
				size_of_input_buffers_in_bytes, 1, input_cl_mem_buffers,
				size_of_output_buffers_in_bytes, number_of_output_buffers, output_cl_mem_buffers);

			//	The sized constructor value-initializes every element to zero
			std::vector< T > real( fftBatchSize );

			// impulse test case
			for(size_t b = 0; b < batch_size; b++)
			{
				size_t p3 = b * strides[3];
				for(size_t k = 0; k < lengths[2]; k++)
				{
					size_t p2 = p3 + k * strides[2];
					for(size_t j = 0; j < lengths[1]; j++)
					{
						size_t p1 = p2 + j * strides[1];
						for(size_t i = 0; i < lengths[0]; i++)
						{
							size_t p0 = p1 + i * strides[0];
							real[p0] = 1;
						}
					}
				}
			}


			OPENCL_V_THROW( clEnqueueWriteBuffer( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &real[ 0 ],
				0, NULL, &outEvent ),
				"clEnqueueWriteBuffer failed" );
		}
		break;
	default:
		{
			throw std::runtime_error( "Input layout format not yet supported" );
		}
		break;
	}

	//	Discover and load the timer module if present
	void* timerLibHandle = LoadSharedLibrary( "lib", "StatTimer", false );
	if( timerLibHandle == NULL )
	{
		terr << _T( "Could not find the external timing library; timings disabled" ) << std::endl;
	}


	//	Initialize function pointers to call into the shared module
	//	NOTE(review): this is called even when the library failed to load; presumably
	//	LoadFunctionAddr returns NULL for a NULL handle -- confirm.
	PFGETSTATTIMER get_timer = reinterpret_cast< PFGETSTATTIMER > ( LoadFunctionAddr( timerLibHandle, "getStatTimer" ) );

	//	Create and initialize our timer class, if the external timer shared library loaded
	baseStatTimer* timer = NULL;
	size_t	clFFTID = 0;
	if( get_timer )
	{
		timer = get_timer( CLFFT_GPU );
		timer->Reserve( 1, profile_count );
		timer->setNormalize( true );

		clFFTID	= timer->getUniqueID( "clFFT", 0 );
	}

	OPENCL_V_THROW( clfftSetup( setupData.get( ) ), "clfftSetup failed" );
	OPENCL_V_THROW( clfftCreateDefaultPlan( &plan_handle, context, dim, lengths ), "clfftCreateDefaultPlan failed" );

	//	Default plan creates a plan that expects an inPlace transform with interleaved complex numbers
	OPENCL_V_THROW( clfftSetResultLocation( plan_handle, place ), "clfftSetResultLocation failed" );
	OPENCL_V_THROW( clfftSetLayout( plan_handle, in_layout, out_layout ), "clfftSetLayout failed" );
	OPENCL_V_THROW( clfftSetPlanBatchSize( plan_handle, batch_size ), "clfftSetPlanBatchSize failed" );
	OPENCL_V_THROW( clfftSetPlanPrecision( plan_handle, precision ), "clfftSetPlanPrecision failed" );

	OPENCL_V_THROW (clfftSetPlanInStride  ( plan_handle, dim, strides ), "clfftSetPlanInStride failed" );
	OPENCL_V_THROW (clfftSetPlanOutStride ( plan_handle, dim, o_strides ), "clfftSetPlanOutStride failed" );
	OPENCL_V_THROW (clfftSetPlanDistance  ( plan_handle, strides[ 3 ], o_strides[ 3 ]), "clfftSetPlanDistance failed" );

	// Set backward scale factor to 1.0 for non real FFTs to do correct output checks
	if(dir == CLFFT_BACKWARD && in_layout != CLFFT_REAL && out_layout != CLFFT_REAL)
		OPENCL_V_THROW (clfftSetPlanScale( plan_handle, CLFFT_BACKWARD, (cl_float)1.0f ), "clfftSetPlanScale failed" );

	OPENCL_V_THROW( clfftBakePlan( plan_handle, 1, &queue, NULL, NULL ), "clfftBakePlan failed" );

	//get the buffersize
	size_t buffersize=0;
	OPENCL_V_THROW( clfftGetTmpBufSize(plan_handle, &buffersize ), "clfftGetTmpBufSize failed" );

	//allocate the intermediate buffer
	cl_mem clMedBuffer=NULL;

	if (buffersize)
	{
		cl_int medstatus;
		clMedBuffer = clCreateBuffer ( context, CL_MEM_READ_WRITE, buffersize, 0, &medstatus);
		OPENCL_V_THROW( medstatus, "Creating intmediate Buffer failed" );
	}

	switch( in_layout )
	{
	case CLFFT_COMPLEX_INTERLEAVED:
	case CLFFT_COMPLEX_PLANAR:
	case CLFFT_HERMITIAN_INTERLEAVED:
	case CLFFT_HERMITIAN_PLANAR:
	case CLFFT_REAL:
		break;
	default:
		//	Don't recognize input layout
		return CLFFT_INVALID_ARG_VALUE;
	}

	switch( out_layout )
	{
	case CLFFT_COMPLEX_INTERLEAVED:
	case CLFFT_COMPLEX_PLANAR:
	case CLFFT_HERMITIAN_INTERLEAVED:
	case CLFFT_HERMITIAN_PLANAR:
	case CLFFT_REAL:
		break;
	default:
		//	Don't recognize output layout
		return CLFFT_INVALID_ARG_VALUE;
	}

	//	An in-place transform reuses the input buffer(s), so the buffer count of the two
	//	layouts has to agree
	if (( place == CLFFT_INPLACE )
	&&  ( in_layout != out_layout )) {
		switch( in_layout )
		{
		case CLFFT_COMPLEX_INTERLEAVED:
			{
				if( (out_layout == CLFFT_COMPLEX_PLANAR) || (out_layout == CLFFT_HERMITIAN_PLANAR) )
				{
					throw std::runtime_error( "Cannot use the same buffer for interleaved->planar in-place transforms" );
				}
				break;
			}
		case CLFFT_COMPLEX_PLANAR:
			{
				if( (out_layout == CLFFT_COMPLEX_INTERLEAVED) || (out_layout == CLFFT_HERMITIAN_INTERLEAVED) )
				{
					throw std::runtime_error( "Cannot use the same buffer for planar->interleaved in-place transforms" );
				}
				break;
			}
		case CLFFT_HERMITIAN_INTERLEAVED:
			{
				if( out_layout != CLFFT_REAL )
				{
					throw std::runtime_error( "Cannot use the same buffer for interleaved->planar in-place transforms" );
				}
				break;
			}
		case CLFFT_HERMITIAN_PLANAR:
			{
				throw std::runtime_error( "Cannot use the same buffer for planar->interleaved in-place transforms" );
				break;
			}
		case CLFFT_REAL:
			{
				if( (out_layout == CLFFT_COMPLEX_PLANAR) || (out_layout == CLFFT_HERMITIAN_PLANAR) )
				{
					throw std::runtime_error( "Cannot use the same buffer for interleaved->planar in-place transforms" );
				}
				break;
			}
		}
	}

	//	Loop as many times as the user specifies to average out the timings
	//
	cl_mem * BuffersOut = ( place == CLFFT_INPLACE ) ? NULL : &output_cl_mem_buffers[ 0 ];

	Timer tr;
	tr.Start();

	for( cl_uint i = 0; i < profile_count; ++i )
	{
		if( timer ) timer->Start( clFFTID );

		OPENCL_V_THROW( clfftEnqueueTransform( plan_handle, dir, 1, &queue, 0, NULL, &outEvent,
			&input_cl_mem_buffers[ 0 ], BuffersOut, clMedBuffer ),
			"clfftEnqueueTransform failed" );

		if( timer ) timer->Stop( clFFTID );
	}
	OPENCL_V_THROW( clFinish( queue ), "clFinish failed" );
	if(clMedBuffer) clReleaseMemObject(clMedBuffer);

	//	Average wall time per transform and the 5*N*log2(N) operation estimate used for gflops
	double wtime = tr.Sample()/((double)profile_count);
	size_t totalLen = 1;
	for(int i=0; i<dim; i++) totalLen *= lengths[i];
	double opsconst = 5.0 * (double)totalLen * log((double)totalLen) / log(2.0);

	if(profile_count > 1)
	{
		tout << "\nExecution wall time: " << 1000.0*wtime << " ms" << std::endl;
		tout << "Execution gflops: " << ((double)batch_size * opsconst)/(1000000000.0*wtime) << std::endl;
	}

	if( timer && (command_queue_flags & CL_QUEUE_PROFILING_ENABLE) )
	{
		//	Remove all timings that are outside of 2 stddev; we ignore outliers to get a more consistent result
		timer->pruneOutliers( 2.0 );
		timer->Print( );
		timer->Reset( );
	}

	/*****************/
	FreeSharedLibrary( timerLibHandle );

	// Read and check output data
	// This check is not valid if the FFT is executed multiple times inplace.
	//
	if (( place == CLFFT_OUTOFPLACE )
	||  ( profile_count == 1))
	{
		bool checkflag= false;
		switch( out_layout )
		{
		case CLFFT_HERMITIAN_INTERLEAVED:
		case CLFFT_COMPLEX_INTERLEAVED:
			{
				std::vector< std::complex< T > > output( outfftBatchSize );

				if( place == CLFFT_INPLACE )
				{
					OPENCL_V_THROW( clEnqueueReadBuffer( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &output[ 0 ],
						0, NULL, NULL ),
						"Reading the result buffer failed" );
				}
				else
				{
					OPENCL_V_THROW( clEnqueueReadBuffer( queue, BuffersOut[ 0 ], CL_TRUE, 0, size_of_output_buffers_in_bytes, &output[ 0 ],
						0, NULL, NULL ),
						"Reading the result buffer failed" );
				}

				//check output data: the first element of each batch carries the point count,
				//everything else must be zero
				for( size_t i = 0; i < outfftBatchSize; ++i )
				{
					if (0 == (i % outfftVectorSizePadded))
					{
						if (output[i].real() != outfftVectorSize)
						{
							checkflag = true;
							break;
						}
					}
					else
					{
						if (output[ i ].real() != 0)
						{
							checkflag = true;
							break;
						}
					}

					if (output[ i ].imag() != 0)
					{
						checkflag = true;
						break;
					}
				}
			}
			break;
		case CLFFT_HERMITIAN_PLANAR:
		case CLFFT_COMPLEX_PLANAR:
			{
				std::valarray< T > real( outfftBatchSize );
				std::valarray< T > imag( outfftBatchSize );

				if( place == CLFFT_INPLACE )
				{
					OPENCL_V_THROW( clEnqueueReadBuffer( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &real[ 0 ],
						0, NULL, NULL ),
						"Reading the result buffer failed" );
					OPENCL_V_THROW( clEnqueueReadBuffer( queue, input_cl_mem_buffers[ 1 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &imag[ 0 ],
						0, NULL, NULL ),
						"Reading the result buffer failed" );
				}
				else
				{
					OPENCL_V_THROW( clEnqueueReadBuffer( queue, BuffersOut[ 0 ], CL_TRUE, 0, size_of_output_buffers_in_bytes, &real[ 0 ],
						0, NULL, NULL ),
						"Reading the result buffer failed" );
					OPENCL_V_THROW( clEnqueueReadBuffer( queue, BuffersOut[ 1 ], CL_TRUE, 0, size_of_output_buffers_in_bytes, &imag[ 0 ],
						0, NULL, NULL ),
						"Reading the result buffer failed" );
				}

				//  Check output data: same pattern as the interleaved case
				for( size_t i = 0; i < outfftBatchSize; ++i )
				{
					if (0 == (i % outfftVectorSizePadded))
					{
						if (real[i] != outfftVectorSize)
						{
							checkflag = true;
							break;
						}
					}
					else
					{
						if (real[i] != 0)
						{
							checkflag = true;
							break;
						}
					}

					if (imag[i] != 0)
					{
						checkflag = true;
						break;
					}
				}
			}
			break;
		case CLFFT_REAL:
			{
				std::valarray< T > real( outfftBatchSize );

				if( place == CLFFT_INPLACE )
				{
					OPENCL_V_THROW( clEnqueueReadBuffer( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &real[ 0 ],
						0, NULL, NULL ),
						"Reading the result buffer failed" );
				}
				else
				{
					OPENCL_V_THROW( clEnqueueReadBuffer( queue, BuffersOut[ 0 ], CL_TRUE, 0, size_of_output_buffers_in_bytes, &real[ 0 ],
						0, NULL, NULL ),
						"Reading the result buffer failed" );
				}

				//	Check output data: every logical element must be 1.  The outer loops
				//	stop as soon as a mismatch has been recorded.
				for(size_t b = 0; b < batch_size && !checkflag; b++)
				{
					size_t p3 = b * o_strides[3];
					for(size_t k = 0; k < lengths[2] && !checkflag; k++)
					{
						size_t p2 = p3 + k * o_strides[2];
						for(size_t j = 0; j < lengths[1] && !checkflag; j++)
						{
							size_t p1 = p2 + j * o_strides[1];
							for(size_t i = 0; i < lengths[0]; i++)
							{
								size_t p0 = p1 + i * o_strides[0];

								if (real[p0] != 1)
								{
									checkflag = true;
									break;
								}

							}
						}
					}
				}
			}
			break;
		default:
			{
				throw std::runtime_error( "Output layout format not yet supported" );
			}
			break;
		}

		if (checkflag)
		{
			std::cout << "\n\n\t\tInternal Client Test *****FAIL*****" << std::endl;
		}
		else
		{
			std::cout << "\n\n\t\tInternal Client Test *****PASS*****" << std::endl;
		}
	}

	OPENCL_V_THROW( clfftDestroyPlan( &plan_handle ), "clfftDestroyPlan failed" );
	OPENCL_V_THROW( clfftTeardown( ), "clfftTeardown failed" );

	cleanupCL( &context, &queue, countOf( input_cl_mem_buffers ), input_cl_mem_buffers, countOf( output_cl_mem_buffers ), output_cl_mem_buffers, &outEvent );
	return 0;
}
Ejemplo n.º 10
0
//--------------------------------------------------------------------------------
// Loads the transactions of theAccount from the binary cache and, when requested or when
// the cache is stale, downloads additional pages from the etherscan.io web API, appends
// the transactions found in new blocks, and rewrites the cache.
//
// options: reads rerun, slurp, pageSize, maxTransactions and cmdFile.
// message: receives an error description (or the raw API status header) on failure.
//
// Returns options.cmdFile on every failure path; otherwise TRUE when running from a
// command file or when at least one transaction is loaded.
SFBool CSlurperApp::Slurp(COptions& options, SFString& message)
{
	double start = vrNow();

	// Do we have the data for this address cached?
	SFString cacheFilename = cachePath(theAccount.addr+".bin");
	SFBool needToRead = SFos::fileExists(cacheFilename);
	// On a re-run the transactions are already in memory, so skip the read
	if (options.rerun && theAccount.transactions.getCount())
		needToRead=FALSE;
	if (needToRead)
	{
		// Once a transaction is on the blockchain, it will never change
		// therefore, we can store them in a binary cache. Here we read
		// from a previously stored cache.
		SFArchive archive(TRUE, NO_SCHEMA);
		if (!archive.Lock(cacheFilename, binaryReadOnly, LOCK_NOWAIT))
		{
			message = "Could not open file: '" + cacheFilename + "'\n";
			return options.cmdFile;
		}
		theAccount.Serialize(archive);
		archive.Close();
	}

	SFTime now = Now();
	SFTime fileTime = SFos::fileLastModifyDate(cacheFilename);

	// If the user tells us he/she wants to update the cache, or the cache is older than
	// the configured 'update_freq' (default 300 seconds, never less than 60), update it
	SFInt32 nSeconds = MAX(60,config.GetProfileIntGH("SETTINGS", "update_freq", 300));
	if (options.slurp || (now - fileTime) > SFTimeSpan(0,0,0,nSeconds))
	{
		// This is how many records we currently have
		SFInt32 origCount  = theAccount.transactions.getCount();
		SFInt32 nNewBlocks = 0;
		SFInt32 nextRecord = origCount;

		outErr << "\tSlurping new transactions from blockchain...\n";
		SFInt32  nRequests = 0, nRead = 0;

		// We already have 'page' pages, so start there.
		SFInt32  page = MAX(theAccount.lastPage,1);

		// Keep reading until we get less than a full page
		SFString contents;
		SFBool done = FALSE;
		while (!done)
		{
			SFString url = SFString("https://api.etherscan.io/api?module=account&action=txlist&sort=asc") +
				"&address=" + theAccount.addr + "&page="    + asString(page) + "&offset="  + asString(options.pageSize) + "&apikey="  + api.getKey();
			// Grab a page of data from the web api
			SFString thisPage = urlToString(url);

			// See if it's good data, if not, bail
			message = nextTokenClear(thisPage, '[');
			if (!message.Contains("{\"status\":\"1\",\"message\":\"OK\""))
			{
				if (message.Contains("{\"status\":\"0\",\"message\":\"No transactions found\",\"result\":"))
					message = "No transactions were found for address '" + theAccount.addr + "'. Is it correct?";
				return options.cmdFile;
			}

			contents += thisPage;

			// One closing brace per record, plus one more that is not a record
			SFInt32 nRecords = countOf('}',thisPage)-1;
			nRead += nRecords;
			outErr << "\tDownloaded " << nRead << " potentially new transactions." << (isTesting?"\n":"\r");

			// If we got a full page, there are more to come
			done = (nRecords < options.pageSize);
			if (!done)
				page++;

			// Etherscan.io doesn't want more than five pages per second, so sleep for a second
			// after every fourth request
			if (++nRequests==4)
			{
				SFos::sleep(1.0);
				nRequests=0;
			}

			// Make sure we don't spin forever
			if (nRead >= options.maxTransactions)
				done=TRUE;
		}

		SFInt32 minBlock=0,maxBlock=0;
		findBlockRange(contents, minBlock, maxBlock);
		outErr << "\n\tDownload contains blocks from " << minBlock << " to " << maxBlock << "\n";

		// Keep track of which last full page we've read
		theAccount.lastPage = page;
		theAccount.pageSize = options.pageSize;

		// pre allocate the array
		theAccount.transactions.Grow(nRead);

		SFInt32 lastBlock=0;
		char *p = cleanUpJson((char *)(const char*)contents);
		while (p && *p)
		{
			CTransaction trans;SFInt32 nFields=0;
			p = trans.parseJson(p,nFields);
			if (nFields)
			{
				SFInt32 transBlock = trans.blockNumber;
				if (transBlock > theAccount.lastBlock) // add the new transaction if it's in a new block
				{
					theAccount.transactions[nextRecord++] = trans;
					lastBlock = transBlock;
					if (!(++nNewBlocks%REP_FREQ))
					{
						outErr << "\tFound new transaction at block " << transBlock << ". Importing..." << (isTesting?"\n":"\r");
						outErr.Flush();
					}
				}
			}
		}
		if (!isTesting && nNewBlocks) { outErr << "\tFound new transaction at block " << lastBlock << ". Importing...\n"; outErr.Flush(); }

		// Only move the high-water mark when something was imported. Otherwise it would be
		// reset to zero, and a later slurp in the same process would treat every downloaded
		// transaction as new and append duplicates.
		if (nNewBlocks)
			theAccount.lastBlock = lastBlock;

		// Write the data if we got new data
		SFInt32 newRecords = (theAccount.transactions.getCount() - origCount);
		if (newRecords)
		{
			outErr << "\tWriting " << newRecords << " new records to cache\n";
			SFArchive archive(FALSE, NO_SCHEMA);
			if (archive.Lock(cacheFilename, binaryWriteCreate, LOCK_CREATE))
			{
				theAccount.transactions.Sort(sortTransactionsForWrite);
				theAccount.Serialize(archive);
				archive.Close();

			} else
			{
				message = "Could not open file: '" + cacheFilename + "'\n";
				return options.cmdFile;
			}
		}
	}

	if (!isTesting)
	{
		double stop = vrNow();
		double timeSpent = stop-start;
		fprintf(stderr, "\tLoaded %ld total records in %f seconds\n", theAccount.transactions.getCount(), timeSpent);
		fflush(stderr);
	}

	return (options.cmdFile || theAccount.transactions.getCount()>0);
}
Ejemplo n.º 11
0
//  Benchmarks cl::CommandQueue::enqueueCopyBuffer on a user-selected OpenCL device.
//
//  Parses the command line, selects the platform/device, installs a command queue as
//  the Bolt default, then times `iterations` buffer copies of `length` ints and prints
//  the sample count, size in GB, average time and resulting GB/s.
//
//  Returns 0 on success (also after --help or --queryOpenCL) and 1 if command-line
//  parsing throws.
int _tmain( int argc, _TCHAR* argv[] )
{
    cl_uint userPlatform = 0;
    cl_uint userDevice = 0;
    size_t iterations = 0;
    size_t length = 0;
    size_t algo = 1;    //  Only referenced by the commented-out "algo" option below
    cl_device_type deviceType = CL_DEVICE_TYPE_DEFAULT;
    bool print_clInfo = false;
    bool systemMemory = false;

    /******************************************************************************
    * Parameter parsing                                                           *
    ******************************************************************************/
    try
    {
        // Declare the supported options.
        po::options_description desc( "OpenCL CopyBuffer command line options" );
        desc.add_options()
            ( "help,h",         "produces this help message" )
            ( "version,v",      "Print queryable version information from the Bolt CL library" )
            ( "queryOpenCL,q",  "Print queryable platform and device info and return" )
            ( "gpu,g",          "Report only OpenCL GPU devices" )
            ( "cpu,c",          "Report only OpenCL CPU devices" )
            ( "all,a",          "Report all OpenCL devices" )
            ( "systemMemory,s", "Allocate vectors in system memory, otherwise device memory" )
            ( "platform,p",     po::value< cl_uint >( &userPlatform )->default_value( 0 ), "Specify the platform under test using the index reported by -q flag" )
            ( "device,d",       po::value< cl_uint >( &userDevice )->default_value( 0 ), "Specify the device under test using the index reported by the -q flag.  "
                    "Index is relative with respect to -g, -c or -a flags" )
            ( "length,l",       po::value< size_t >( &length )->default_value( 1048576 ), "Specify the length of scan array" )
            ( "iterations,i",   po::value< size_t >( &iterations )->default_value( 50 ), "Number of samples in timing loop" )
            //( "algo,a",       po::value< size_t >( &algo )->default_value( 1 ), "Algorithm used [1,2]  1:SCAN_BOLT, 2:XYZ" )//Not used in this file
            ;

        po::variables_map vm;
        po::store( po::parse_command_line( argc, argv, desc ), vm );
        po::notify( vm );

        if( vm.count( "version" ) )
        {
            cl_uint libMajor, libMinor, libPatch;
            bolt::cl::getVersion( libMajor, libMinor, libPatch );

            const int indent = countOf( "Bolt version: " );
            bolt::tout << std::left << std::setw( indent ) << _T( "Bolt version: " )
                << libMajor << _T( "." )
                << libMinor << _T( "." )
                << libPatch << std::endl;
        }

        if( vm.count( "help" ) )
        {
            //  This needs to be 'cout' as program-options does not support wcout yet
            std::cout << desc << std::endl;
            return 0;
        }

        if( vm.count( "queryOpenCL" ) )
        {
            print_clInfo = true;
        }

        //  When several device flags are given, the last check below wins
        if( vm.count( "gpu" ) )
        {
            deviceType  = CL_DEVICE_TYPE_GPU;
        }

        if( vm.count( "cpu" ) )
        {
            deviceType  = CL_DEVICE_TYPE_CPU;
        }

        if( vm.count( "all" ) )
        {
            deviceType  = CL_DEVICE_TYPE_ALL;
        }

        if( vm.count( "systemMemory" ) )
        {
            systemMemory = true;
        }

    }
    catch( std::exception& e )
    {
        //  Narrow literal on purpose: std::cout is a narrow stream, and a _T() literal
        //  would be printed as a pointer value in UNICODE builds
        std::cout << "Scan Benchmark error condition reported:" << std::endl << e.what() << std::endl;
        return 1;
    }

    /******************************************************************************
    * Initialize platforms and devices                                            *
    * /todo we should move this logic inside of the control class                 *
    ******************************************************************************/
    //  Query OpenCL for available platforms
    cl_int err = CL_SUCCESS;

    // Platform vector contains all available platforms on system
    std::vector< cl::Platform > platforms;
    bolt::cl::V_OPENCL( cl::Platform::get( &platforms ), "Platform::get() failed" );

    if( print_clInfo )
    {
        //  /todo: port the printing code from test/scan to control class
        //std::for_each( platforms.begin( ), platforms.end( ), printPlatformFunctor( 0 ) );
        return 0;
    }

    // Device info
    std::vector< cl::Device > devices;
    bolt::cl::V_OPENCL( platforms.at( userPlatform ).getDevices( deviceType, &devices ), "Platform::getDevices() failed" );

    cl::Context myContext( devices.at( userDevice ) );
    cl::CommandQueue myQueue( myContext, devices.at( userDevice ) );

    //  Now that the device we want is selected and we have created our own cl::CommandQueue, set it as the
    //  default cl::CommandQueue for the Bolt API
    bolt::cl::control::getDefault( ).setCommandQueue( myQueue );

    std::string strDeviceName = bolt::cl::control::getDefault( ).getDevice( ).getInfo< CL_DEVICE_NAME >( &err );
    bolt::cl::V_OPENCL( err, "Device::getInfo< CL_DEVICE_NAME > failed" );

    std::cout << "Device under test : " << strDeviceName << std::endl;

    /******************************************************************************
    * Benchmark logic                                                             *
    ******************************************************************************/
    bolt::statTimer& myTimer = bolt::statTimer::getInstance( );
    myTimer.Reserve( 1, iterations );
    size_t scanId   = myTimer.getUniqueID( _T( "copybuffer" ), 0 );

    size_t pruned = 0;
    //  Stays at max() when nothing is timed, so the reported speed becomes ~0
    double scanTime = std::numeric_limits< double >::max( );
    double scanGB = ( length * sizeof( int ) ) / (1024.0 * 1024.0 * 1024.0);
    ::cl::CommandQueue& boltQueue = bolt::cl::control::getDefault( ).getCommandQueue( );

    //  ::cl::Buffer can not handle buffers of size 0
    if( length > 0 )
    {
        if( systemMemory )
        {
            //  Buffers wrap host vectors; each sample also maps/unmaps the output buffer
            std::vector< int > input( length, 1 );
            std::vector< int > output( length );
            ::cl::Buffer inputBuffer( bolt::cl::control::getDefault( ).getContext( ), CL_MEM_USE_HOST_PTR|CL_MEM_READ_ONLY, length * sizeof( int ), input.data( ) );
            ::cl::Buffer outputBuffer( bolt::cl::control::getDefault( ).getContext( ), CL_MEM_USE_HOST_PTR|CL_MEM_WRITE_ONLY, length * sizeof( int ), output.data( ) );

            //  size_t counter to match the type of 'iterations'
            for( size_t i = 0; i < iterations; ++i )
            {
                myTimer.Start( scanId );
                boltQueue.enqueueCopyBuffer( inputBuffer, outputBuffer, 0, 0, length * sizeof( int ) );
                void* tmpPtr = boltQueue.enqueueMapBuffer( outputBuffer, true, CL_MAP_READ, 0, length * sizeof( int ) );
                boltQueue.enqueueUnmapMemObject( outputBuffer, tmpPtr );
                boltQueue.finish( );
                myTimer.Stop( scanId );
            }
        }
        else
        {
            ::cl::Buffer inputBuffer( bolt::cl::control::getDefault( ).getContext( ), CL_MEM_READ_ONLY, length * sizeof( int ) );
            ::cl::Buffer outputBuffer( bolt::cl::control::getDefault( ).getContext( ), CL_MEM_WRITE_ONLY, length * sizeof( int ) );

            //  size_t counter to match the type of 'iterations'
            for( size_t i = 0; i < iterations; ++i )
            {
                myTimer.Start( scanId );
                boltQueue.enqueueCopyBuffer( inputBuffer, outputBuffer, 0, 0, length * sizeof( int ) );
                boltQueue.finish( );
                myTimer.Stop( scanId );
            }
        }

        //  Prune timing outliers; we ignore outliers to get a more consistent result.
        //  NOTE(review): the argument is presumably a stddev multiple -- an earlier comment
        //  here said "2 stddev", but 1.0 is what is passed; confirm the intended threshold.
        pruned = myTimer.pruneOutliers( 1.0 );
        scanTime = myTimer.getAverageTime( scanId );
    }
    else
    {
        iterations = 0;
    }

    bolt::tout << std::left;
    bolt::tout << std::setw( colWidth ) << _T( "CopyBuffer profile: " ) << _T( "[" ) << iterations-pruned << _T( "] samples" ) << std::endl;
    bolt::tout << std::setw( colWidth ) << _T( "    Size (GB): " ) << scanGB << std::endl;
    bolt::tout << std::setw( colWidth ) << _T( "    Time (s): " ) << scanTime << std::endl;
    bolt::tout << std::setw( colWidth ) << _T( "    Speed (GB/s): " ) << scanGB / scanTime << std::endl;
    bolt::tout << std::endl;

//	bolt::tout << myTimer;

    return 0;
}