/*
 * Thin wrapper around clfftGetVersion() that takes plain int pointers.
 * The symbol name is produced by the FC_FUNC_ macro (presumably the
 * Fortran name-mangling macro -- confirm against the build configuration).
 *
 * major, minor, patch : receive the three version components reported
 *                       by clfftGetVersion().
 * status              : receives the value returned by clfftGetVersion().
 */
void FC_FUNC_(clfftgetversion_low, CLFFTGETVERSION_LOW)(int * major, int * minor, int * patch, int * status)
{
	/* clfftGetVersion() writes cl_uint values; stage them here before
	 * converting to the caller's int outputs. */
	cl_uint version[3];

	*status = clfftGetVersion(&version[0], &version[1], &version[2]);

	*major = version[0];
	*minor = version[1];
	*patch = version[2];
}
int main( int argc, char **argv ) { // Define MEMORYREPORT on windows platfroms to enable debug memory heap checking #if defined( MEMORYREPORT ) && defined( _WIN32 ) TCHAR logPath[ MAX_PATH ]; ::GetCurrentDirectory( MAX_PATH, logPath ); ::_tcscat_s( logPath, _T( "\\MemoryReport.txt") ); // We leak the handle to this file, on purpose, so that the ::_CrtSetReportFile() can output it's memory // statistics on app shutdown HANDLE hLogFile; hLogFile = ::CreateFile( logPath, GENERIC_WRITE, FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL ); ::_CrtSetReportMode( _CRT_ASSERT, _CRTDBG_MODE_FILE | _CRTDBG_MODE_WNDW | _CRTDBG_MODE_DEBUG ); ::_CrtSetReportMode( _CRT_ERROR, _CRTDBG_MODE_FILE | _CRTDBG_MODE_WNDW | _CRTDBG_MODE_DEBUG ); ::_CrtSetReportMode( _CRT_WARN, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG ); ::_CrtSetReportFile( _CRT_ASSERT, hLogFile ); ::_CrtSetReportFile( _CRT_ERROR, hLogFile ); ::_CrtSetReportFile( _CRT_WARN, hLogFile ); int tmp = ::_CrtSetDbgFlag( _CRTDBG_REPORT_FLAG ); tmp |= _CRTDBG_LEAK_CHECK_DF | _CRTDBG_ALLOC_MEM_DF | _CRTDBG_CHECK_ALWAYS_DF; ::_CrtSetDbgFlag( tmp ); // By looking at the memory leak report that is generated by this debug heap, there is a number with // {} brackets that indicates the incremental allocation number of that block. If you wish to set // a breakpoint on that allocation number, put it in the _CrtSetBreakAlloc() call below, and the heap // will issue a bp on the request, allowing you to look at the call stack // ::_CrtSetBreakAlloc( 997 ); #endif /* MEMORYREPORT */ // Declare the supported options. 
po::options_description desc( "clFFT Runtime Test command line options" ); desc.add_options() ( "help,h", "produces this help message" ) ( "verbose,v", "print out detailed information for the tests" ) ( "noVersion", "Don't print version information from the clFFT library" ) ( "noInfoCL", "Don't print information from the OpenCL runtime" ) ( "cpu,c", "Run tests on a CPU device" ) ( "gpu,g", "Run tests on a GPU device (default)" ) ( "pointwise,p", "Do a pointwise comparison to determine test correctness (default: use root mean square)" ) ( "tolerance,t", po::value< float >( &tolerance )->default_value( 0.001f ), "tolerance level to use when determining test pass/fail" ) ( "numRandom,r", po::value< size_t >( &number_of_random_tests )->default_value( 2000 ), "number of random tests to run" ) ( "seed", po::value< time_t >( &random_test_parameter_seed )->default_value( time(NULL)%1308000000 ), "seed to use for the random test. defaults to time(NULL)" ) // modulo lops off the first few digits of the time value to make the seed easier to type // even without these digits, the seed value won't wrap around until 2036 or later ( "short,s", "Run radix 2 tests; no random testing" ) ( "medium,m", "Run all radices; no random testing" ) ; // Parse the command line options, ignore unrecognized options and collect them into a vector of strings po::variables_map vm; po::parsed_options parsed = po::command_line_parser( argc, argv ).options( desc ).allow_unregistered( ).run( ); po::store( parsed, vm ); po::notify( vm ); std::vector< std::string > to_pass_further = po::collect_unrecognized( parsed.options, po::include_positional ); std::cout << std::endl; size_t mutex = ((vm.count( "gpu" ) > 0) ? 1 : 0) | ((vm.count( "cpu" ) > 0) ? 
2 : 0); if ((mutex & (mutex-1)) != 0) { terr << _T("You have selected mutually-exclusive OpenCL device options:") << std::endl; if (vm.count ( "cpu" ) > 0) terr << _T(" cpu, c Run tests on a CPU device" ) << std::endl; if (vm.count ( "gpu" ) > 0) terr << _T(" gpu, g Run tests on a GPU device" ) << std::endl; return 1; } if( vm.count( "cpu" ) ) { device_type = CL_DEVICE_TYPE_CPU; } if( vm.count( "gpu" ) ) { device_type = CL_DEVICE_TYPE_GPU; device_gpu_list = ~0; } // Print version by default if( !vm.count( "noVersion" ) ) { const int indent = countOf( "clFFT client API version: " ); tout << std::left << std::setw( indent ) << _T( "clFFT client API version: " ) << clfftVersionMajor << _T( "." ) << clfftVersionMinor << _T( "." ) << clfftVersionPatch << std::endl; cl_uint libMajor, libMinor, libPatch; clfftGetVersion( &libMajor, &libMinor, &libPatch ); tout << std::left << std::setw( indent ) << _T( "clFFT runtime version: " ) << libMajor << _T( "." ) << libMinor << _T( "." ) << libPatch << std::endl << std::endl; } // Print clInfo by default if( !vm.count( "noInfoCL" ) ) { cl_context tempContext = NULL; cl_command_queue tempQueue = NULL; cl_event tempEvent = NULL; std::vector< cl_device_id > device_id = ::initializeCL( device_type, device_gpu_list, tempContext, true ); ::cleanupCL( &tempContext, &tempQueue, 0, NULL, 0, NULL, &tempEvent ); } if( vm.count( "help" ) ) { std::cout << desc << std::endl; return 0; } if( vm.count( "verbose" ) ) { verbose = true; } else { verbose = false; } if( vm.count( "short" ) && vm.count( "medium" ) ) { terr << _T("Options 'short' and 'medium' are mutually-exclusive. 
Please select only one.") << std::endl; return 1; } // Create a new argc,argv to pass to InitGoogleTest // First parameter of course is the name of this program std::vector< const char* > myArgv; // Push back a pointer to the executable name if( argc > 0 ) myArgv.push_back( *argv ); // Push into our new argv vector any parameter the user passed, except to filter their gtest_filter expressions std::string userFilter; for( int i = 1; i < argc; ++i ) { if( vm.count( "short" ) || vm.count( "medium" ) ) { std::string tmpStr( argv[ i ] ); std::string::size_type pos = tmpStr.find( "gtest_filter" ); if( pos == std::string::npos ) { myArgv.push_back( argv[ i ] ); } else { // Capture the users filter, but only the regexp portion userFilter = argv[ i ]; userFilter.erase( 0, 15 ); } } else { myArgv.push_back( argv[ i ] ); } } std::string newFilter; if( vm.count( "short" ) ) { newFilter += "--gtest_filter=*accuracy_test_pow2*"; if( userFilter.size( ) ) { newFilter += ":"; newFilter += userFilter; } myArgv.push_back( newFilter.c_str( ) ); } if( vm.count( "medium" ) ) { newFilter += "--gtest_filter="; if( userFilter.size( ) ) { newFilter += userFilter; newFilter += ":"; } newFilter += "-*Random*"; myArgv.push_back( newFilter.c_str( ) ); } if( vm.count( "pointwise" ) ) { comparison_type = pointwise_compare; } else { comparison_type = root_mean_square; } int myArgc = static_cast< int >( myArgv.size( ) ); std::cout << "Result comparison tolerance is " << tolerance << std::endl; ::testing::InitGoogleTest( &myArgc, const_cast< char** >( &myArgv[ 0 ] ) ); return RUN_ALL_TESTS(); }
int _tmain( int argc, _TCHAR* argv[] ) { // This helps with mixing output of both wide and narrow characters to the screen std::ios::sync_with_stdio( false ); // Define MEMORYREPORT on windows platfroms to enable debug memory heap checking #if defined( MEMORYREPORT ) && defined( _WIN32 ) TCHAR logPath[ MAX_PATH ]; ::GetCurrentDirectory( MAX_PATH, logPath ); ::_tcscat_s( logPath, _T( "\\MemoryReport.txt") ); // We leak the handle to this file, on purpose, so that the ::_CrtSetReportFile() can output it's memory // statistics on app shutdown HANDLE hLogFile; hLogFile = ::CreateFile( logPath, GENERIC_WRITE, FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL ); ::_CrtSetReportMode( _CRT_ASSERT, _CRTDBG_MODE_FILE | _CRTDBG_MODE_WNDW | _CRTDBG_MODE_DEBUG ); ::_CrtSetReportMode( _CRT_ERROR, _CRTDBG_MODE_FILE | _CRTDBG_MODE_WNDW | _CRTDBG_MODE_DEBUG ); ::_CrtSetReportMode( _CRT_WARN, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG ); ::_CrtSetReportFile( _CRT_ASSERT, hLogFile ); ::_CrtSetReportFile( _CRT_ERROR, hLogFile ); ::_CrtSetReportFile( _CRT_WARN, hLogFile ); int tmp = ::_CrtSetDbgFlag( _CRTDBG_REPORT_FLAG ); tmp |= _CRTDBG_LEAK_CHECK_DF | _CRTDBG_ALLOC_MEM_DF | _CRTDBG_CHECK_ALWAYS_DF; ::_CrtSetDbgFlag( tmp ); // By looking at the memory leak report that is generated by this debug heap, there is a number with // {} brackets that indicates the incremental allocation number of that block. 
If you wish to set // a breakpoint on that allocation number, put it in the _CrtSetBreakAlloc() call below, and the heap // will issue a bp on the request, allowing you to look at the call stack // ::_CrtSetBreakAlloc( 1833 ); #endif /* MEMORYREPORT */ // OpenCL state cl_device_type deviceType = CL_DEVICE_TYPE_ALL; cl_int deviceId = 0; cl_int platformId = 0; // FFT state clfftResultLocation place = CLFFT_INPLACE; clfftLayout inLayout = CLFFT_COMPLEX_INTERLEAVED; clfftLayout outLayout = CLFFT_COMPLEX_INTERLEAVED; clfftPrecision precision = CLFFT_SINGLE; clfftDirection dir = CLFFT_FORWARD; size_t lengths[ 3 ] = {1,1,1}; size_t iStrides[ 4 ] = {0,0,0,0}; size_t oStrides[ 4 ] = {0,0,0,0}; cl_uint profile_count = 0; cl_uint command_queue_flags = 0; size_t batchSize = 1; // Initialize flags for FFT library std::auto_ptr< clfftSetupData > setupData( new clfftSetupData ); OPENCL_V_THROW( clfftInitSetupData( setupData.get( ) ), "clfftInitSetupData failed" ); try { // Declare the supported options. 
po::options_description desc( "clFFT client command line options" ); desc.add_options() ( "help,h", "produces this help message" ) ( "version,v", "Print queryable version information from the clFFT library" ) ( "clinfo,i", "Print queryable information of all the OpenCL runtimes and devices" ) ( "printChosen", "Print queryable information of the selected OpenCL runtime and device" ) ( "gpu,g", "Force selection of OpenCL GPU devices only" ) ( "cpu,c", "Force selection of OpenCL CPU devices only" ) ( "all,a", "Force selection of all OpenCL devices (default)" ) ( "platform", po::value< cl_int >( &platformId )->default_value( 0 ), "Select a specific OpenCL platform id as it is reported by clinfo" ) ( "device", po::value< cl_int >( &deviceId )->default_value( 0 ), "Select a specific OpenCL device id as it is reported by clinfo" ) ( "outPlace,o", "Out of place FFT transform (default: in place)" ) ( "double", "Double precision transform (default: single)" ) ( "inv", "Backward transform (default: forward)" ) ( "dumpKernels,d", "FFT engine will dump generated OpenCL FFT kernels to disk (default: dump off)" ) ( "lenX,x", po::value< size_t >( &lengths[ 0 ] )->default_value( 1024 ), "Specify the length of the 1st dimension of a test array" ) ( "lenY,y", po::value< size_t >( &lengths[ 1 ] )->default_value( 1 ), "Specify the length of the 2nd dimension of a test array" ) ( "lenZ,z", po::value< size_t >( &lengths[ 2 ] )->default_value( 1 ), "Specify the length of the 3rd dimension of a test array" ) ( "isX", po::value< size_t >( &iStrides[ 0 ] )->default_value( 1 ), "Specify the input stride of the 1st dimension of a test array" ) ( "isY", po::value< size_t >( &iStrides[ 1 ] )->default_value( 0 ), "Specify the input stride of the 2nd dimension of a test array" ) ( "isZ", po::value< size_t >( &iStrides[ 2 ] )->default_value( 0 ), "Specify the input stride of the 3rd dimension of a test array" ) ( "iD", po::value< size_t >( &iStrides[ 3 ] )->default_value( 0 ), "input distance 
between subsequent sets of data when batch size > 1" ) ( "osX", po::value< size_t >( &oStrides[ 0 ] )->default_value( 1 ), "Specify the output stride of the 1st dimension of a test array" ) ( "osY", po::value< size_t >( &oStrides[ 1 ] )->default_value( 0 ), "Specify the output stride of the 2nd dimension of a test array" ) ( "osZ", po::value< size_t >( &oStrides[ 2 ] )->default_value( 0 ), "Specify the output stride of the 3rd dimension of a test array" ) ( "oD", po::value< size_t >( &oStrides[ 3 ] )->default_value( 0 ), "output distance between subsequent sets of data when batch size > 1" ) ( "batchSize,b", po::value< size_t >( &batchSize )->default_value( 1 ), "If this value is greater than one, arrays will be used " ) ( "profile,p", po::value< cl_uint >( &profile_count )->default_value( 1 ), "Time and report the kernel speed of the FFT (default: profiling off)" ) ( "inLayout", po::value< clfftLayout >( &inLayout )->default_value( CLFFT_COMPLEX_INTERLEAVED ), "Layout of input data:\n1) interleaved\n2) planar\n3) hermitian interleaved\n4) hermitian planar\n5) real" ) ( "outLayout", po::value< clfftLayout >( &outLayout )->default_value( CLFFT_COMPLEX_INTERLEAVED ), "Layout of input data:\n1) interleaved\n2) planar\n3) hermitian interleaved\n4) hermitian planar\n5) real" ) ; po::variables_map vm; po::store( po::parse_command_line( argc, argv, desc ), vm ); po::notify( vm ); if( vm.count( "version" ) ) { const int indent = countOf( "clFFT client API version: " ); tout << std::left << std::setw( indent ) << _T( "clFFT client API version: " ) << clfftVersionMajor << _T( "." ) << clfftVersionMinor << _T( "." ) << clfftVersionPatch << std::endl; cl_uint libMajor, libMinor, libPatch; clfftGetVersion( &libMajor, &libMinor, &libPatch ); tout << std::left << std::setw( indent ) << _T( "clFFT runtime version: " ) << libMajor << _T( "." ) << libMinor << _T( "." 
) << libPatch << std::endl << std::endl; } if( vm.count( "help" ) ) { // This needs to be 'cout' as program-options does not support wcout yet std::cout << desc << std::endl; return 0; } size_t mutex = ((vm.count( "gpu" ) > 0) ? 1 : 0) | ((vm.count( "cpu" ) > 0) ? 2 : 0) | ((vm.count( "all" ) > 0) ? 4 : 0); if ((mutex & (mutex-1)) != 0) { terr << _T("You have selected mutually-exclusive OpenCL device options:") << std::endl; if (vm.count ( "gpu" ) > 0) terr << _T(" gpu,g Force selection of OpenCL GPU devices only" ) << std::endl; if (vm.count ( "cpu" ) > 0) terr << _T(" cpu,c Force selection of OpenCL CPU devices only" ) << std::endl; if (vm.count ( "all" ) > 0) terr << _T(" all,a Force selection of all OpenCL devices (default)" ) << std::endl; return 1; } if( vm.count( "gpu" ) ) { deviceType = CL_DEVICE_TYPE_GPU; } if( vm.count( "cpu" ) ) { deviceType = CL_DEVICE_TYPE_CPU; } if( vm.count( "all" ) ) { deviceType = CL_DEVICE_TYPE_ALL; } if( vm.count( "clinfo" ) ) { std::vector< cl_platform_id > platformInfos; std::vector< std::vector< cl_device_id > > deviceInfos; discoverCLPlatforms( deviceType, platformInfos, deviceInfos ); prettyPrintCLPlatforms(platformInfos, deviceInfos); return 0; } bool printInfo = false; if( vm.count( "printChosen" ) ) { printInfo = true; } if( vm.count( "outPlace" ) ) { place = CLFFT_OUTOFPLACE; } if( vm.count( "double" ) ) { precision = CLFFT_DOUBLE; } if( vm.count( "inv" ) ) { dir = CLFFT_BACKWARD; } if( profile_count > 1 ) { command_queue_flags |= CL_QUEUE_PROFILING_ENABLE; } if( vm.count( "dumpKernels" ) ) { setupData->debugFlags |= CLFFT_DUMP_PROGRAMS; } int inL = (int)inLayout; int otL = (int)outLayout; // input output layout support matrix int ioLayoutSupport[5][5] = { { 1, 1, 0, 0, 1 }, { 1, 1, 0, 0, 1 }, { 0, 0, 0, 0, 1 }, { 0, 0, 0, 0, 1 }, { 1, 1, 1, 1, 0 }, }; if((inL < 1) || (inL > 5)) throw std::runtime_error( "Invalid Input layout format" ); if((otL < 1) || (otL > 5)) throw std::runtime_error( "Invalid Output layout format" 
); if(ioLayoutSupport[inL-1][otL-1] == 0) throw std::runtime_error( "Invalid combination of Input/Output layout formats" ); if( ((inL == 1) || (inL == 2)) && ((otL == 1) || (otL == 2)) ) // Complex-Complex cases { iStrides[1] = iStrides[1] ? iStrides[1] : lengths[0] * iStrides[0]; iStrides[2] = iStrides[2] ? iStrides[2] : lengths[1] * iStrides[1]; iStrides[3] = iStrides[3] ? iStrides[3] : lengths[2] * iStrides[2]; if(place == CLFFT_INPLACE) { oStrides[0] = iStrides[0]; oStrides[1] = iStrides[1]; oStrides[2] = iStrides[2]; oStrides[3] = iStrides[3]; } else { oStrides[1] = oStrides[1] ? oStrides[1] : lengths[0] * oStrides[0]; oStrides[2] = oStrides[2] ? oStrides[2] : lengths[1] * oStrides[1]; oStrides[3] = oStrides[3] ? oStrides[3] : lengths[2] * oStrides[2]; } } else // Real-Complex and Complex-Real cases { size_t *rst, *cst; size_t N = lengths[0]; size_t Nt = 1 + lengths[0]/2; bool iflag = false; bool rcFull = (inL == 1) || (inL == 2) || (otL == 1) || (otL == 2); if(inLayout == CLFFT_REAL) { iflag = true; rst = iStrides; } else { rst = oStrides; } // either in or out should be REAL // Set either in or out strides whichever is real if(place == CLFFT_INPLACE) { if(rcFull) { rst[1] = rst[1] ? rst[1] : N * 2 * rst[0]; } else { rst[1] = rst[1] ? rst[1] : Nt * 2 * rst[0]; } rst[2] = rst[2] ? rst[2] : lengths[1] * rst[1]; rst[3] = rst[3] ? rst[3] : lengths[2] * rst[2]; } else { rst[1] = rst[1] ? rst[1] : lengths[0] * rst[0]; rst[2] = rst[2] ? rst[2] : lengths[1] * rst[1]; rst[3] = rst[3] ? rst[3] : lengths[2] * rst[2]; } // Set the remaining of in or out strides that is not real if(iflag) { cst = oStrides; } else { cst = iStrides; } if(rcFull) { cst[1] = cst[1] ? cst[1] : N * cst[0]; } else { cst[1] = cst[1] ? cst[1] : Nt * cst[0]; } cst[2] = cst[2] ? cst[2] : lengths[1] * cst[1]; cst[3] = cst[3] ? 
cst[3] : lengths[2] * cst[2]; } if( precision == CLFFT_SINGLE ) transform<float>( lengths, iStrides, oStrides, batchSize, inLayout, outLayout, place, precision, dir, deviceType, deviceId, platformId, printInfo, command_queue_flags, profile_count, setupData ); else transform<double>( lengths, iStrides, oStrides, batchSize, inLayout, outLayout, place, precision, dir, deviceType, deviceId, platformId, printInfo, command_queue_flags, profile_count, setupData ); } catch( std::exception& e ) { terr << _T( "clFFT error condition reported:" ) << std::endl << e.what() << std::endl; return 1; } return 0; }