int main(int argc, char *argv[]) { double alpha; double beta; int profileCount; //int transA_option; std::string function; std::string precision; std::string root_dir; po::options_description desc( "cuSPARSE bench command line options" ); desc.add_options() ( "help,h", "produces this help message" ) ( "dirpath,d", po::value( &root_dir ), "Matrix directory" ) ( "alpha", po::value<double>( &alpha )->default_value( 1.0f ), "specifies the scalar alpha" ) ( "beta", po::value<double>( &beta )->default_value( 0.0f ), "specifies the scalar beta" ) //( "transposeA", po::value<int>( &transA_option )->default_value( 0 ), "0 = no transpose, 1 = transpose, 2 = conjugate transpose" ) ( "function,f", po::value<std::string>( &function )->default_value( "SpMdV" ), "Sparse functions to test. Options: " "SpMdV, SpMSpM, Csr2Dense, Dense2Csr, Csr2Coo, Coo2Csr" ) ( "precision,r", po::value<std::string>( &precision )->default_value( "s" ), "Options: s,d,c,z" ) ( "profile,p", po::value<int>( &profileCount )->default_value( 20 ), "Time and report the kernel speed (default: profiling off)" ) ( "no_zeroes,z", po::bool_switch()->default_value(false), "Disable reading explicit zeroes from the input matrix market file.") ; po::variables_map vm; po::store( po::parse_command_line( argc, argv, desc ), vm ); po::notify( vm ); if( vm.count( "help" ) ) { std::cout << desc << std::endl; return 0; } if( precision != "s" && precision != "d" && precision != "c" && precision != "z" ) { std::cerr << "Invalid value for --precision" << std::endl; return -1; } if( vm.count( "dirpath" ) == 0 ) { std::cerr << "The [" << "root" << "] parameter is missing!" << std::endl; std::cerr << desc << std::endl; return false; } cl_bool explicit_zeroes = true; if (vm["no_zeroes"].as<bool>()) explicit_zeroes = false; StatisticalTimer& timer = StatisticalTimer::getInstance( ); timer.Reserve( 3, profileCount ); timer.setNormalize( true ); std::unique_ptr< cusparseFunc > my_function; if( boost::iequals( function, "SpMdV" ) ) { if( precision == "s" ) my_function = std::unique_ptr< cusparseFunc >( new xSpMdV< float >( timer, explicit_zeroes ) ); else if( precision == "d" ) // my_function = std::make_unique< xSpMdV< double > >( timer ); my_function = std::unique_ptr< cusparseFunc >( new xSpMdV< double >( timer, explicit_zeroes ) ); else { std::cerr << "Unknown spmdv precision" << std::endl; return -1; } } else if (boost::iequals(function, "SpMSpM")) { if (precision == "s") my_function = std::unique_ptr< cusparseFunc >(new xSpMSpM< float >( timer, explicit_zeroes )); else if (precision == "d") // Currently not supported my_function = std::unique_ptr< cusparseFunc >(new xSpMSpM< double >( timer, explicit_zeroes )); else { std::cerr << "Unknown spmspm precison" << std::endl; return -1; } } else if( boost::iequals( function, "Csr2Dense" ) ) { if( precision == "s" ) my_function = std::unique_ptr< cusparseFunc >( new xCsr2Dense< float >( timer, explicit_zeroes ) ); else if( precision == "d" ) my_function = std::unique_ptr< cusparseFunc >( new xCsr2Dense< double >( timer, explicit_zeroes ) ); else { std::cerr << "Unknown xCsr2Dense precision" << std::endl; return -1; } } else if (boost::iequals(function, "Csr2Coo")) { if (precision == "s") { my_function = std::unique_ptr< cusparseFunc >(new xCsr2Coo< float >( timer, explicit_zeroes )); } else if (precision == "d") { my_function = std::unique_ptr< cusparseFunc >(new xCsr2Coo< double >( timer, explicit_zeroes )); } else { std::cerr << "Unknown xCsr2Coo precision" << std::endl; return -1; } } else if (boost::iequals(function, "Dense2Csr")) { if (precision == "s") { my_function = std::unique_ptr< cusparseFunc >(new xDense2Csr< float >( timer, explicit_zeroes )); } else if (precision == "d") { my_function = std::unique_ptr< cusparseFunc >(new xDense2Csr< double >( timer, explicit_zeroes )); } else { std::cerr << "Unknown xDense2Csr precision " << std::endl; return -1; } } else if( boost::iequals( function, "Coo2Csr" ) ) { if( precision == "s" ) { my_function = std::unique_ptr< cusparseFunc >( new xCoo2Csr< float >( timer, explicit_zeroes ) ); } else { my_function = std::unique_ptr< cusparseFunc >( new xCoo2Csr< double >( timer, explicit_zeroes ) ); } } else { std::cerr << "Benchmarking unknown function" << std::endl; return -1; } try { std::vector< fs::path > matrix_files = enumMatrices( root_dir ); for( auto& file : matrix_files ) { timer.Reset( ); std::string path = file.string( ); try { my_function->setup_buffer( alpha, beta, path ); } // I expect to catch trow from clsparseHeaderfromFile // If io_exception then we don't need to cleanup. // If runtime_exception is catched we are doomed! catch (clsparse::io_exception& io_exc) { std::cout << io_exc.what() << std::endl; continue; } my_function->initialize_cpu_buffer( ); my_function->initialize_gpu_buffer( ); for( int i = 0; i < profileCount; ++i ) { my_function->call_func( ); my_function->reset_gpu_write_buffer( ); } my_function->releaseGPUBuffer_deleteCPUBuffer( ); timer.pruneOutliers( 3.0 ); std::cout << "cuSPARSE matrix: " << path << std::endl; std::cout << "cuSPARSE kernel execution time < ns >: " << my_function->time_in_ns( ) << std::endl; std::cout << "cuSPARSE kernel execution < " << my_function->bandwidth_formula( ) << " >: " << my_function->bandwidth( ) << std::endl << std::endl; std::cout << "cuSPARSE kernel execution < " << my_function->gflops_formula( ) << " >: " << my_function->gflops( ) << std::endl << std::endl; } } catch( std::exception& exc ) { std::cerr << exc.what( ) << std::endl; return 1; } return 0; }
int main( int argc, char *argv[ ] ) { cl_double alpha, beta; clsparseIdx_t rows, columns; size_t profileCount; std::string function; std::string precision; std::string root_dir; po::options_description desc( "clSPARSE bench command line options" ); desc.add_options( ) ( "help,h", "produces this help message" ) ( "dirpath,d", po::value( &root_dir ), "Matrix directory" ) ( "alpha,a", po::value<cl_double>( &alpha )->default_value( 1.0f ), "specifies the scalar alpha" ) ( "beta,b", po::value<cl_double>( &beta )->default_value( 0.0f ), "specifies the scalar beta" ) ( "rows", po::value<clsparseIdx_t>( &rows )->default_value( 16 ), "specifies the number of rows for matrix data" ) ( "columns", po::value<clsparseIdx_t>( &columns )->default_value( 16 ), "specifies the number of columns for matrix data" ) ( "function,f", po::value<std::string>( &function )->default_value( "SpMdV" ), "Sparse functions to test. Options: " "SpMdV, SpMdM, SpMSpM, CG, BiCGStab, Csr2Dense, Dense2Csr, Csr2Coo, Coo2Csr" ) ( "precision,r", po::value<std::string>( &precision )->default_value( "s" ), "Options: s,d,c,z" ) ( "profile,p", po::value<size_t>( &profileCount )->default_value( 20 ), "Number of times to run the desired test function" ) ( "extended,e", po::bool_switch()->default_value(false), "Use compensated summation to improve accuracy by emulating extended precision" ) ( "no_zeroes,z", po::bool_switch()->default_value(false), "Disable reading explicit zeroes from the input matrix market file.") ; po::variables_map vm; po::store( po::parse_command_line( argc, argv, desc ), vm ); po::notify( vm ); if( vm.count( "help" ) ) { std::cout << desc << std::endl; return 0; } if( precision != "s" && precision != "d" ) // && precision != "c" && precision != "z" ) { std::cerr << "Invalid value for --precision" << std::endl; return -1; } if( vm.count( "dirpath" ) == 0 ) { std::cerr << "The [" << "root" << "] parameter is missing!" << std::endl; std::cerr << desc << std::endl; return false; } // Discover and load the timer module if present void* timerLibHandle = LoadSharedLibrary( "lib", "clsparseTimer", false ); if( timerLibHandle == NULL ) { std::cerr << "Could not find the external timing library; timings disabled" << std::endl; } cl_bool extended_precision = false; if (vm["extended"].as<bool>()) extended_precision = true; cl_bool explicit_zeroes = true; if (vm["no_zeroes"].as<bool>()) explicit_zeroes = false; // Timer module discovered and loaded successfully // Initialize function pointers to call into the shared module void* funcPtr = LoadFunctionAddr( timerLibHandle, "clsparseGetTimer" ); PFCLSPARSETIMER sparseGetTimer = *static_cast<PFCLSPARSETIMER*>( static_cast<void*>( &funcPtr ) ); std::unique_ptr< clsparseFunc > my_function; if( boost::iequals( function, "SpMdV" ) ) { if( precision == "s" ) my_function = std::unique_ptr< clsparseFunc >( new xSpMdV< float >( sparseGetTimer, profileCount, extended_precision, CL_DEVICE_TYPE_GPU, explicit_zeroes ) ); else if( precision == "d" ) my_function = std::unique_ptr< clsparseFunc >( new xSpMdV< double >( sparseGetTimer, profileCount, extended_precision, CL_DEVICE_TYPE_GPU, explicit_zeroes ) ); else { std::cerr << "Unknown spmdv precision" << std::endl; return -1; } } else if( boost::iequals( function, "CG" ) ) { if( precision == "s" ) my_function = std::unique_ptr< clsparseFunc >( new xCG< float >( sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, explicit_zeroes ) ); else my_function = std::unique_ptr< clsparseFunc >( new xCG< double >( sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, explicit_zeroes ) ); } else if( boost::iequals( function, "BiCGStab" ) ) { if( precision == "s" ) my_function = std::unique_ptr< clsparseFunc >( new xBiCGStab< float >( sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, explicit_zeroes ) ); else my_function = std::unique_ptr< clsparseFunc >( new xBiCGStab< double >( sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, explicit_zeroes ) ); } else if( boost::iequals( function, "SpMdM" ) ) { if( precision == "s" ) my_function = std::unique_ptr< clsparseFunc >( new xSpMdM< cl_float >( sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, columns, explicit_zeroes ) ); else my_function = std::unique_ptr< clsparseFunc >( new xSpMdM< cl_double >( sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, columns, explicit_zeroes ) ); } else if (boost::iequals(function, "SpMSpM")) { if (precision == "s") my_function = std::unique_ptr< clsparseFunc>(new xSpMSpM< cl_float >( sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, explicit_zeroes ) ); else my_function = std::unique_ptr< clsparseFunc >(new xSpMSpM< cl_double >(sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, explicit_zeroes ) ); } else if( boost::iequals( function, "Coo2Csr" ) ) { if( precision == "s" ) my_function = std::unique_ptr< clsparseFunc >( new xCoo2Csr< float >( sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, explicit_zeroes ) ); else my_function = std::unique_ptr< clsparseFunc >( new xCoo2Csr< double >( sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, explicit_zeroes ) ); } else if( boost::iequals( function, "Dense2Csr" ) ) { if( precision == "s" ) my_function = std::unique_ptr< clsparseFunc >( new xDense2Csr< float >( sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, explicit_zeroes ) ); else my_function = std::unique_ptr< clsparseFunc >( new xDense2Csr< double >( sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, explicit_zeroes ) ); } else if( boost::iequals( function, "Csr2Dense" ) ) { if( precision == "s" ) my_function = std::unique_ptr< clsparseFunc >( new xCsr2Dense< cl_float >( sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, explicit_zeroes ) ); else my_function = std::unique_ptr< clsparseFunc >( new xCsr2Dense< cl_double >( sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, explicit_zeroes ) ); } else if( boost::iequals( function, "Csr2Coo" ) ) { if( precision == "s" ) my_function = std::unique_ptr< clsparseFunc >( new xCsr2Coo< cl_float >( sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, explicit_zeroes ) ); else my_function = std::unique_ptr< clsparseFunc >( new xCsr2Coo< cl_double >( sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, explicit_zeroes ) ); } else { std::cerr << "Benchmarking unknown function" << std::endl; return -1; } try { std::vector< fs::path > matrix_files = enumMatrices( root_dir ); for( auto& file : matrix_files ) { std::string path = file.string( ); try { my_function->setup_buffer( alpha, beta, path ); } // I expect to catch trow from clsparseHeaderfromFile // If io_exception then we don't need to cleanup. // If runtime_exception is catched we are doomed! catch( clsparse::io_exception& io_exc ) { std::cout << io_exc.what( ) << std::endl; continue; } my_function->initialize_cpu_buffer( ); my_function->initialize_gpu_buffer( ); for( int i = 0; i < profileCount; ++i ) { my_function->call_func( ); my_function->reset_gpu_write_buffer( ); } my_function->cleanup( ); //std::cout << "clSPARSE kernel execution time < ns >: " << my_function->time_in_ns( ) << std::endl; //std::cout << "clSPARSE kernel execution Gflops < " << // my_function->bandwidth_formula( ) << " >: " << my_function->bandwidth( ) << std::endl << std::endl; } } catch( std::exception& exc ) { std::cerr << exc.what( ) << std::endl; return 1; } FreeSharedLibrary( timerLibHandle ); return 0; }