Ejemplo n.º 1
0
int main(int argc, char *argv[])
{
  double alpha;
  double beta;
  int profileCount;
  //int transA_option;
  std::string function;
  std::string precision;
  std::string root_dir;

  po::options_description desc( "cuSPARSE bench command line options" );
  desc.add_options()
    ( "help,h", "produces this help message" )
    ( "dirpath,d", po::value( &root_dir ), "Matrix directory" )
    ( "alpha", po::value<double>( &alpha )->default_value( 1.0f ), "specifies the scalar alpha" )
    ( "beta", po::value<double>( &beta )->default_value( 0.0f ), "specifies the scalar beta" )
    //( "transposeA", po::value<int>( &transA_option )->default_value( 0 ), "0 = no transpose, 1 = transpose, 2 = conjugate transpose" )
    ( "function,f", po::value<std::string>( &function )->default_value( "SpMdV" ), "Sparse functions to test. Options: "
                "SpMdV, SpMSpM, Csr2Dense, Dense2Csr, Csr2Coo, Coo2Csr" )
    ( "precision,r", po::value<std::string>( &precision )->default_value( "s" ), "Options: s,d,c,z" )
    ( "profile,p", po::value<int>( &profileCount )->default_value( 20 ), "Time and report the kernel speed (default: profiling off)" )
    ( "no_zeroes,z", po::bool_switch()->default_value(false), "Disable reading explicit zeroes from the input matrix market file.")
    ;

  po::variables_map vm;
  po::store( po::parse_command_line( argc, argv, desc ), vm );
  po::notify( vm );

  if( vm.count( "help" ) )
  {
    std::cout << desc << std::endl;
    return 0;
  }

  if( precision != "s" && precision != "d" && precision != "c" && precision != "z" )
  {
    std::cerr << "Invalid value for --precision" << std::endl;
    return -1;
  }

  if( vm.count( "dirpath" ) == 0 )
  {
      std::cerr << "The [" << "root" << "] parameter is missing!" << std::endl;
      std::cerr << desc << std::endl;
      return false;
  }

  cl_bool explicit_zeroes = true;
  if (vm["no_zeroes"].as<bool>())
    explicit_zeroes = false;

  StatisticalTimer& timer = StatisticalTimer::getInstance( );
  timer.Reserve( 3, profileCount );
  timer.setNormalize( true );

  std::unique_ptr< cusparseFunc > my_function;
  if( boost::iequals( function, "SpMdV" ) )
  {
    if( precision == "s" )
        my_function = std::unique_ptr< cusparseFunc >( new xSpMdV< float >( timer, explicit_zeroes ) );
    else if( precision == "d" )
    //    my_function = std::make_unique< xSpMdV< double > >( timer );
      my_function = std::unique_ptr< cusparseFunc >( new xSpMdV< double >( timer, explicit_zeroes ) );
    else
    {
      std::cerr << "Unknown spmdv precision" << std::endl;
      return -1;
    }
  }
  else if (boost::iequals(function, "SpMSpM"))
  {
      if (precision == "s")
          my_function = std::unique_ptr< cusparseFunc >(new xSpMSpM< float >( timer, explicit_zeroes ));
      else if (precision == "d") // Currently not supported
          my_function = std::unique_ptr< cusparseFunc >(new xSpMSpM< double >( timer, explicit_zeroes ));
      else
      {
          std::cerr << "Unknown spmspm precison" << std::endl;
          return -1;
      }
  }
  else if( boost::iequals( function, "Csr2Dense" ) )
  {
      if( precision == "s" )
          my_function = std::unique_ptr< cusparseFunc >( new xCsr2Dense< float >( timer, explicit_zeroes ) );
      else if( precision == "d" )
          my_function = std::unique_ptr< cusparseFunc >( new xCsr2Dense< double >( timer, explicit_zeroes ) );
      else
      {
          std::cerr << "Unknown xCsr2Dense precision" << std::endl;
          return -1;
      }
  }
  else if (boost::iequals(function, "Csr2Coo"))
  {
      if (precision == "s")
      {
          my_function = std::unique_ptr< cusparseFunc >(new xCsr2Coo< float >( timer, explicit_zeroes ));
      }
      else if (precision == "d")
      {
          my_function = std::unique_ptr< cusparseFunc >(new xCsr2Coo< double >( timer, explicit_zeroes ));
      }
      else
      {
          std::cerr << "Unknown xCsr2Coo precision" << std::endl;
          return -1;
      }
  }
  else if (boost::iequals(function, "Dense2Csr"))
  {
      if (precision == "s")
      {
          my_function = std::unique_ptr< cusparseFunc >(new xDense2Csr< float >( timer, explicit_zeroes ));
      }
      else if (precision == "d")
      {
          my_function = std::unique_ptr< cusparseFunc >(new xDense2Csr< double >( timer, explicit_zeroes ));
      }
      else
      {
          std::cerr << "Unknown xDense2Csr precision " << std::endl;
          return -1;
      }
  }
  else if( boost::iequals( function, "Coo2Csr" ) )
  {
      if( precision == "s" )
      {
          my_function = std::unique_ptr< cusparseFunc >( new xCoo2Csr< float >( timer, explicit_zeroes ) );
      }
      else
      {
          my_function = std::unique_ptr< cusparseFunc >( new xCoo2Csr< double >( timer, explicit_zeroes ) );
      }
  }
  else
  {
    std::cerr << "Benchmarking unknown function" << std::endl;
    return -1;
  }

  try
  {
      std::vector< fs::path > matrix_files = enumMatrices( root_dir );

      for( auto& file : matrix_files )
      {
          timer.Reset( );

          std::string path = file.string( );
          try {
              my_function->setup_buffer( alpha, beta, path );
          }
          // I expect to catch trow from clsparseHeaderfromFile
          // If io_exception then we don't need to cleanup.
          // If runtime_exception is catched we are doomed!
          catch (clsparse::io_exception& io_exc)
          {
              std::cout << io_exc.what() << std::endl;
              continue;
          }
          my_function->initialize_cpu_buffer( );
          my_function->initialize_gpu_buffer( );

          for( int i = 0; i < profileCount; ++i )
          {
              my_function->call_func( );
              my_function->reset_gpu_write_buffer( );
          }
          my_function->releaseGPUBuffer_deleteCPUBuffer( );

        timer.pruneOutliers( 3.0 );
        std::cout << "cuSPARSE matrix: " << path << std::endl;
        std::cout << "cuSPARSE kernel execution time < ns >: " << my_function->time_in_ns( ) << std::endl;
        std::cout << "cuSPARSE kernel execution < " <<
            my_function->bandwidth_formula( ) << " >: " << my_function->bandwidth( ) << std::endl << std::endl;
        std::cout << "cuSPARSE kernel execution < " <<
            my_function->gflops_formula( ) << " >: " << my_function->gflops( ) << std::endl << std::endl;
      }

  }
  catch( std::exception& exc )
  {
      std::cerr << exc.what( ) << std::endl;
      return 1;
  }

  return 0;
}
Ejemplo n.º 2
0
int main( int argc, char *argv[ ] )
{
    cl_double alpha, beta;
    clsparseIdx_t rows, columns;
    size_t profileCount;
    std::string function;
    std::string precision;
    std::string root_dir;

    po::options_description desc( "clSPARSE bench command line options" );
    desc.add_options( )
        ( "help,h", "produces this help message" )
        ( "dirpath,d", po::value( &root_dir ), "Matrix directory" )
        ( "alpha,a", po::value<cl_double>( &alpha )->default_value( 1.0f ), "specifies the scalar alpha" )
        ( "beta,b", po::value<cl_double>( &beta )->default_value( 0.0f ), "specifies the scalar beta" )
        ( "rows", po::value<clsparseIdx_t>( &rows )->default_value( 16 ), "specifies the number of rows for matrix data" )
        ( "columns", po::value<clsparseIdx_t>( &columns )->default_value( 16 ), "specifies the number of columns for matrix data" )
        ( "function,f", po::value<std::string>( &function )->default_value( "SpMdV" ), "Sparse functions to test. Options: "
                    "SpMdV, SpMdM, SpMSpM, CG, BiCGStab, Csr2Dense, Dense2Csr, Csr2Coo, Coo2Csr" )
        ( "precision,r", po::value<std::string>( &precision )->default_value( "s" ), "Options: s,d,c,z" )
        ( "profile,p", po::value<size_t>( &profileCount )->default_value( 20 ), "Number of times to run the desired test function" )
        ( "extended,e", po::bool_switch()->default_value(false), "Use compensated summation to improve accuracy by emulating extended precision" )
        ( "no_zeroes,z", po::bool_switch()->default_value(false), "Disable reading explicit zeroes from the input matrix market file.")
        ;

    po::variables_map vm;
    po::store( po::parse_command_line( argc, argv, desc ), vm );
    po::notify( vm );

    if( vm.count( "help" ) )
    {
        std::cout << desc << std::endl;
        return 0;
    }

    if( precision != "s" && precision != "d" ) // && precision != "c" && precision != "z" )
    {
        std::cerr << "Invalid value for --precision" << std::endl;
        return -1;
    }

    if( vm.count( "dirpath" ) == 0 )
    {
        std::cerr << "The [" << "root" << "] parameter is missing!" << std::endl;
        std::cerr << desc << std::endl;
        return false;
    }

    //	Discover and load the timer module if present
    void* timerLibHandle = LoadSharedLibrary( "lib", "clsparseTimer", false );
    if( timerLibHandle == NULL )
    {
        std::cerr << "Could not find the external timing library; timings disabled" << std::endl;
    }

    cl_bool extended_precision = false;
    if (vm["extended"].as<bool>())
        extended_precision = true;
    cl_bool explicit_zeroes = true;
    if (vm["no_zeroes"].as<bool>())
        explicit_zeroes = false;

    //	Timer module discovered and loaded successfully
    //	Initialize function pointers to call into the shared module
    void* funcPtr = LoadFunctionAddr( timerLibHandle, "clsparseGetTimer" );
    PFCLSPARSETIMER sparseGetTimer = *static_cast<PFCLSPARSETIMER*>( static_cast<void*>( &funcPtr ) );

    std::unique_ptr< clsparseFunc > my_function;
    if( boost::iequals( function, "SpMdV" ) )
    {
        if( precision == "s" )
            my_function = std::unique_ptr< clsparseFunc >( new xSpMdV< float >( sparseGetTimer, profileCount, extended_precision, CL_DEVICE_TYPE_GPU, explicit_zeroes ) );
        else if( precision == "d" )
            my_function = std::unique_ptr< clsparseFunc >( new xSpMdV< double >( sparseGetTimer, profileCount, extended_precision, CL_DEVICE_TYPE_GPU, explicit_zeroes ) );
        else
        {
            std::cerr << "Unknown spmdv precision" << std::endl;
            return -1;
        }
    }
    else if( boost::iequals( function, "CG" ) )
    {
        if( precision == "s" )
            my_function = std::unique_ptr< clsparseFunc >( new xCG< float >( sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, explicit_zeroes ) );
        else
            my_function = std::unique_ptr< clsparseFunc >( new xCG< double >( sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, explicit_zeroes ) );
    }

    else if( boost::iequals( function, "BiCGStab" ) )
    {
        if( precision == "s" )
            my_function = std::unique_ptr< clsparseFunc >( new xBiCGStab< float >( sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, explicit_zeroes ) );
        else
            my_function = std::unique_ptr< clsparseFunc >( new xBiCGStab< double >( sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, explicit_zeroes ) );
    }
    else if( boost::iequals( function, "SpMdM" ) )
    {
        if( precision == "s" )
            my_function = std::unique_ptr< clsparseFunc >( new xSpMdM< cl_float >( sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, columns, explicit_zeroes ) );
        else
            my_function = std::unique_ptr< clsparseFunc >( new xSpMdM< cl_double >( sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, columns, explicit_zeroes ) );
    }
    else if (boost::iequals(function, "SpMSpM"))
    {
        if (precision == "s")
            my_function = std::unique_ptr< clsparseFunc>(new xSpMSpM< cl_float >( sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, explicit_zeroes ) );
        else
            my_function = std::unique_ptr< clsparseFunc >(new xSpMSpM< cl_double >(sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, explicit_zeroes ) );
    }
    else if( boost::iequals( function, "Coo2Csr" ) )
    {
        if( precision == "s" )
            my_function = std::unique_ptr< clsparseFunc >( new xCoo2Csr< float >( sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, explicit_zeroes ) );
        else
            my_function = std::unique_ptr< clsparseFunc >( new xCoo2Csr< double >( sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, explicit_zeroes ) );
    }
    else if( boost::iequals( function, "Dense2Csr" ) )
    {
        if( precision == "s" )
            my_function = std::unique_ptr< clsparseFunc >( new xDense2Csr< float >( sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, explicit_zeroes ) );
        else
            my_function = std::unique_ptr< clsparseFunc >( new xDense2Csr< double >( sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, explicit_zeroes ) );
    }
    else if( boost::iequals( function, "Csr2Dense" ) )
    {
        if( precision == "s" )
            my_function = std::unique_ptr< clsparseFunc >( new xCsr2Dense< cl_float >( sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, explicit_zeroes ) );
        else
            my_function = std::unique_ptr< clsparseFunc >( new xCsr2Dense< cl_double >( sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, explicit_zeroes ) );
    }
    else if( boost::iequals( function, "Csr2Coo" ) )
    {
        if( precision == "s" )
            my_function = std::unique_ptr< clsparseFunc >( new xCsr2Coo< cl_float >( sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, explicit_zeroes ) );
        else
            my_function = std::unique_ptr< clsparseFunc >( new xCsr2Coo< cl_double >( sparseGetTimer, profileCount, CL_DEVICE_TYPE_GPU, explicit_zeroes ) );
    }
    else
    {
        std::cerr << "Benchmarking unknown function" << std::endl;
        return -1;
    }

    try
    {
        std::vector< fs::path > matrix_files = enumMatrices( root_dir );

        for( auto& file : matrix_files )
        {
            std::string path = file.string( );
            try {
                my_function->setup_buffer( alpha, beta, path );
            }
            // I expect to catch trow from clsparseHeaderfromFile
            // If io_exception then we don't need to cleanup.
            // If runtime_exception is catched we are doomed!
            catch( clsparse::io_exception& io_exc )
            {
                std::cout << io_exc.what( ) << std::endl;
                continue;
            }
            my_function->initialize_cpu_buffer( );
            my_function->initialize_gpu_buffer( );

            for( int i = 0; i < profileCount; ++i )
            {
                my_function->call_func( );
                my_function->reset_gpu_write_buffer( );
            }
            my_function->cleanup( );

            //std::cout << "clSPARSE kernel execution time < ns >: " << my_function->time_in_ns( ) << std::endl;
            //std::cout << "clSPARSE kernel execution Gflops < " <<
            //    my_function->bandwidth_formula( ) << " >: " << my_function->bandwidth( ) << std::endl << std::endl;
        }
    }
    catch( std::exception& exc )
    {
        std::cerr << exc.what( ) << std::endl;
        return 1;
    }

    FreeSharedLibrary( timerLibHandle );

    return 0;
}