Example #1
0
/** @brief Sets up zero-filled device vectors and calls the optimizer once for every vector kernel listed below.
*
* Timings are collected per kernel name in a function-local map; that map is not handed back to the caller.
* NOTE(review): what optimize_full() / optimize_restricted() store in 'paras' is defined elsewhere - confirm there.
*
* @param config  Test configuration; its kernel name is overwritten before each optimizer call
* @param paras   Parameter database forwarded to the optimizer routines
* @return Always 0
*/
int run_vector_benchmark(test_config & config, viennacl::io::parameter_database& paras)
{
  typedef viennacl::scalar<ScalarType>      VCLScalar;
  typedef viennacl::vector<ScalarType>      VCLVector;
  typedef test_data<VCLScalar, VCLVector>   TestDataType;
  typedef std::map< double, std::pair<unsigned int, unsigned int> >   TimingType;

  ////////////////////////////////////////////////////////////////////
  // Operands for the kernels under test
  ////////////////////////////////////////////////////////////////////

  ScalarType std_factor1 = static_cast<ScalarType>(3.1415);
  VCLScalar  vcl_factor1(std_factor1);

  std::vector<ScalarType> std_zeros(BENCHMARK_VECTOR_SIZE);  //value-initialized, i.e. all zeros
  VCLVector vcl_vec1(BENCHMARK_VECTOR_SIZE);
  VCLVector vcl_vec2(BENCHMARK_VECTOR_SIZE);
  VCLVector vcl_vec3(BENCHMARK_VECTOR_SIZE);

  //all-zero operands: no need to worry about overflows during repeated kernel runs
  viennacl::copy(std_zeros, vcl_vec1);
  viennacl::copy(std_zeros, vcl_vec2);
  //zero the third vector explicitly as well: this code cannot rely on the constructor having cleared the device buffer
  viennacl::copy(std_zeros, vcl_vec3);

  TestDataType data(vcl_factor1, vcl_vec1, vcl_vec2, vcl_vec3);

  //////////////////////////////////////////////////////////
  ///////////// Start parameter recording  /////////////////
  //////////////////////////////////////////////////////////

  std::map< std::string, TimingType > all_timings;   //one timing table per kernel name

  std::cout << "------- Related to vector addition ----------" << std::endl;
  config.kernel_name("add");
  optimize_full(paras, all_timings[config.kernel_name()], vector_add<TestDataType>, config, data);
  config.kernel_name("inplace_add");
  optimize_full(paras, all_timings[config.kernel_name()], vector_inplace_add<TestDataType>, config, data);
  config.kernel_name("mul_add");
  optimize_full(paras, all_timings[config.kernel_name()], vector_mul_add<TestDataType>, config, data);
  config.kernel_name("cpu_mul_add");
  optimize_full(paras, all_timings[config.kernel_name()], vector_cpu_mul_add<TestDataType>, config, data);
  config.kernel_name("inplace_mul_add");
  optimize_full(paras, all_timings[config.kernel_name()], vector_inplace_mul_add<TestDataType>, config, data);
  config.kernel_name("cpu_inplace_mul_add");
  optimize_full(paras, all_timings[config.kernel_name()], vector_cpu_inplace_mul_add<TestDataType>, config, data);
  config.kernel_name("inplace_div_add");
  optimize_full(paras, all_timings[config.kernel_name()], vector_inplace_div_add<TestDataType>, config, data);

  std::cout << "------- Related to vector subtraction ----------" << std::endl;
  config.kernel_name("sub");
  optimize_full(paras, all_timings[config.kernel_name()], vector_sub<TestDataType>, config, data);
  config.kernel_name("inplace_sub");
  optimize_full(paras, all_timings[config.kernel_name()], vector_inplace_sub<TestDataType>, config, data);
  config.kernel_name("mul_sub");
  optimize_full(paras, all_timings[config.kernel_name()], vector_mul_sub<TestDataType>, config, data);
  config.kernel_name("inplace_mul_sub");
  optimize_full(paras, all_timings[config.kernel_name()], vector_inplace_mul_sub<TestDataType>, config, data);
  config.kernel_name("inplace_div_sub");
  optimize_full(paras, all_timings[config.kernel_name()], vector_inplace_div_sub<TestDataType>, config, data);

  std::cout << "------- Related to vector scaling (mult/div) ----------" << std::endl;
  config.kernel_name("mult");
  optimize_full(paras, all_timings[config.kernel_name()], vector_mult<TestDataType>, config, data);
  config.kernel_name("inplace_mult");
  optimize_full(paras, all_timings[config.kernel_name()], vector_inplace_mult<TestDataType>, config, data);
  config.kernel_name("cpu_mult");
  optimize_full(paras, all_timings[config.kernel_name()], vector_cpu_mult<TestDataType>, config, data);
  config.kernel_name("cpu_inplace_mult");
  optimize_full(paras, all_timings[config.kernel_name()], vector_cpu_inplace_mult<TestDataType>, config, data);
  config.kernel_name("divide");
  optimize_full(paras, all_timings[config.kernel_name()], vector_divide<TestDataType>, config, data);
  config.kernel_name("inplace_divide");
  optimize_full(paras, all_timings[config.kernel_name()], vector_inplace_divide<TestDataType>, config, data);

  std::cout << "------- Others ----------" << std::endl;
  config.kernel_name("inner_prod");
  optimize_full(paras, all_timings[config.kernel_name()], vector_inner_prod<TestDataType>, config, data);
  config.kernel_name("swap");
  optimize_full(paras, all_timings[config.kernel_name()], vector_swap<TestDataType>, config, data);
  config.kernel_name("clear");
  optimize_full(paras, all_timings[config.kernel_name()], vector_clear<TestDataType>, config, data);
  config.kernel_name("plane_rotation");
  optimize_full(paras, all_timings[config.kernel_name()], vector_plane_rotation<TestDataType>, config, data);

  //restricted optimizations (the norm kernels and index_norm_inf go through optimize_restricted):
  //config.max_work_groups(32); //otherwise failures on 8500 GT
  config.kernel_name("norm_1");
  optimize_restricted(paras, all_timings[config.kernel_name()], vector_norm_1<TestDataType>, config, data);
  config.kernel_name("norm_2");
  optimize_restricted(paras, all_timings[config.kernel_name()], vector_norm_2<TestDataType>, config, data);
  config.kernel_name("norm_inf");
  optimize_restricted(paras, all_timings[config.kernel_name()], vector_norm_inf<TestDataType>, config, data);
  config.kernel_name("index_norm_inf");
  optimize_restricted(paras, all_timings[config.kernel_name()], vector_index_norm_inf<TestDataType>, config, data);

  return 0;
}
Example #2
0
/** @brief Sets up an identity matrix plus two zero vectors on the device and calls the optimizer for each dense-matrix kernel listed below.
*
* Timings are collected per kernel name in a function-local map; that map is not handed back to the caller.
* NOTE(review): what optimize_restricted() stores in 'paras' is defined elsewhere - confirm there.
*
* @param config  Test configuration; its kernel name is overwritten before each optimizer call
* @param paras   Parameter database forwarded to the optimizer routine
* @return Always 0
*/
int run_matrix_benchmark(test_config & config, viennacl::io::parameter_database& paras)
{
  typedef viennacl::scalar<ScalarType>   VCLScalar;
  typedef viennacl::vector<ScalarType>   VCLVector;
  typedef viennacl::matrix<ScalarType>   VCLMatrix;
  typedef test_data<VCLScalar, VCLVector, VCLMatrix>   TestDataType;
  typedef std::map< double, std::pair<unsigned int, unsigned int> >   TimingType;

  ////////////////////////////////////////////////////////////////////
  // Operands for the kernels under test
  ////////////////////////////////////////////////////////////////////

  ScalarType std_factor1 = static_cast<ScalarType>(3.1415);
  VCLScalar  vcl_factor1(std_factor1);

  std::vector<ScalarType> std_zeros(BENCHMARK_MATRIX_SIZE);  //value-initialized, i.e. all zeros

  //host-side square matrix, zero-filled on construction; the loop below turns it into the identity
  std::vector< std::vector<ScalarType> > stl_mat(BENCHMARK_MATRIX_SIZE,
                                                 std::vector<ScalarType>(BENCHMARK_MATRIX_SIZE));
  for (int row = 0; row < BENCHMARK_MATRIX_SIZE; ++row)
    stl_mat[row][row] = static_cast<ScalarType>(1.0);

  VCLVector vcl_vec1(BENCHMARK_MATRIX_SIZE);
  VCLVector vcl_vec2(BENCHMARK_MATRIX_SIZE);
  VCLMatrix vcl_mat(BENCHMARK_MATRIX_SIZE, BENCHMARK_MATRIX_SIZE);

  //all-zero vectors: no need to worry about overflows during repeated kernel runs.
  //copy() is qualified so that the call does not depend on argument-dependent lookup.
  viennacl::copy(std_zeros, vcl_vec1);
  viennacl::copy(std_zeros, vcl_vec2);
  viennacl::copy(stl_mat, vcl_mat);

  TestDataType data(vcl_factor1, vcl_vec1, vcl_vec2, vcl_mat);

  //////////////////////////////////////////////////////////
  ///////////// Start parameter recording  /////////////////
  //////////////////////////////////////////////////////////

  std::map< std::string, TimingType > all_timings;   //one timing table per kernel name

  std::cout << "------- Related to direct solution algorithms ----------" << std::endl;

  config.kernel_name("trans_lower_triangular_substitute_inplace");
  optimize_restricted(paras, all_timings[config.kernel_name()],
                      matrix_trans_lower_triangular_substitute_inplace<TestDataType>, config, data);

  config.kernel_name("lower_triangular_substitute_inplace");
  optimize_restricted(paras, all_timings[config.kernel_name()],
                      matrix_lower_triangular_substitute_inplace<TestDataType>, config, data);

  config.kernel_name("unit_lower_triangular_substitute_inplace");
  optimize_restricted(paras, all_timings[config.kernel_name()],
                      matrix_unit_lower_triangular_substitute_inplace<TestDataType>, config, data);

  config.kernel_name("upper_triangular_substitute_inplace");
  optimize_restricted(paras, all_timings[config.kernel_name()],
                      matrix_upper_triangular_substitute_inplace<TestDataType>, config, data);

  config.kernel_name("trans_upper_triangular_substitute_inplace");
  optimize_restricted(paras, all_timings[config.kernel_name()],
                      matrix_trans_upper_triangular_substitute_inplace<TestDataType>, config, data);

  config.kernel_name("unit_upper_triangular_substitute_inplace");
  optimize_restricted(paras, all_timings[config.kernel_name()],
                      matrix_unit_upper_triangular_substitute_inplace<TestDataType>, config, data);

  config.kernel_name("lu_factorize");
  optimize_restricted(paras, all_timings[config.kernel_name()],
                      matrix_lu_factorize<TestDataType>, config, data);

  std::cout << "------- Related to other operations ----------" << std::endl;

  config.kernel_name("rank1_update");
  optimize_restricted(paras, all_timings[config.kernel_name()],
                      matrix_rank1_update<TestDataType>, config, data);

  config.kernel_name("scaled_rank1_update");
  optimize_restricted(paras, all_timings[config.kernel_name()],
                      matrix_scaled_rank1_update<TestDataType>, config, data);

  config.kernel_name("vec_mul");
  optimize_restricted(paras, all_timings[config.kernel_name()],
                      matrix_vec_mul<TestDataType>, config, data);

  config.kernel_name("trans_vec_mul");
  optimize_restricted(paras, all_timings[config.kernel_name()],
                      matrix_trans_vec_mul<TestDataType>, config, data);

  return 0;
}
Example #3
0
/** @brief Sets up a banded sparse matrix plus two zero vectors on the device and calls the optimizer for the sparse matrix-vector product kernel.
*
* Timings are collected per kernel name in a function-local map; that map is not handed back to the caller.
* NOTE(review): what optimize_full() stores in 'paras' is defined elsewhere - confirm there.
*
* @param config  Test configuration; its kernel name is overwritten before the optimizer call
* @param paras   Parameter database forwarded to the optimizer routine
* @return Always 0
*/
int run_matrix_benchmark(test_config & config, viennacl::io::parameter_database& paras)
{
  typedef viennacl::scalar<ScalarType>   VCLScalar;
  typedef viennacl::vector<ScalarType>   VCLVector;
  typedef viennacl::compressed_matrix<ScalarType>   VCLMatrix;
  typedef test_data<VCLScalar, VCLVector, VCLMatrix>   TestDataType;
  typedef std::map< double, std::pair<unsigned int, unsigned int> >   TimingType;

  ////////////////////////////////////////////////////////////////////
  // Operands for the kernel under test
  ////////////////////////////////////////////////////////////////////

  ScalarType std_factor1 = static_cast<ScalarType>(3.1415);
  VCLScalar  vcl_factor1(std_factor1);

  std::vector<ScalarType> std_zeros(BENCHMARK_MATRIX_SIZE);  //value-initialized, i.e. all zeros

  //host-side sparse matrix (one column->value map per row): unit diagonal plus up to
  //four unit entries to the left and four to the right of it
  std::vector< std::map< unsigned int, ScalarType> > stl_mat(BENCHMARK_MATRIX_SIZE);

  static const int below_diag[] = { 10, 7, 4, 2 };   //column distances to the left of the diagonal
  static const int above_diag[] = { 5, 7, 9, 10 };   //column distances to the right of the diagonal
  const unsigned int below_count = sizeof(below_diag) / sizeof(below_diag[0]);
  const unsigned int above_count = sizeof(above_diag) / sizeof(above_diag[0]);
  const ScalarType one = static_cast<ScalarType>(1.0);

  for (int row = 0; row < BENCHMARK_MATRIX_SIZE; ++row)
  {
    //left band only where the farthest column (row - 10) is strictly positive
    if (row > 10)
    {
      for (unsigned int k = 0; k < below_count; ++k)
        stl_mat[row][row - below_diag[k]] = one;
    }

    stl_mat[row][row] = one;

    //right band only where the farthest column (row + 10) is still inside the matrix
    if (row + 10 < BENCHMARK_MATRIX_SIZE)
    {
      for (unsigned int k = 0; k < above_count; ++k)
        stl_mat[row][row + above_diag[k]] = one;
    }
  }

  VCLVector vcl_vec1(BENCHMARK_MATRIX_SIZE);
  VCLVector vcl_vec2(BENCHMARK_MATRIX_SIZE);
  VCLMatrix vcl_mat(BENCHMARK_MATRIX_SIZE, BENCHMARK_MATRIX_SIZE);

  //all-zero vectors: no need to worry about overflows during repeated kernel runs
  viennacl::copy(std_zeros, vcl_vec1);
  viennacl::copy(std_zeros, vcl_vec2);
  viennacl::copy(stl_mat, vcl_mat);

  TestDataType data(vcl_factor1, vcl_vec1, vcl_vec2, vcl_mat);

  //////////////////////////////////////////////////////////
  ///////////// Start parameter recording  /////////////////
  //////////////////////////////////////////////////////////

  std::map< std::string, TimingType > all_timings;   //one timing table per kernel name

  std::cout << "------- Related to other operations ----------" << std::endl;

  config.kernel_name("vec_mul");
  optimize_full(paras, all_timings[config.kernel_name()],
                      matrix_vec_mul<TestDataType>, config, data);

  return 0;
}