int run_vector_benchmark(test_config & config, viennacl::io::parameter_database& paras) { typedef viennacl::scalar<ScalarType> VCLScalar; typedef viennacl::vector<ScalarType> VCLVector; //////////////////////////////////////////////////////////////////// //set up a little bit of data to play with: //ScalarType std_result = 0; ScalarType std_factor1 = static_cast<ScalarType>(3.1415); ScalarType std_factor2 = static_cast<ScalarType>(42.0); viennacl::scalar<ScalarType> vcl_factor1(std_factor1); viennacl::scalar<ScalarType> vcl_factor2(std_factor2); std::vector<ScalarType> std_vec1(BENCHMARK_VECTOR_SIZE); //used to set all values to zero VCLVector vcl_vec1(BENCHMARK_VECTOR_SIZE); VCLVector vcl_vec2(BENCHMARK_VECTOR_SIZE); VCLVector vcl_vec3(BENCHMARK_VECTOR_SIZE); viennacl::copy(std_vec1, vcl_vec1); //initialize vectors with all zeros (no need to worry about overflows then) viennacl::copy(std_vec1, vcl_vec2); //initialize vectors with all zeros (no need to worry about overflows then) typedef test_data<VCLScalar, VCLVector> TestDataType; test_data<VCLScalar, VCLVector> data(vcl_factor1, vcl_vec1, vcl_vec2, vcl_vec3); ////////////////////////////////////////////////////////// ///////////// Start parameter recording ///////////////// ////////////////////////////////////////////////////////// typedef std::map< double, std::pair<unsigned int, unsigned int> > TimingType; std::map< std::string, TimingType > all_timings; // vector addition std::cout << "------- Related to vector addition ----------" << std::endl; config.kernel_name("add"); optimize_full(paras, all_timings[config.kernel_name()], vector_add<TestDataType>, config, data); config.kernel_name("inplace_add"); optimize_full(paras, all_timings[config.kernel_name()], vector_inplace_add<TestDataType>, config, data); config.kernel_name("mul_add"); optimize_full(paras, all_timings[config.kernel_name()], vector_mul_add<TestDataType>, config, data); config.kernel_name("cpu_mul_add"); optimize_full(paras, all_timings[config.kernel_name()], vector_cpu_mul_add<TestDataType>, config, data); config.kernel_name("inplace_mul_add"); optimize_full(paras, all_timings[config.kernel_name()], vector_inplace_mul_add<TestDataType>, config, data); config.kernel_name("cpu_inplace_mul_add"); optimize_full(paras, all_timings[config.kernel_name()], vector_cpu_inplace_mul_add<TestDataType>, config, data); config.kernel_name("inplace_div_add"); optimize_full(paras, all_timings[config.kernel_name()], vector_inplace_div_add<TestDataType>, config, data); std::cout << "------- Related to vector subtraction ----------" << std::endl; config.kernel_name("sub"); optimize_full(paras, all_timings[config.kernel_name()], vector_sub<TestDataType>, config, data); config.kernel_name("inplace_sub"); optimize_full(paras, all_timings[config.kernel_name()], vector_inplace_sub<TestDataType>, config, data); config.kernel_name("mul_sub"); optimize_full(paras, all_timings[config.kernel_name()], vector_mul_sub<TestDataType>, config, data); config.kernel_name("inplace_mul_sub"); optimize_full(paras, all_timings[config.kernel_name()], vector_inplace_mul_sub<TestDataType>, config, data); config.kernel_name("inplace_div_sub"); optimize_full(paras, all_timings[config.kernel_name()], vector_inplace_div_sub<TestDataType>, config, data); std::cout << "------- Related to vector scaling (mult/div) ----------" << std::endl; config.kernel_name("mult"); optimize_full(paras, all_timings[config.kernel_name()], vector_mult<TestDataType>, config, data); config.kernel_name("inplace_mult"); optimize_full(paras, all_timings[config.kernel_name()], vector_inplace_mult<TestDataType>, config, data); config.kernel_name("cpu_mult"); optimize_full(paras, all_timings[config.kernel_name()], vector_cpu_mult<TestDataType>, config, data); config.kernel_name("cpu_inplace_mult"); optimize_full(paras, all_timings[config.kernel_name()], vector_cpu_inplace_mult<TestDataType>, config, data); config.kernel_name("divide"); optimize_full(paras, all_timings[config.kernel_name()], vector_divide<TestDataType>, config, data); config.kernel_name("inplace_divide"); optimize_full(paras, all_timings[config.kernel_name()], vector_inplace_divide<TestDataType>, config, data); std::cout << "------- Others ----------" << std::endl; config.kernel_name("inner_prod"); optimize_full(paras, all_timings[config.kernel_name()], vector_inner_prod<TestDataType>, config, data); config.kernel_name("swap"); optimize_full(paras, all_timings[config.kernel_name()], vector_swap<TestDataType>, config, data); config.kernel_name("clear"); optimize_full(paras, all_timings[config.kernel_name()], vector_clear<TestDataType>, config, data); config.kernel_name("plane_rotation"); optimize_full(paras, all_timings[config.kernel_name()], vector_plane_rotation<TestDataType>, config, data); //config.max_work_groups(32); //otherwise failures on 8500 GT config.kernel_name("norm_1"); optimize_restricted(paras, all_timings[config.kernel_name()], vector_norm_1<TestDataType>, config, data); config.kernel_name("norm_2"); optimize_restricted(paras, all_timings[config.kernel_name()], vector_norm_2<TestDataType>, config, data); config.kernel_name("norm_inf"); optimize_restricted(paras, all_timings[config.kernel_name()], vector_norm_inf<TestDataType>, config, data); //restricted optimizations: config.kernel_name("index_norm_inf"); optimize_restricted(paras, all_timings[config.kernel_name()], vector_index_norm_inf<TestDataType>, config, data); return 0; }
int run_matrix_benchmark(test_config & config, viennacl::io::parameter_database& paras) { typedef viennacl::scalar<ScalarType> VCLScalar; typedef viennacl::vector<ScalarType> VCLVector; typedef viennacl::matrix<ScalarType> VCLMatrix; //////////////////////////////////////////////////////////////////// //set up a little bit of data to play with: //ScalarType std_result = 0; ScalarType std_factor1 = static_cast<ScalarType>(3.1415); ScalarType std_factor2 = static_cast<ScalarType>(42.0); viennacl::scalar<ScalarType> vcl_factor1(std_factor1); viennacl::scalar<ScalarType> vcl_factor2(std_factor2); std::vector<ScalarType> std_vec1(BENCHMARK_MATRIX_SIZE); //used to set all values to zero std::vector< std::vector<ScalarType> > stl_mat(BENCHMARK_MATRIX_SIZE); //store identity matrix here VCLVector vcl_vec1(BENCHMARK_MATRIX_SIZE); VCLVector vcl_vec2(BENCHMARK_MATRIX_SIZE); VCLMatrix vcl_mat(BENCHMARK_MATRIX_SIZE, BENCHMARK_MATRIX_SIZE); for (int i=0; i<BENCHMARK_MATRIX_SIZE; ++i) { stl_mat[i].resize(BENCHMARK_MATRIX_SIZE); stl_mat[i][i] = 1.0; } copy(std_vec1, vcl_vec1); //initialize vectors with all zeros (no need to worry about overflows then) copy(std_vec1, vcl_vec2); //initialize vectors with all zeros (no need to worry about overflows then) copy(stl_mat, vcl_mat); typedef test_data<VCLScalar, VCLVector, VCLMatrix> TestDataType; test_data<VCLScalar, VCLVector, VCLMatrix> data(vcl_factor1, vcl_vec1, vcl_vec2, vcl_mat); ////////////////////////////////////////////////////////// ///////////// Start parameter recording ///////////////// ////////////////////////////////////////////////////////// typedef std::map< double, std::pair<unsigned int, unsigned int> > TimingType; std::map< std::string, TimingType > all_timings; std::cout << "------- Related to direct solution algorithms ----------" << std::endl; config.kernel_name("trans_lower_triangular_substitute_inplace"); optimize_restricted(paras, all_timings[config.kernel_name()], matrix_trans_lower_triangular_substitute_inplace<TestDataType>, config, data); config.kernel_name("lower_triangular_substitute_inplace"); optimize_restricted(paras, all_timings[config.kernel_name()], matrix_lower_triangular_substitute_inplace<TestDataType>, config, data); config.kernel_name("unit_lower_triangular_substitute_inplace"); optimize_restricted(paras, all_timings[config.kernel_name()], matrix_unit_lower_triangular_substitute_inplace<TestDataType>, config, data); config.kernel_name("upper_triangular_substitute_inplace"); optimize_restricted(paras, all_timings[config.kernel_name()], matrix_upper_triangular_substitute_inplace<TestDataType>, config, data); config.kernel_name("trans_upper_triangular_substitute_inplace"); optimize_restricted(paras, all_timings[config.kernel_name()], matrix_trans_upper_triangular_substitute_inplace<TestDataType>, config, data); config.kernel_name("unit_upper_triangular_substitute_inplace"); optimize_restricted(paras, all_timings[config.kernel_name()], matrix_unit_upper_triangular_substitute_inplace<TestDataType>, config, data); config.kernel_name("lu_factorize"); optimize_restricted(paras, all_timings[config.kernel_name()], matrix_lu_factorize<TestDataType>, config, data); //other kernels: std::cout << "------- Related to other operations ----------" << std::endl; config.kernel_name("rank1_update"); optimize_restricted(paras, all_timings[config.kernel_name()], matrix_rank1_update<TestDataType>, config, data); config.kernel_name("scaled_rank1_update"); optimize_restricted(paras, all_timings[config.kernel_name()], matrix_scaled_rank1_update<TestDataType>, config, data); config.kernel_name("vec_mul"); optimize_restricted(paras, all_timings[config.kernel_name()], matrix_vec_mul<TestDataType>, config, data); config.kernel_name("trans_vec_mul"); optimize_restricted(paras, all_timings[config.kernel_name()], matrix_trans_vec_mul<TestDataType>, config, data); return 0; }
int run_matrix_benchmark(test_config & config, viennacl::io::parameter_database& paras) { typedef viennacl::scalar<ScalarType> VCLScalar; typedef viennacl::vector<ScalarType> VCLVector; typedef viennacl::compressed_matrix<ScalarType> VCLMatrix; //////////////////////////////////////////////////////////////////// //set up a little bit of data to play with: //ScalarType std_result = 0; ScalarType std_factor1 = static_cast<ScalarType>(3.1415); ScalarType std_factor2 = static_cast<ScalarType>(42.0); viennacl::scalar<ScalarType> vcl_factor1(std_factor1); viennacl::scalar<ScalarType> vcl_factor2(std_factor2); std::vector<ScalarType> std_vec1(BENCHMARK_MATRIX_SIZE); //used to set all values to zero std::vector< std::map< unsigned int, ScalarType> > stl_mat(BENCHMARK_MATRIX_SIZE); //store identity matrix here VCLVector vcl_vec1(BENCHMARK_MATRIX_SIZE); VCLVector vcl_vec2(BENCHMARK_MATRIX_SIZE); VCLMatrix vcl_mat(BENCHMARK_MATRIX_SIZE, BENCHMARK_MATRIX_SIZE); for (int i=0; i<BENCHMARK_MATRIX_SIZE; ++i) { if (i > 10) { stl_mat[i][i - 10] = 1.0; stl_mat[i][i - 7] = 1.0; stl_mat[i][i - 4] = 1.0; stl_mat[i][i - 2] = 1.0; } stl_mat[i][i] = 1.0; if (i + 10 < BENCHMARK_MATRIX_SIZE) { stl_mat[i][i + 5] = 1.0; stl_mat[i][i + 7] = 1.0; stl_mat[i][i + 9] = 1.0; stl_mat[i][i + 10] = 1.0; } } viennacl::copy(std_vec1, vcl_vec1); //initialize vectors with all zeros (no need to worry about overflows then) viennacl::copy(std_vec1, vcl_vec2); //initialize vectors with all zeros (no need to worry about overflows then) viennacl::copy(stl_mat, vcl_mat); typedef test_data<VCLScalar, VCLVector, VCLMatrix> TestDataType; test_data<VCLScalar, VCLVector, VCLMatrix> data(vcl_factor1, vcl_vec1, vcl_vec2, vcl_mat); ////////////////////////////////////////////////////////// ///////////// Start parameter recording ///////////////// ////////////////////////////////////////////////////////// typedef std::map< double, std::pair<unsigned int, unsigned int> > TimingType; std::map< std::string, TimingType > all_timings; //other kernels: std::cout << "------- Related to other operations ----------" << std::endl; config.kernel_name("vec_mul"); optimize_full(paras, all_timings[config.kernel_name()], matrix_vec_mul<TestDataType>, config, data); return 0; }