double run_benchmark(size_t matrix_size) { // // One alternative: Put the matrices into a contiguous block of memory (allows to use viennacl::fast_copy(), avoiding temporary memory) // std::vector<ScalarType> stl_B(matrix_size * matrix_size); std::vector<ScalarType> stl_C(matrix_size * matrix_size); // // Fill the matrix // for (unsigned int i = 0; i < matrix_size; ++i) for (unsigned int j = 0; j < matrix_size; ++j) stl_B[i*matrix_size + j] = random<ScalarType>(); for (unsigned int i = 0; i < matrix_size; ++i) for (unsigned int j = 0; j < matrix_size; ++j) stl_C[i + j*matrix_size] = random<ScalarType>(); //viennacl::ocl::current_context().build_options("-cl-mad-enable -cl-fast-relaxed-math"); //uncomment for additional optimizations //viennacl::ocl::current_context().build_options("-cl-opt-disable"); //uncomment to get poor performance viennacl::matrix<ScalarType> vcl_A(matrix_size, matrix_size); viennacl::matrix<ScalarType,FB> vcl_B(matrix_size, matrix_size); viennacl::matrix<ScalarType,FC> vcl_C(matrix_size, matrix_size); typedef viennacl::generator::matrix< viennacl::matrix<ScalarType> > dma_t; typedef viennacl::generator::matrix< viennacl::matrix<ScalarType,FB> > dmb_t; typedef viennacl::generator::matrix< viennacl::matrix<ScalarType,FC> > dmc_t; viennacl::fast_copy(&(stl_B[0]), &(stl_B[0]) + stl_B.size(), vcl_B); viennacl::fast_copy(&(stl_C[0]), &(stl_C[0]) + stl_C.size(), vcl_C); viennacl::generator::custom_operation op; op.add(dma_t(vcl_A) = viennacl::generator::prod(dmb_t(vcl_B), dmc_t(vcl_C))); op.program(); op.execute(); viennacl::backend::finish(); double res = 0; Timer timer; timer.start(); for(unsigned int r = 0 ; r < N_RUNS ; ++r){ op.execute(); } viennacl::backend::finish(); res = timer.get(); return res/N_RUNS; }
int main (int, const char **) { typedef float ScalarType; //feel free to change this to 'double' if supported by your hardware typedef boost::numeric::ublas::matrix<ScalarType> MatrixType; typedef viennacl::matrix<ScalarType, viennacl::row_major> VCLMatrixType; std::size_t dim_large = 5; std::size_t dim_small = 3; // // Setup ublas objects and fill with data: // MatrixType ublas_A(dim_large, dim_large); MatrixType ublas_B(dim_small, dim_small); MatrixType ublas_C(dim_large, dim_small); MatrixType ublas_D(dim_small, dim_large); for (std::size_t i=0; i<ublas_A.size1(); ++i) for (std::size_t j=0; j<ublas_A.size2(); ++j) ublas_A(i,j) = static_cast<ScalarType>((i+1) + (j+1)*(i+1)); for (std::size_t i=0; i<ublas_B.size1(); ++i) for (std::size_t j=0; j<ublas_B.size2(); ++j) ublas_B(i,j) = static_cast<ScalarType>((i+1) + (j+1)*(i+1)); for (std::size_t i=0; i<ublas_C.size1(); ++i) for (std::size_t j=0; j<ublas_C.size2(); ++j) ublas_C(i,j) = static_cast<ScalarType>((j+2) + (j+1)*(i+1)); for (std::size_t i=0; i<ublas_D.size1(); ++i) for (std::size_t j=0; j<ublas_D.size2(); ++j) ublas_D(i,j) = static_cast<ScalarType>((j+2) + (j+1)*(i+1)); // // Extract submatrices using the ranges in ublas // boost::numeric::ublas::range ublas_r1(0, dim_small); //the first 'dim_small' entries boost::numeric::ublas::range ublas_r2(dim_large - dim_small, dim_large); //the last 'dim_small' entries boost::numeric::ublas::matrix_range<MatrixType> ublas_A_sub1(ublas_A, ublas_r1, ublas_r1); //upper left part of A boost::numeric::ublas::matrix_range<MatrixType> ublas_A_sub2(ublas_A, ublas_r2, ublas_r2); //lower right part of A boost::numeric::ublas::matrix_range<MatrixType> ublas_C_sub(ublas_C, ublas_r1, ublas_r1); //upper left part of C boost::numeric::ublas::matrix_range<MatrixType> ublas_D_sub(ublas_D, ublas_r1, ublas_r1); //upper left part of D // // Setup ViennaCL objects // VCLMatrixType vcl_A(dim_large, dim_large); VCLMatrixType vcl_B(dim_small, dim_small); VCLMatrixType vcl_C(dim_large, dim_small); VCLMatrixType vcl_D(dim_small, dim_large); viennacl::copy(ublas_A, vcl_A); viennacl::copy(ublas_B, vcl_B); viennacl::copy(ublas_C, vcl_C); viennacl::copy(ublas_D, vcl_D); // // Extract submatrices using the ranges in ViennaCL // viennacl::range vcl_r1(0, dim_small); //the first 'dim_small' entries viennacl::range vcl_r2(dim_large - dim_small, dim_large); //the last 'dim_small' entries viennacl::matrix_range<VCLMatrixType> vcl_A_sub1(vcl_A, vcl_r1, vcl_r1); //upper left part of A viennacl::matrix_range<VCLMatrixType> vcl_A_sub2(vcl_A, vcl_r2, vcl_r2); //lower right part of A viennacl::matrix_range<VCLMatrixType> vcl_C_sub(vcl_C, vcl_r1, vcl_r1); //upper left part of C viennacl::matrix_range<VCLMatrixType> vcl_D_sub(vcl_D, vcl_r1, vcl_r1); //upper left part of D // // Copy from ublas to submatrices and back: // ublas_A_sub1 = ublas_B; viennacl::copy(ublas_B, vcl_A_sub1); viennacl::copy(vcl_A_sub1, ublas_B); // // Addition: // // range to range: ublas_A_sub2 += ublas_A_sub2; vcl_A_sub2 += vcl_A_sub2; // range to matrix: ublas_B += ublas_A_sub2; vcl_B += vcl_A_sub2; // // use matrix range with matrix-matrix product: // ublas_A_sub1 += prod(ublas_C_sub, ublas_D_sub); vcl_A_sub1 += viennacl::linalg::prod(vcl_C_sub, vcl_D_sub); // // Print result matrices: // std::cout << "Result ublas: " << ublas_A << std::endl; std::cout << "Result ViennaCL: " << vcl_A << std::endl; // // That's it. // std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl; return EXIT_SUCCESS; }
int run_benchmark() { Timer timer; double exec_time; // // One alternative: Put the matrices into a contiguous block of memory (allows to use viennacl::fast_copy(), avoiding temporary memory) // std::vector<ScalarType> stl_A(BLAS3_MATRIX_SIZE * BLAS3_MATRIX_SIZE); std::vector<ScalarType> stl_B(BLAS3_MATRIX_SIZE * BLAS3_MATRIX_SIZE); std::vector<ScalarType> stl_C(BLAS3_MATRIX_SIZE * BLAS3_MATRIX_SIZE); // // Fill the matrix // for (unsigned int i = 0; i < BLAS3_MATRIX_SIZE; ++i) for (unsigned int j = 0; j < BLAS3_MATRIX_SIZE; ++j) stl_A[i*BLAS3_MATRIX_SIZE + j] = random<ScalarType>(); for (unsigned int i = 0; i < BLAS3_MATRIX_SIZE; ++i) for (unsigned int j = 0; j < BLAS3_MATRIX_SIZE; ++j) stl_B[i + j*BLAS3_MATRIX_SIZE] = random<ScalarType>(); // // Set up some ViennaCL objects // #ifdef VIENNACL_WITH_OPENCL viennacl::ocl::set_context_device_type(0, viennacl::ocl::gpu_tag()); #endif //viennacl::ocl::current_context().build_options("-cl-mad-enable -cl-fast-relaxed-math"); //uncomment for additional optimizations //viennacl::ocl::current_context().build_options("-cl-opt-disable"); //uncomment to get poor performance viennacl::matrix<ScalarType> vcl_A(BLAS3_MATRIX_SIZE, BLAS3_MATRIX_SIZE); viennacl::matrix<ScalarType> vcl_B(BLAS3_MATRIX_SIZE, BLAS3_MATRIX_SIZE); viennacl::matrix<ScalarType> vcl_C(BLAS3_MATRIX_SIZE, BLAS3_MATRIX_SIZE); ///////////////////////////////////////////////// //////////// Matrix-matrix products ///////////// ///////////////////////////////////////////////// // // Now iterate over all OpenCL devices in the context and compute the matrix-matrix product // std::cout << " ------ Benchmark 1: Matrix-Matrix product ------ " << std::endl; #ifdef VIENNACL_WITH_OPENCL std::vector<viennacl::ocl::device> devices = viennacl::ocl::current_context().devices(); #else std::vector<long> devices(1); #endif for (std::size_t i=0; i<devices.size(); ++i) { #ifdef VIENNACL_WITH_OPENCL viennacl::ocl::current_context().switch_device(devices[i]); std::cout << " - Device Name: " << viennacl::ocl::current_device().name() << std::endl; #endif viennacl::fast_copy(&(stl_A[0]), &(stl_A[0]) + stl_A.size(), vcl_A); viennacl::fast_copy(&(stl_B[0]), &(stl_B[0]) + stl_B.size(), vcl_B); vcl_C = viennacl::linalg::prod(vcl_A, vcl_B); viennacl::backend::finish(); timer.start(); vcl_C = viennacl::linalg::prod(vcl_A, vcl_B); viennacl::backend::finish(); exec_time = timer.get(); std::cout << " - Execution time on device (no setup time included): " << exec_time << std::endl; std::cout << " - GFLOPs (counting multiply&add as separate operations): " << 2.0 * (vcl_A.size1() / 1000.0) * (vcl_A.size2() / 1000.0) * (vcl_B.size2() / 1000.0) / exec_time << std::endl; std::cout << std::endl; } std::cout << " ------ Benchmark 2: Matrix-Matrix product using ranges ------ " << std::endl; viennacl::range r(BLAS3_MATRIX_SIZE/4, 3 * BLAS3_MATRIX_SIZE/4); for (std::size_t i=0; i<devices.size(); ++i) { #ifdef VIENNACL_WITH_OPENCL viennacl::ocl::current_context().switch_device(devices[i]); std::cout << " - Device Name: " << viennacl::ocl::current_device().name() << std::endl; #endif viennacl::fast_copy(&(stl_A[0]), &(stl_A[0]) + stl_A.size(), vcl_A); viennacl::fast_copy(&(stl_B[0]), &(stl_B[0]) + stl_B.size(), vcl_B); viennacl::project(vcl_C, r, r) = viennacl::linalg::prod(viennacl::project(vcl_A, r, r), viennacl::project(vcl_B, r, r)); viennacl::backend::finish(); timer.start(); viennacl::project(vcl_C, r, r) = viennacl::linalg::prod(viennacl::project(vcl_A, r, r), viennacl::project(vcl_B, r, r)); viennacl::backend::finish(); exec_time = timer.get(); std::cout << " - Execution time on device (no setup time included): " << exec_time << std::endl; std::cout << " - GFLOPs (counting multiply&add as separate operations): " << 2.0 * (vcl_A.size1() / 2000.0) * (vcl_A.size2() / 2000.0) * (vcl_B.size2() / 2000.0) / exec_time << std::endl; std::cout << std::endl; } std::cout << " ------ Benchmark 3: Matrix-Matrix product using slices ------ " << std::endl; viennacl::slice s(0, 2, BLAS3_MATRIX_SIZE/2); for (std::size_t i=0; i<devices.size(); ++i) { #ifdef VIENNACL_WITH_OPENCL viennacl::ocl::current_context().switch_device(devices[i]); std::cout << " - Device Name: " << viennacl::ocl::current_device().name() << std::endl; #endif viennacl::fast_copy(&(stl_A[0]), &(stl_A[0]) + stl_A.size(), vcl_A); viennacl::fast_copy(&(stl_B[0]), &(stl_B[0]) + stl_B.size(), vcl_B); viennacl::project(vcl_C, s, s) = viennacl::linalg::prod(viennacl::project(vcl_A, s, s), viennacl::project(vcl_B, s, s)); viennacl::backend::finish(); timer.start(); viennacl::project(vcl_C, s, s) = viennacl::linalg::prod(viennacl::project(vcl_A, s, s), viennacl::project(vcl_B, s, s)); viennacl::backend::finish(); exec_time = timer.get(); std::cout << " - Execution time on device (no setup time included): " << exec_time << std::endl; std::cout << " - GFLOPs (counting multiply&add as separate operations): " << 2.0 * (vcl_A.size1() / 2000.0) * (vcl_A.size2() / 2000.0) * (vcl_B.size2() / 2000.0) / exec_time << std::endl; std::cout << std::endl; } std::cout << " ------ Benchmark 4: LU factorization ------ " << std::endl; for (std::size_t i=0; i<devices.size(); ++i) { #ifdef VIENNACL_WITH_OPENCL viennacl::ocl::current_context().switch_device(devices[i]); std::cout << " - Device Name: " << viennacl::ocl::current_device().name() << std::endl; #endif viennacl::fast_copy(&(stl_A[0]), &(stl_A[0]) + stl_A.size(), vcl_A); viennacl::linalg::lu_factorize(vcl_A); viennacl::backend::finish(); timer.start(); viennacl::linalg::lu_factorize(vcl_A); viennacl::backend::finish(); exec_time = timer.get(); std::cout << " - Execution time on device (no setup time included): " << exec_time << std::endl; std::cout << " - GFLOPs (counting multiply&add as separate operations): " << 2.0 * (vcl_A.size1() / 1000.0) * (vcl_A.size2() / 1000.0) * (vcl_A.size2() / 1000.0) / exec_time << std::endl; std::cout << std::endl; } return EXIT_SUCCESS; }
int test_prod(Epsilon const& epsilon) { int ret; viennacl::tools::uniform_random_numbers<NumericT> randomNumber; std::size_t matrix_size1 = 29; //some odd number, not too large std::size_t matrix_size2 = 47; //some odd number, not too large std::size_t matrix_size3 = 33; //some odd number, not too large //std::size_t matrix_size1 = 128; //some odd number, not too large //std::size_t matrix_size2 = 64; //some odd number, not too large //std::size_t matrix_size3 = 128; //some odd number, not too large //std::size_t matrix_size1 = 256; // for testing AMD kernels //std::size_t matrix_size2 = 256; // for testing AMD kernels //std::size_t matrix_size3 = 256; // for testing AMD kernels // -------------------------------------------------------------------------- // ublas reference: std::vector<std::vector<NumericT> > A(matrix_size1, std::vector<NumericT>(matrix_size2)); std::vector<std::vector<NumericT> > big_A(4*matrix_size1, std::vector<NumericT>(4*matrix_size2, NumericT(3.1415))); std::vector<std::vector<NumericT> > B(matrix_size2, std::vector<NumericT>(matrix_size3)); std::vector<std::vector<NumericT> > big_B(4*matrix_size2, std::vector<NumericT>(4*matrix_size3, NumericT(42.0))); std::vector<std::vector<NumericT> > C(matrix_size1, std::vector<NumericT>(matrix_size3)); //fill A and B: for (std::size_t i = 0; i < A.size(); ++i) for (std::size_t j = 0; j < A[0].size(); ++j) A[i][j] = static_cast<NumericT>(0.1) * randomNumber(); for (std::size_t i = 0; i < B.size(); ++i) for (std::size_t j = 0; j < B[0].size(); ++j) B[i][j] = static_cast<NumericT>(0.1) * randomNumber(); std::vector<std::vector<NumericT> > A_trans(A[0].size(), std::vector<NumericT>(A.size())); for (std::size_t i = 0; i < A.size(); ++i) for (std::size_t j = 0; j < A[0].size(); ++j) A_trans[j][i] = A[i][j]; std::vector<std::vector<NumericT> > big_A_trans(big_A[0].size(), std::vector<NumericT>(big_A.size())); for (std::size_t i = 0; i < big_A.size(); ++i) for (std::size_t j = 0; j < big_A[0].size(); ++j) big_A_trans[j][i] = big_A[i][j]; std::vector<std::vector<NumericT> > B_trans(B[0].size(), std::vector<NumericT>(B.size())); for (std::size_t i = 0; i < B.size(); ++i) for (std::size_t j = 0; j < B[0].size(); ++j) B_trans[j][i] = B[i][j]; std::vector<std::vector<NumericT> > big_B_trans(big_B[0].size(), std::vector<NumericT>(big_B.size())); for (std::size_t i = 0; i < big_B.size(); ++i) for (std::size_t j = 0; j < big_B[0].size(); ++j) big_B_trans[j][i] = big_B[i][j]; // // ViennaCL objects // // A viennacl::range range1_A(matrix_size1, 2*matrix_size1); viennacl::range range2_A(matrix_size2, 2*matrix_size2); viennacl::slice slice1_A(matrix_size1, 2, matrix_size1); viennacl::slice slice2_A(matrix_size2, 3, matrix_size2); viennacl::matrix<NumericT, F_A> vcl_A(matrix_size1, matrix_size2); viennacl::copy(A, vcl_A); viennacl::matrix<NumericT, F_A> vcl_big_range_A(4*matrix_size1, 4*matrix_size2); viennacl::matrix_range<viennacl::matrix<NumericT, F_A> > vcl_range_A(vcl_big_range_A, range1_A, range2_A); viennacl::copy(A, vcl_range_A); viennacl::matrix<NumericT, F_A> vcl_big_slice_A(4*matrix_size1, 4*matrix_size2); viennacl::matrix_slice<viennacl::matrix<NumericT, F_A> > vcl_slice_A(vcl_big_slice_A, slice1_A, slice2_A); viennacl::copy(A, vcl_slice_A); // A^T viennacl::matrix<NumericT, F_A> vcl_A_trans(matrix_size2, matrix_size1); viennacl::copy(A_trans, vcl_A_trans); viennacl::matrix<NumericT, F_A> vcl_big_range_A_trans(4*matrix_size2, 4*matrix_size1); viennacl::matrix_range<viennacl::matrix<NumericT, F_A> > vcl_range_A_trans(vcl_big_range_A_trans, range2_A, range1_A); viennacl::copy(A_trans, vcl_range_A_trans); viennacl::matrix<NumericT, F_A> vcl_big_slice_A_trans(4*matrix_size2, 4*matrix_size1); viennacl::matrix_slice<viennacl::matrix<NumericT, F_A> > vcl_slice_A_trans(vcl_big_slice_A_trans, slice2_A, slice1_A); viennacl::copy(A_trans, vcl_slice_A_trans); // B viennacl::range range1_B(2*matrix_size2, 3*matrix_size2); viennacl::range range2_B(2*matrix_size3, 3*matrix_size3); viennacl::slice slice1_B(matrix_size2, 3, matrix_size2); viennacl::slice slice2_B(matrix_size3, 2, matrix_size3); viennacl::matrix<NumericT, F_B> vcl_B(matrix_size2, matrix_size3); viennacl::copy(B, vcl_B); viennacl::matrix<NumericT, F_B> vcl_big_range_B(4*matrix_size2, 4*matrix_size3); viennacl::matrix_range<viennacl::matrix<NumericT, F_B> > vcl_range_B(vcl_big_range_B, range1_B, range2_B); viennacl::copy(B, vcl_range_B); viennacl::matrix<NumericT, F_B> vcl_big_slice_B(4*matrix_size2, 4*matrix_size3); viennacl::matrix_slice<viennacl::matrix<NumericT, F_B> > vcl_slice_B(vcl_big_slice_B, slice1_B, slice2_B); viennacl::copy(B, vcl_slice_B); // B^T viennacl::matrix<NumericT, F_B> vcl_B_trans(matrix_size3, matrix_size2); viennacl::copy(B_trans, vcl_B_trans); viennacl::matrix<NumericT, F_B> vcl_big_range_B_trans(4*matrix_size3, 4*matrix_size2); viennacl::matrix_range<viennacl::matrix<NumericT, F_B> > vcl_range_B_trans(vcl_big_range_B_trans, range2_B, range1_B); viennacl::copy(B_trans, vcl_range_B_trans); viennacl::matrix<NumericT, F_B> vcl_big_slice_B_trans(4*matrix_size3, 4*matrix_size2); viennacl::matrix_slice<viennacl::matrix<NumericT, F_B> > vcl_slice_B_trans(vcl_big_slice_B_trans, slice2_B, slice1_B); viennacl::copy(B_trans, vcl_slice_B_trans); // C viennacl::range range1_C(matrix_size1-1, 2*matrix_size1-1); viennacl::range range2_C(matrix_size3-1, 2*matrix_size3-1); viennacl::slice slice1_C(matrix_size1-1, 3, matrix_size1); viennacl::slice slice2_C(matrix_size3-1, 3, matrix_size3); viennacl::matrix<NumericT, F_C> vcl_C(matrix_size1, matrix_size3); viennacl::matrix<NumericT, F_C> vcl_big_range_C(4*matrix_size1, 4*matrix_size3); viennacl::matrix_range<viennacl::matrix<NumericT, F_C> > vcl_range_C(vcl_big_range_C, range1_C, range2_C); viennacl::matrix<NumericT, F_C> vcl_big_slice_C(4*matrix_size1, 4*matrix_size3); viennacl::matrix_slice<viennacl::matrix<NumericT, F_C> > vcl_slice_C(vcl_big_slice_C, slice1_C, slice2_C); std::cout << "--- Part 1: Testing matrix-matrix products ---" << std::endl; ////// ////// A: matrix ////// // // std::cout << "Now using A=matrix, B=matrix, C=matrix" << std::endl; ret = test_prod<NumericT>(epsilon, A, A_trans, B, B_trans, C, vcl_A, vcl_A_trans, vcl_B, vcl_B_trans, vcl_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=matrix, B=matrix, C=range" << std::endl; ret = test_prod<NumericT>(epsilon, A, A_trans, B, B_trans, C, vcl_A, vcl_A_trans, vcl_B, vcl_B_trans, vcl_range_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=matrix, B=matrix, C=slice" << std::endl; ret = test_prod<NumericT>(epsilon, A, A_trans, B, B_trans, C, vcl_A, vcl_A_trans, vcl_B, vcl_B_trans, vcl_slice_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=matrix, B=range, C=matrix" << std::endl; ret = test_prod<NumericT>(epsilon, A, A_trans, B, B_trans, C, vcl_A, vcl_A_trans, vcl_range_B, vcl_range_B_trans, vcl_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=matrix, B=range, C=range" << std::endl; ret = test_prod<NumericT>(epsilon, A, A_trans, B, B_trans, C, vcl_A, vcl_A_trans, vcl_range_B, vcl_range_B_trans, vcl_range_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=matrix, B=range, C=slice" << std::endl; ret = test_prod<NumericT>(epsilon, A, A_trans, B, B_trans, C, vcl_A, vcl_A_trans, vcl_range_B, vcl_range_B_trans, vcl_slice_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=matrix, B=slice, C=matrix" << std::endl; ret = test_prod<NumericT>(epsilon, A, A_trans, B, B_trans, C, vcl_A, vcl_A_trans, vcl_slice_B, vcl_slice_B_trans, vcl_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=matrix, B=slice, C=range" << std::endl; ret = test_prod<NumericT>(epsilon, A, A_trans, B, B_trans, C, vcl_A, vcl_A_trans, vcl_slice_B, vcl_slice_B_trans, vcl_range_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=matrix, B=slice, C=slice" << std::endl; ret = test_prod<NumericT>(epsilon, A, A_trans, B, B_trans, C, vcl_A, vcl_A_trans, vcl_slice_B, vcl_slice_B_trans, vcl_slice_C); if (ret != EXIT_SUCCESS) return ret; ////// ////// A: range ////// // // std::cout << "Now using A=range, B=matrix, C=matrix" << std::endl; ret = test_prod<NumericT>(epsilon, A, A_trans, B, B_trans, C, vcl_range_A, vcl_range_A_trans, vcl_B, vcl_B_trans, vcl_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=range, B=matrix, C=range" << std::endl; ret = test_prod<NumericT>(epsilon, A, A_trans, B, B_trans, C, vcl_range_A, vcl_range_A_trans, vcl_B, vcl_B_trans, vcl_range_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=range, B=matrix, C=slice" << std::endl; ret = test_prod<NumericT>(epsilon, A, A_trans, B, B_trans, C, vcl_range_A, vcl_range_A_trans, vcl_B, vcl_B_trans, vcl_slice_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=range, B=range, C=matrix" << std::endl; ret = test_prod<NumericT>(epsilon, A, A_trans, B, B_trans, C, vcl_range_A, vcl_range_A_trans, vcl_range_B, vcl_range_B_trans, vcl_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=range, B=range, C=range" << std::endl; ret = test_prod<NumericT>(epsilon, A, A_trans, B, B_trans, C, vcl_range_A, vcl_range_A_trans, vcl_range_B, vcl_range_B_trans, vcl_range_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=range, B=range, C=slice" << std::endl; ret = test_prod<NumericT>(epsilon, A, A_trans, B, B_trans, C, vcl_range_A, vcl_range_A_trans, vcl_range_B, vcl_range_B_trans, vcl_slice_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=range, B=slice, C=matrix" << std::endl; ret = test_prod<NumericT>(epsilon, A, A_trans, B, B_trans, C, vcl_range_A, vcl_range_A_trans, vcl_slice_B, vcl_slice_B_trans, vcl_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=range, B=slice, C=range" << std::endl; ret = test_prod<NumericT>(epsilon, A, A_trans, B, B_trans, C, vcl_range_A, vcl_range_A_trans, vcl_slice_B, vcl_slice_B_trans, vcl_range_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=range, B=slice, C=slice" << std::endl; ret = test_prod<NumericT>(epsilon, A, A_trans, B, B_trans, C, vcl_range_A, vcl_range_A_trans, vcl_slice_B, vcl_slice_B_trans, vcl_slice_C); if (ret != EXIT_SUCCESS) return ret; ////// ////// A: slice ////// // // std::cout << "Now using A=slice, B=matrix, C=matrix" << std::endl; ret = test_prod<NumericT>(epsilon, A, A_trans, B, B_trans, C, vcl_slice_A, vcl_slice_A_trans, vcl_B, vcl_B_trans, vcl_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=slice, B=matrix, C=range" << std::endl; ret = test_prod<NumericT>(epsilon, A, A_trans, B, B_trans, C, vcl_slice_A, vcl_slice_A_trans, vcl_B, vcl_B_trans, vcl_range_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=slice, B=matrix, C=slice" << std::endl; ret = test_prod<NumericT>(epsilon, A, A_trans, B, B_trans, C, vcl_slice_A, vcl_slice_A_trans, vcl_B, vcl_B_trans, vcl_slice_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=slice, B=range, C=matrix" << std::endl; ret = test_prod<NumericT>(epsilon, A, A_trans, B, B_trans, C, vcl_slice_A, vcl_slice_A_trans, vcl_range_B, vcl_range_B_trans, vcl_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=slice, B=range, C=range" << std::endl; ret = test_prod<NumericT>(epsilon, A, A_trans, B, B_trans, C, vcl_slice_A, vcl_slice_A_trans, vcl_range_B, vcl_range_B_trans, vcl_range_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=slice, B=range, C=slice" << std::endl; ret = test_prod<NumericT>(epsilon, A, A_trans, B, B_trans, C, vcl_slice_A, vcl_slice_A_trans, vcl_range_B, vcl_range_B_trans, vcl_slice_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=slice, B=slice, C=matrix" << std::endl; ret = test_prod<NumericT>(epsilon, A, A_trans, B, B_trans, C, vcl_slice_A, vcl_slice_A_trans, vcl_slice_B, vcl_slice_B_trans, vcl_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=slice, B=slice, C=range" << std::endl; ret = test_prod<NumericT>(epsilon, A, A_trans, B, B_trans, C, vcl_slice_A, vcl_slice_A_trans, vcl_slice_B, vcl_slice_B_trans, vcl_range_C); if (ret != EXIT_SUCCESS) return ret; // // std::cout << "Now using A=slice, B=slice, C=slice" << std::endl; ret = test_prod<NumericT>(epsilon, A, A_trans, B, B_trans, C, vcl_slice_A, vcl_slice_A_trans, vcl_slice_B, vcl_slice_B_trans, vcl_slice_C); if (ret != EXIT_SUCCESS) return ret; return ret; }
int run_test() { //typedef float ScalarType; typedef boost::numeric::ublas::matrix<ScalarType> MatrixType; typedef boost::numeric::ublas::vector<ScalarType> VectorType; typedef viennacl::matrix<ScalarType, T> VCLMatrixType; typedef viennacl::vector<ScalarType> VCLVectorType; viennacl::scalar<ScalarType> gpu_pi = ScalarType(3.1415); std::size_t dim_large = 151; std::size_t dim_small = 37; //std::size_t dim_large = 35; //std::size_t dim_small = 11; //setup ublas objects: MatrixType ublas_A(dim_large, dim_large); for (std::size_t i=0; i<ublas_A.size1(); ++i) for (std::size_t j=0; j<ublas_A.size2(); ++j) ublas_A(i,j) = ScalarType((i+1) + (j+1)*(i+1)); MatrixType ublas_B(dim_small, dim_small); for (std::size_t i=0; i<ublas_B.size1(); ++i) for (std::size_t j=0; j<ublas_B.size2(); ++j) ublas_B(i,j) = ScalarType((i+1) + (j+1)*(i+1)); MatrixType ublas_C(dim_large, dim_small); for (std::size_t i=0; i<ublas_C.size1(); ++i) for (std::size_t j=0; j<ublas_C.size2(); ++j) ublas_C(i,j) = ScalarType((j+2) + (j+1)*(i+1)); MatrixType ublas_D(dim_small, dim_large); for (std::size_t i=0; i<ublas_D.size1(); ++i) for (std::size_t j=0; j<ublas_D.size2(); ++j) ublas_D(i,j) = ScalarType((j+2) + (j+1)*(i+1)); boost::numeric::ublas::range ublas_r1(0, dim_small); boost::numeric::ublas::range ublas_r2(dim_large - dim_small, dim_large); boost::numeric::ublas::matrix_range<MatrixType> ublas_A_sub1(ublas_A, ublas_r1, ublas_r1); boost::numeric::ublas::matrix_range<MatrixType> ublas_A_sub2(ublas_A, ublas_r2, ublas_r2); boost::numeric::ublas::matrix_range<MatrixType> ublas_C_sub(ublas_C, ublas_r1, ublas_r1); boost::numeric::ublas::matrix_range<MatrixType> ublas_D_sub(ublas_D, ublas_r1, ublas_r1); //Setup ViennaCL objects VCLMatrixType vcl_A(dim_large, dim_large); viennacl::copy(ublas_A, vcl_A); VCLMatrixType vcl_B(dim_small, dim_small); viennacl::copy(ublas_B, vcl_B); VCLMatrixType vcl_C(dim_large, dim_small); viennacl::copy(ublas_C, vcl_C); VCLMatrixType vcl_D(dim_small, dim_large); viennacl::copy(ublas_D, vcl_D); viennacl::range vcl_r1(0, dim_small); viennacl::range vcl_r2(dim_large - dim_small, dim_large); viennacl::matrix_range<VCLMatrixType> vcl_A_sub1(vcl_A, vcl_r1, vcl_r1); viennacl::matrix_range<VCLMatrixType> vcl_A_sub2(vcl_A, vcl_r2, vcl_r2); viennacl::matrix_range<VCLMatrixType> vcl_C_sub(vcl_C, vcl_r1, vcl_r1); viennacl::matrix_range<VCLMatrixType> vcl_D_sub(vcl_D, vcl_r1, vcl_r1); std::cout << std::endl; std::cout << "//" << std::endl; std::cout << "////////// Test: Copy CTOR //////////" << std::endl; std::cout << "//" << std::endl; { std::cout << "Testing matrix created from range... "; ublas_B = ublas_A_sub1; VCLMatrixType vcl_temp = vcl_A_sub1; if (check_for_equality(ublas_B, vcl_temp)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Testing range created from range... "; //ublas_A_sub1 = ublas_A_sub1; VCLMatrixType vcl_ctor_sub1 = vcl_A_sub1; //Note: This is mostly a compilation test only if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } } std::cout << std::endl; std::cout << "//" << std::endl; std::cout << "////////// Test: Assignments //////////" << std::endl; std::cout << "//" << std::endl; std::cout << "Testing matrix assigned to range... "; ublas_A_sub1 = ublas_B; vcl_A_sub1 = vcl_B; if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Testing range assigned to matrix... "; ublas_B = ublas_A_sub2; vcl_B = vcl_A_sub2; if (check_for_equality(ublas_B, vcl_B)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Testing range assigned to range... "; ublas_A_sub1 = ublas_C_sub; vcl_A_sub1 = vcl_C_sub; if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << std::endl; std::cout << "//" << std::endl; std::cout << "////////// Test 1: Copy to GPU //////////" << std::endl; std::cout << "//" << std::endl; ublas_A_sub1 = ublas_B; viennacl::copy(ublas_B, vcl_A_sub1); std::cout << "Testing upper left copy to A... "; if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } ublas_A_sub2 = ublas_B; viennacl::copy(ublas_B, vcl_A_sub2); std::cout << "Testing lower right copy to A... "; if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } ublas_C_sub = ublas_B; viennacl::copy(ublas_B, vcl_C_sub); std::cout << "Testing upper copy to C... "; if (check_for_equality(ublas_C, vcl_C)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } ublas_D_sub = ublas_B; viennacl::copy(ublas_B, vcl_D_sub); std::cout << "Testing left copy to D... "; if (check_for_equality(ublas_D, vcl_D)) std::cout << "PASSED!" << std::endl; else std::cout << std::endl << "TEST failed!"; std::cout << std::endl; std::cout << "//" << std::endl; std::cout << "////////// Test 2: Copy from GPU //////////" << std::endl; std::cout << "//" << std::endl; std::cout << "Testing upper left copy to A... "; if (check_for_equality(ublas_A_sub1, vcl_A_sub1)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Testing lower right copy to A... "; if (check_for_equality(ublas_A_sub2, vcl_A_sub2)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Testing upper copy to C... "; if (check_for_equality(ublas_C_sub, vcl_C_sub)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Testing left copy to D... "; if (check_for_equality(ublas_D_sub, vcl_D_sub)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "//" << std::endl; std::cout << "////////// Test 3: Addition //////////" << std::endl; std::cout << "//" << std::endl; viennacl::copy(ublas_A_sub2, vcl_A_sub2); std::cout << "Inplace add to submatrix: "; ublas_A_sub2 += ublas_A_sub2; vcl_A_sub2 += vcl_A_sub2; if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Inplace add to matrix: "; ublas_B += ublas_A_sub2; vcl_B += vcl_A_sub2; if (check_for_equality(ublas_B, vcl_B)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Inplace add of matrix: "; ublas_A_sub2 += ublas_B; vcl_A_sub2 += vcl_B; if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Add to submatrix: "; ublas_A_sub2 = ublas_A_sub2 + ublas_A_sub2; vcl_A_sub2 = vcl_A_sub2 + vcl_A_sub2; if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Add to matrix: "; ublas_B = ublas_A_sub2 + ublas_A_sub2; vcl_B = vcl_A_sub2 + vcl_A_sub2; if (check_for_equality(ublas_B, vcl_B)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "//" << std::endl; std::cout << "////////// Test 4: Subtraction //////////" << std::endl; std::cout << "//" << std::endl; viennacl::copy(ublas_A_sub2, vcl_A_sub2); std::cout << "Inplace sub to submatrix: "; ublas_A_sub2 -= ublas_A_sub2; vcl_A_sub2 -= vcl_A_sub2; if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Inplace sub to matrix: "; ublas_B -= ublas_A_sub2; vcl_B -= vcl_A_sub2; if (check_for_equality(ublas_B, vcl_B)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Inplace sub of matrix: "; ublas_A_sub2 -= ublas_B; vcl_A_sub2 -= vcl_B; if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Sub from submatrix: "; ublas_A_sub2 = ublas_A_sub2 - ublas_A_sub2; vcl_A_sub2 = vcl_A_sub2 - vcl_A_sub2; if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Sub from matrix: "; ublas_B = ublas_A_sub2 - ublas_A_sub2; vcl_B = vcl_A_sub2 - vcl_A_sub2; if (check_for_equality(ublas_B, vcl_B)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "//" << std::endl; std::cout << "////////// Test 5: Scaling //////////" << std::endl; std::cout << "//" << std::endl; viennacl::copy(ublas_A, vcl_A); std::cout << "Multiplication with CPU scalar: "; ublas_A_sub2 *= ScalarType(3.1415); vcl_A_sub2 *= ScalarType(3.1415); if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Multiplication with GPU scalar: "; ublas_A_sub2 *= gpu_pi; vcl_A_sub2 *= gpu_pi; if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Division with CPU scalar: "; ublas_A_sub2 /= ScalarType(3.1415); vcl_A_sub2 /= ScalarType(3.1415); if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Division with GPU scalar: "; ublas_A_sub2 /= gpu_pi; vcl_A_sub2 /= gpu_pi; if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "//" << std::endl; std::cout << "////////// Test 6: Matrix-Matrix Products //////////" << std::endl; std::cout << "//" << std::endl; std::cout << "Assigned C = A * B: "; ublas_A_sub1 = prod(ublas_C_sub, ublas_D_sub); vcl_A_sub1 = viennacl::linalg::prod(vcl_C_sub, vcl_D_sub); if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Assigned C = A^T * B: "; ublas_A_sub1 = prod(trans(ublas_C_sub), ublas_D_sub); vcl_A_sub1 = viennacl::linalg::prod(trans(vcl_C_sub), vcl_D_sub); if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Assigned C = A * B^T: "; ublas_A_sub1 = prod(ublas_C_sub, trans(ublas_D_sub)); vcl_A_sub1 = viennacl::linalg::prod(vcl_C_sub, trans(vcl_D_sub)); if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Assigned C = A^T * B^T: "; ublas_A_sub1 = prod(trans(ublas_C_sub), trans(ublas_D_sub)); vcl_A_sub1 = viennacl::linalg::prod(trans(vcl_C_sub), trans(vcl_D_sub)); if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "Inplace add of prod(): "; ublas_A_sub1 += prod(ublas_C_sub, ublas_D_sub); vcl_A_sub1 += viennacl::linalg::prod(vcl_C_sub, vcl_D_sub); if (check_for_equality(ublas_A, vcl_A)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << "//" << std::endl; std::cout << "////////// Test 7: Matrix-Vector Products //////////" << std::endl; std::cout << "//" << std::endl; VectorType ublas_v1(dim_large); for (std::size_t i=0; i<ublas_v1.size(); ++i) ublas_v1(i) = i; boost::numeric::ublas::vector_range<VectorType> ublas_v1_sub(ublas_v1, ublas_r1); VectorType ublas_v2(dim_large); for (std::size_t i=0; i<ublas_v2.size(); ++i) ublas_v2(i) = i - 5; boost::numeric::ublas::vector_range<VectorType> ublas_v2_sub(ublas_v2, ublas_r1); VCLVectorType vcl_v1(ublas_v1.size()); viennacl::vector_range<VCLVectorType> vcl_v1_sub(vcl_v1, vcl_r1); VCLVectorType vcl_v2(ublas_v2.size()); viennacl::vector_range<VCLVectorType> vcl_v2_sub(vcl_v2, vcl_r1); viennacl::copy(ublas_v1, vcl_v1); viennacl::copy(ublas_v2, vcl_v2); viennacl::copy(ublas_A_sub1, vcl_A_sub1); ublas_v2_sub = prod(ublas_A_sub1, ublas_v1_sub); vcl_v2_sub = viennacl::linalg::prod(vcl_A_sub1, vcl_v1_sub); if (check_for_equality_vector(ublas_v2, vcl_v2)) std::cout << "PASSED!" << std::endl; else { std::cout << std::endl << "TEST failed!"; return EXIT_FAILURE; } std::cout << std::endl; std::cout << "----------------------------------------------" << std::endl; std::cout << std::endl; return EXIT_SUCCESS; }