std::vector<double> test_mpvector_spmv(const int ensemble_length, const int nGrid, const int iterCount, Kokkos::DeviceConfig dev_config, MultiplyTag tag) { typedef StorageType storage_type; typedef typename storage_type::value_type value_type; typedef typename storage_type::ordinal_type ordinal_type; typedef typename storage_type::device_type device_type; typedef Sacado::MP::Vector<StorageType> VectorType; typedef Kokkos::LayoutRight Layout; typedef Kokkos::View< VectorType*, Layout, device_type > vector_type; typedef Kokkos::CrsMatrix< VectorType, ordinal_type, device_type > matrix_type; typedef typename matrix_type::StaticCrsGraphType matrix_graph_type; typedef typename matrix_type::values_type matrix_values_type; //------------------------------ // Generate graph for "FEM" box structure: std::vector< std::vector<size_t> > fem_graph; const size_t fem_length = nGrid * nGrid * nGrid; const size_t graph_length = generate_fem_graph( nGrid , fem_graph ); //------------------------------ // Generate input multivector: vector_type x = vector_type(Kokkos::allocate_without_initializing, "x", fem_length, ensemble_length); vector_type y = vector_type(Kokkos::allocate_without_initializing, "y", fem_length, ensemble_length); //------------------------------ matrix_graph_type matrix_graph = Kokkos::create_staticcrsgraph<matrix_graph_type>( std::string("test crs graph"), fem_graph); matrix_values_type matrix_values = matrix_values_type(Kokkos::allocate_without_initializing, "matrix", graph_length, ensemble_length); matrix_type matrix("block_matrix", fem_length, matrix_values, matrix_graph); matrix.dev_config = dev_config; //------------------------------ // Fill: { // The VectorType may be dynamic (with allocated memory) // so cannot pass a VectorType value to the device. // Get an array-of-intrinsic View and fill that view. typename vector_type::array_type xx( x ); typename vector_type::array_type yy( y ); typename matrix_values_type::array_type mm( matrix_values ); Kokkos::deep_copy( xx , value_type(1.0) ); Kokkos::deep_copy( yy , value_type(1.0) ); Kokkos::deep_copy( mm , value_type(1.0) ); } //------------------------------ // One iteration to warm up Stokhos::multiply( matrix, x, y, tag ); device_type::fence(); Kokkos::Impl::Timer clock ; for (int iter = 0; iter < iterCount; ++iter) { Stokhos::multiply( matrix, x, y, tag ); } device_type::fence(); const double seconds_per_iter = clock.seconds() / ((double) iterCount ); const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length; std::vector<double> perf(5); perf[0] = fem_length; perf[1] = ensemble_length; perf[2] = graph_length; perf[3] = seconds_per_iter; perf[4] = flops / seconds_per_iter; return perf; }
bool test_embedded_vector(const typename VectorType::ordinal_type nGrid, const typename VectorType::ordinal_type stoch_length, Kokkos::DeviceConfig dev_config, Multiply multiply_op, Teuchos::FancyOStream& out) { typedef typename VectorType::ordinal_type ordinal_type; typedef typename VectorType::value_type scalar_type; typedef typename VectorType::storage_type storage_type; typedef typename storage_type::device_type device_type; typedef Kokkos::LayoutRight Layout; typedef Kokkos::View< VectorType*, Layout, device_type > block_vector_type; typedef Kokkos::CrsMatrix< VectorType, ordinal_type, device_type > block_matrix_type; typedef typename block_matrix_type::StaticCrsGraphType matrix_graph_type; typedef typename block_matrix_type::values_type matrix_values_type; // Check ensemble_length == storage_type::static_size for static storage TEUCHOS_TEST_FOR_EXCEPTION( storage_type::is_static && storage_type::static_size != stoch_length, std::logic_error, "Static storage size must equal ensemble size"); // Generate FEM graph: ordinal_type fem_length = nGrid * nGrid * nGrid; std::vector< std::vector<ordinal_type> > fem_graph; ordinal_type fem_graph_length = generate_fem_graph( nGrid, fem_graph ); //------------------------------ // Generate input multivector: block_vector_type x = block_vector_type(Kokkos::allocate_without_initializing, "x", fem_length, stoch_length); block_vector_type y = block_vector_type(Kokkos::allocate_without_initializing, "y", fem_length, stoch_length); typename block_vector_type::HostMirror hx = Kokkos::create_mirror_view( x ); typename block_vector_type::HostMirror hy = Kokkos::create_mirror_view( y ); // View the block vector as an array of the embedded intrinsic type. typename block_vector_type::HostMirror::array_type hax = hx ; typename block_vector_type::HostMirror::array_type hay = hy ; for (ordinal_type iRowFEM=0; iRowFEM<fem_length; ++iRowFEM) { for (ordinal_type iRowStoch=0; iRowStoch<stoch_length; ++iRowStoch) { hax(iRowFEM,iRowStoch) = generate_vector_coefficient<scalar_type>( fem_length, stoch_length, iRowFEM, iRowStoch ); hay(iRowFEM,iRowStoch) = 0.0; } } Kokkos::deep_copy( x, hx ); Kokkos::deep_copy( y, hy ); //------------------------------ // Generate block matrix matrix_graph_type matrix_graph = Kokkos::create_staticcrsgraph<matrix_graph_type>( std::string("test crs graph"), fem_graph); matrix_values_type matrix_values = matrix_values_type( Kokkos::allocate_without_initializing, "matrix", fem_graph_length, stoch_length); block_matrix_type matrix( "block_matrix", fem_length, matrix_values, matrix_graph); matrix.dev_config = dev_config; typename matrix_values_type::HostMirror hM = Kokkos::create_mirror_view( matrix.values ); typename matrix_values_type::HostMirror::array_type haM = hM ; for (ordinal_type iRowFEM=0, iEntryFEM=0; iRowFEM<fem_length; ++iRowFEM) { const ordinal_type row_size = fem_graph[iRowFEM].size(); for (ordinal_type iRowEntryFEM=0; iRowEntryFEM<row_size; ++iRowEntryFEM, ++iEntryFEM) { const ordinal_type iColFEM = fem_graph[iRowFEM][iRowEntryFEM]; for (ordinal_type k=0; k<stoch_length; ++k) { haM(iEntryFEM,k) = generate_matrix_coefficient<scalar_type>( fem_length, stoch_length, iRowFEM, iColFEM, k); } } } Kokkos::deep_copy( matrix.values, hM ); //------------------------------ // multiply multiply_op( matrix, x, y ); //------------------------------ // generate correct answer typedef typename block_vector_type::array_type array_type; array_type ay_expected = array_type("ay_expected", fem_length, stoch_length); typename array_type::HostMirror hay_expected = Kokkos::create_mirror_view(ay_expected); for (ordinal_type iRowFEM=0, iEntryFEM=0; iRowFEM<fem_length; ++iRowFEM) { const ordinal_type row_size = fem_graph[iRowFEM].size(); for (ordinal_type iRowEntryFEM=0; iRowEntryFEM<row_size; ++iRowEntryFEM, ++iEntryFEM) { const ordinal_type iColFEM = fem_graph[iRowFEM][iRowEntryFEM]; for (ordinal_type k=0; k<stoch_length; ++k) { hay_expected(iRowFEM, k) += generate_matrix_coefficient<scalar_type>( fem_length, stoch_length, iRowFEM, iColFEM, k) * generate_vector_coefficient<scalar_type>( fem_length, stoch_length, iColFEM, k ); } } } Kokkos::deep_copy( ay_expected, hay_expected ); //------------------------------ // check typename block_vector_type::array_type ay = y; scalar_type rel_tol = ScalarTol<scalar_type>::tol(); scalar_type abs_tol = ScalarTol<scalar_type>::tol(); bool success = compare_rank_2_views(ay, ay_expected, rel_tol, abs_tol, out); return success; }
std::vector<double> test_scalar_spmv(const int ensemble_length, const int nGrid, const int iterCount, Kokkos::DeviceConfig dev_config) { typedef ScalarType value_type; typedef OrdinalType ordinal_type; typedef Device device_type; typedef Kokkos::View< value_type*, device_type > vector_type; typedef Kokkos::CrsMatrix< value_type, ordinal_type, device_type > matrix_type; typedef typename matrix_type::StaticCrsGraphType matrix_graph_type; typedef typename matrix_type::values_type matrix_values_type; //------------------------------ // Generate graph for "FEM" box structure: std::vector< std::vector<size_t> > fem_graph; const size_t fem_length = nGrid * nGrid * nGrid; const size_t graph_length = generate_fem_graph( nGrid , fem_graph ); //------------------------------ // Generate input multivector: std::vector<vector_type> x(ensemble_length); std::vector<vector_type> y(ensemble_length); for (int e=0; e<ensemble_length; ++e) { x[e] = vector_type(Kokkos::allocate_without_initializing, "x", fem_length); y[e] = vector_type(Kokkos::allocate_without_initializing, "y", fem_length); Kokkos::deep_copy( x[e] , value_type(1.0) ); Kokkos::deep_copy( y[e] , value_type(0.0) ); } //------------------------------ std::vector<matrix_type> matrix(ensemble_length); for (int e=0; e<ensemble_length; ++e) { matrix_graph_type matrix_graph = Kokkos::create_staticcrsgraph<matrix_graph_type>( std::string("test crs graph"), fem_graph); matrix_values_type matrix_values = matrix_values_type(Kokkos::allocate_without_initializing, "matrix", graph_length); matrix[e] = matrix_type("matrix", fem_length, matrix_values, matrix_graph); Kokkos::deep_copy( matrix[e].values , value_type(1.0) ); } //------------------------------ // One iteration to warm up for (int iter = 0; iter < iterCount; ++iter) { for (int e=0; e<ensemble_length; ++e) { Kokkos::MV_Multiply( y[e], matrix[e], x[e] ); } } device_type::fence(); Kokkos::Impl::Timer clock ; for (int iter = 0; iter < iterCount; ++iter) { for (int e=0; e<ensemble_length; ++e) { Kokkos::MV_Multiply( y[e], matrix[e], x[e] ); } } device_type::fence(); const double seconds_per_iter = clock.seconds() / ((double) iterCount ); const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length; std::vector<double> perf(5); perf[0] = fem_length; perf[1] = ensemble_length; perf[2] = graph_length; perf[3] = seconds_per_iter; perf[4] = flops / seconds_per_iter; return perf; }