예제 #1
0
// Times Stokhos::multiply() on a CRS matrix and vectors whose scalar type is
// Sacado::MP::Vector<StorageType>.
//
// StorageType and MultiplyTag are not declared in this snippet; presumably
// they are template parameters declared immediately above this definition.
//
// Parameters:
//   ensemble_length  second extent used when allocating x, y and the matrix
//                    values
//   nGrid            grid size handed to generate_fem_graph(); the vectors
//                    get nGrid^3 rows
//   iterCount        number of timed multiply calls; must be positive,
//                    otherwise the per-iteration time below divides by zero
//   dev_config       stored in matrix.dev_config before multiplying
//   tag              forwarded unchanged to Stokhos::multiply()
//
// Returns five values:
//   [0] number of rows (nGrid^3)
//   [1] ensemble_length
//   [2] graph length reported by generate_fem_graph()
//   [3] wall-clock seconds per timed iteration
//   [4] 1e-9 * 2 * graph_length * ensemble_length / seconds per iteration
std::vector<double>
test_mpvector_spmv(const int ensemble_length,
                   const int nGrid,
                   const int iterCount,
                   Kokkos::DeviceConfig dev_config,
                   MultiplyTag tag)
{
  typedef StorageType storage_type;
  typedef typename storage_type::value_type value_type;
  typedef typename storage_type::ordinal_type ordinal_type;
  typedef typename storage_type::device_type device_type;
  typedef Sacado::MP::Vector<StorageType> VectorType;
  typedef Kokkos::LayoutRight Layout;
  typedef Kokkos::View< VectorType*, Layout, device_type > vector_type;
  typedef Kokkos::CrsMatrix< VectorType, ordinal_type, device_type > matrix_type;
  typedef typename matrix_type::StaticCrsGraphType matrix_graph_type;
  typedef typename matrix_type::values_type matrix_values_type;

  //------------------------------
  // Generate graph for "FEM" box structure:

  std::vector< std::vector<size_t> > fem_graph;
  // Widen before multiplying: nGrid^3 evaluated in int overflows (undefined
  // behavior) long before it exceeds the range of size_t.
  const size_t fem_length = static_cast<size_t>(nGrid) * nGrid * nGrid;
  const size_t graph_length = generate_fem_graph( nGrid , fem_graph );

  //------------------------------
  // Generate input multivector:

  vector_type x =
    vector_type(Kokkos::allocate_without_initializing,
                "x", fem_length, ensemble_length);
  vector_type y =
    vector_type(Kokkos::allocate_without_initializing,
                "y", fem_length, ensemble_length);

  //------------------------------
  // Build the CRS matrix from the graph and an uninitialized value array:

  matrix_graph_type matrix_graph =
    Kokkos::create_staticcrsgraph<matrix_graph_type>(
      std::string("test crs graph"), fem_graph);
  matrix_values_type matrix_values =
    matrix_values_type(Kokkos::allocate_without_initializing,
                       "matrix", graph_length, ensemble_length);
  matrix_type matrix("block_matrix", fem_length, matrix_values, matrix_graph);
  matrix.dev_config = dev_config;

  //------------------------------
  // Fill:

  {
    // The VectorType may be dynamic (with allocated memory)
    // so cannot pass a VectorType value to the device.
    // Get an array-of-intrinsic View and fill that view.
    typename vector_type::array_type xx( x );
    typename vector_type::array_type yy( y );
    typename matrix_values_type::array_type mm( matrix_values );

    Kokkos::deep_copy( xx , value_type(1.0) );
    Kokkos::deep_copy( yy , value_type(1.0) );
    Kokkos::deep_copy( mm , value_type(1.0) );
  }

  //------------------------------

  // One iteration to warm up
  Stokhos::multiply( matrix, x, y, tag );

  // Fence on both sides of the timed loop so the clock covers exactly the
  // iterCount multiplies.
  device_type::fence();
  Kokkos::Impl::Timer clock ;
  for (int iter = 0; iter < iterCount; ++iter) {
    Stokhos::multiply( matrix, x, y, tag );
  }
  device_type::fence();

  const double seconds_per_iter =
    clock.seconds() / static_cast<double>( iterCount );
  // Two floating-point operations (multiply + add) are counted per stored
  // matrix entry per ensemble member, scaled by 1e-9.
  const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;

  std::vector<double> perf(5);
  perf[0] = fem_length;
  perf[1] = ensemble_length;
  perf[2] = graph_length;
  perf[3] = seconds_per_iter;
  perf[4] = flops / seconds_per_iter;
  return perf;
}
// Checks a block sparse matrix-vector product against a reference computed
// on the host.
//
// VectorType and Multiply are not declared in this snippet; presumably they
// are template parameters declared immediately above this definition.
//
// Parameters:
//   nGrid         grid size handed to generate_fem_graph(); the vectors get
//                 nGrid^3 rows
//   stoch_length  second extent of every view; must equal
//                 storage_type::static_size when the storage is static
//   dev_config    stored in the matrix's dev_config member
//   multiply_op   callable invoked as multiply_op(matrix, x, y)
//   out           stream handed to compare_rank_2_views()
//
// Returns the result of compare_rank_2_views() on the computed and expected
// vectors. Throws std::logic_error (through TEUCHOS_TEST_FOR_EXCEPTION) on a
// static-size mismatch.
bool test_embedded_vector(const typename VectorType::ordinal_type nGrid,
                          const typename VectorType::ordinal_type stoch_length,
                          Kokkos::DeviceConfig dev_config,
                          Multiply multiply_op,
                          Teuchos::FancyOStream& out)
{
  typedef typename VectorType::ordinal_type ordinal_type;
  typedef typename VectorType::value_type scalar_type;
  typedef typename VectorType::storage_type storage_type;
  typedef typename storage_type::device_type device_type;
  typedef Kokkos::LayoutRight Layout;
  typedef Kokkos::View< VectorType*, Layout, device_type > block_vector_type;
  typedef Kokkos::CrsMatrix< VectorType, ordinal_type, device_type > block_matrix_type;
  typedef typename block_matrix_type::StaticCrsGraphType matrix_graph_type;
  typedef typename block_matrix_type::values_type matrix_values_type;

  // Statically sized storage cannot represent any other ensemble size.
  TEUCHOS_TEST_FOR_EXCEPTION(
    storage_type::is_static && storage_type::static_size != stoch_length,
    std::logic_error,
    "Static storage size must equal ensemble size");

  //------------------------------
  // FEM graph

  ordinal_type num_rows = nGrid * nGrid * nGrid;
  std::vector< std::vector<ordinal_type> > fem_graph;
  ordinal_type num_entries = generate_fem_graph( nGrid, fem_graph );

  //------------------------------
  // Input (x) and output (y) vectors, filled through host mirrors

  block_vector_type x(Kokkos::allocate_without_initializing,
                      "x", num_rows, stoch_length);
  block_vector_type y(Kokkos::allocate_without_initializing,
                      "y", num_rows, stoch_length);

  typename block_vector_type::HostMirror host_x = Kokkos::create_mirror_view( x );
  typename block_vector_type::HostMirror host_y = Kokkos::create_mirror_view( y );

  // Rank-2 views of the same mirrors, indexed by (row, ensemble entry) of
  // the embedded intrinsic type.
  typename block_vector_type::HostMirror::array_type host_x_flat = host_x;
  typename block_vector_type::HostMirror::array_type host_y_flat = host_y;

  for (ordinal_type row = 0; row < num_rows; ++row) {
    for (ordinal_type k = 0; k < stoch_length; ++k) {
      host_x_flat(row, k) =
        generate_vector_coefficient<scalar_type>(
          num_rows, stoch_length, row, k );
      host_y_flat(row, k) = 0.0;
    }
  }

  Kokkos::deep_copy( x, host_x );
  Kokkos::deep_copy( y, host_y );

  //------------------------------
  // Block matrix

  matrix_graph_type crs_graph =
    Kokkos::create_staticcrsgraph<matrix_graph_type>(
      std::string("test crs graph"), fem_graph);
  matrix_values_type crs_values(
    Kokkos::allocate_without_initializing,
    "matrix", num_entries, stoch_length);
  block_matrix_type matrix(
    "block_matrix", num_rows, crs_values, crs_graph);
  matrix.dev_config = dev_config;

  typename matrix_values_type::HostMirror host_values =
    Kokkos::create_mirror_view( matrix.values );
  typename matrix_values_type::HostMirror::array_type host_values_flat =
    host_values;

  {
    // Running index of the current entry in the flattened CRS value array.
    ordinal_type entry = 0;
    for (ordinal_type row = 0; row < num_rows; ++row) {
      const std::vector<ordinal_type>& cols = fem_graph[row];
      const ordinal_type num_cols = cols.size();
      for (ordinal_type j = 0; j < num_cols; ++j) {
        const ordinal_type col = cols[j];
        for (ordinal_type k = 0; k < stoch_length; ++k) {
          host_values_flat(entry, k) =
            generate_matrix_coefficient<scalar_type>(
              num_rows, stoch_length, row, col, k);
        }
        ++entry;
      }
    }
  }

  Kokkos::deep_copy( matrix.values, host_values );

  //------------------------------
  // Product under test

  multiply_op( matrix, x, y );

  //------------------------------
  // Reference result, accumulated on the host from the same coefficient
  // generators used to fill the matrix and x.

  typedef typename block_vector_type::array_type array_type;
  array_type ay_expected("ay_expected", num_rows, stoch_length);
  typename array_type::HostMirror host_expected =
    Kokkos::create_mirror_view(ay_expected);

  // NOTE(review): the += below accumulates onto whatever the mirror holds
  // initially; presumably the label-only constructor zero-fills -- confirm.
  for (ordinal_type row = 0; row < num_rows; ++row) {
    const std::vector<ordinal_type>& cols = fem_graph[row];
    const ordinal_type num_cols = cols.size();
    for (ordinal_type j = 0; j < num_cols; ++j) {
      const ordinal_type col = cols[j];
      for (ordinal_type k = 0; k < stoch_length; ++k) {
        host_expected(row, k) +=
          generate_matrix_coefficient<scalar_type>(
            num_rows, stoch_length, row, col, k) *
          generate_vector_coefficient<scalar_type>(
            num_rows, stoch_length, col, k );
      }
    }
  }
  Kokkos::deep_copy( ay_expected, host_expected );

  //------------------------------
  // Compare computed and expected values

  array_type ay = y;
  scalar_type relative_tol = ScalarTol<scalar_type>::tol();
  scalar_type absolute_tol = ScalarTol<scalar_type>::tol();
  return compare_rank_2_views(ay, ay_expected, relative_tol, absolute_tol, out);
}
예제 #3
0
// Times ensemble_length independent scalar sparse matrix-vector products
// (Kokkos::MV_Multiply), one matrix and one x/y vector pair per ensemble
// member.
//
// ScalarType, OrdinalType and Device are not declared in this snippet;
// presumably they are template parameters declared immediately above this
// definition.
//
// Parameters:
//   ensemble_length  number of independent matrix/vector triples
//   nGrid            grid size handed to generate_fem_graph(); every vector
//                    has nGrid^3 rows
//   iterCount        number of timed sweeps over all ensemble members; must
//                    be positive, otherwise the per-iteration time below
//                    divides by zero
//   dev_config       not used by this function
//
// Returns five values:
//   [0] number of rows (nGrid^3)
//   [1] ensemble_length
//   [2] graph length reported by generate_fem_graph()
//   [3] wall-clock seconds per timed iteration
//   [4] 1e-9 * 2 * graph_length * ensemble_length / seconds per iteration
std::vector<double>
test_scalar_spmv(const int ensemble_length,
                 const int nGrid,
                 const int iterCount,
                 Kokkos::DeviceConfig dev_config)
{
  typedef ScalarType value_type;
  typedef OrdinalType ordinal_type;
  typedef Device device_type;
  typedef Kokkos::View< value_type*, device_type > vector_type;
  typedef Kokkos::CrsMatrix< value_type, ordinal_type, device_type > matrix_type;
  typedef typename matrix_type::StaticCrsGraphType matrix_graph_type;
  typedef typename matrix_type::values_type matrix_values_type;

  //------------------------------
  // Generate graph for "FEM" box structure:

  std::vector< std::vector<size_t> > fem_graph;
  // Widen before multiplying: nGrid^3 evaluated in int overflows (undefined
  // behavior) long before it exceeds the range of size_t.
  const size_t fem_length = static_cast<size_t>(nGrid) * nGrid * nGrid;
  const size_t graph_length = generate_fem_graph( nGrid , fem_graph );

  //------------------------------
  // Generate input multivector:

  std::vector<vector_type> x(ensemble_length);
  std::vector<vector_type> y(ensemble_length);
  for (int e=0; e<ensemble_length; ++e) {
    x[e] = vector_type(Kokkos::allocate_without_initializing,
                       "x", fem_length);
    y[e] = vector_type(Kokkos::allocate_without_initializing,
                       "y", fem_length);

    Kokkos::deep_copy( x[e] , value_type(1.0) );
    Kokkos::deep_copy( y[e] , value_type(0.0) );
  }

  //------------------------------
  // One matrix per ensemble member, each with its own graph and values:

  std::vector<matrix_type> matrix(ensemble_length);
  for (int e=0; e<ensemble_length; ++e) {
    matrix_graph_type matrix_graph =
      Kokkos::create_staticcrsgraph<matrix_graph_type>(
        std::string("test crs graph"), fem_graph);
    matrix_values_type matrix_values =
      matrix_values_type(Kokkos::allocate_without_initializing,
                         "matrix", graph_length);
    matrix[e] = matrix_type("matrix", fem_length, matrix_values, matrix_graph);

    Kokkos::deep_copy( matrix[e].values , value_type(1.0) );
  }

  //------------------------------

  // One iteration to warm up: a single sweep over every ensemble member.
  // (Previously this ran iterCount sweeps, contradicting the comment and
  // doubling the run time.)
  for (int e=0; e<ensemble_length; ++e) {
    Kokkos::MV_Multiply( y[e], matrix[e], x[e] );
  }

  // Fence on both sides of the timed loop so the clock covers exactly the
  // iterCount sweeps.
  device_type::fence();
  Kokkos::Impl::Timer clock ;
  for (int iter = 0; iter < iterCount; ++iter) {
    for (int e=0; e<ensemble_length; ++e) {
      Kokkos::MV_Multiply( y[e], matrix[e], x[e] );
    }
  }
  device_type::fence();

  const double seconds_per_iter =
    clock.seconds() / static_cast<double>( iterCount );
  // Two floating-point operations (multiply + add) are counted per stored
  // matrix entry per ensemble member, scaled by 1e-9.
  const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;

  std::vector<double> perf(5);
  perf[0] = fem_length;
  perf[1] = ensemble_length;
  perf[2] = graph_length;
  perf[3] = seconds_per_iter;
  perf[4] = flops / seconds_per_iter;
  return perf;
}