/// Fill row i of the 2-D view `a`: a(i,j) = dimension_0 * i + j, computed
/// in double precision, for every column index j.
///
/// \param i  Row index supplied by the parallel dispatch.
KOKKOS_INLINE_FUNCTION
void operator() (const typename ViewType::size_type i) const {
  typedef typename ViewType::size_type size_type;

  // On CPUs this loop could be vectorized, so j should be the stride-1
  // access on `a` for optimal performance; i.e., `a` should be LayoutRight.
  // On GPUs, threads should do coalesced loads and stores. That means
  // that i should be the stride-1 access for optimal performance.

  // Both quantities are invariant across the j loop, so compute them once.
  // The multiplication order matches the per-entry expression
  // (1.0 * dimension_0 * i), so the stored values are unchanged.
  const size_type numCols = a.dimension_1 ();
  const double rowBase = 1.0 * a.dimension_0 () * i;

  for (size_type j = 0; j < numCols; ++j) {
    a(i,j) = rowBase + 1.0 * j;
  }
}
// Report a stride for the 2-D view A.
//
// Asks A for its strides and returns the entry for dimension 1 when A has
// more than one column. Otherwise it falls back to A.dimension_0 ().
// NOTE(review): presumably stride[1] is the element distance between
// consecutive entries along dimension 1 -- confirm against the View docs.
size_t getStride2DView (ViewType A) {
  // Scratch space that A.stride() fills in; sized 8 as in the original.
  size_t viewStrides[8];
  A.stride (viewStrides);

  if (A.dimension_1 () > 1) {
    return viewStrides[1];
  }
  // Single-column (or empty second dimension) case.
  return A.dimension_0 ();
}