예제 #1
0
 KOKKOS_INLINE_FUNCTION
 void operator() (const typename ViewType::size_type i) const {
   // Fills row i of the view `a`: entry (i,j) receives
   // a.dimension_0() * i + j, evaluated as a double expression.
   //
   // Layout note: on CPUs the inner loop over j is the candidate for
   // vectorization, so j should be the stride-1 index of `a`
   // (i.e. LayoutRight). On GPUs, threads should perform coalesced
   // loads and stores, so there i should be the stride-1 index.

   // The row-dependent term does not change with j; compute it once.
   const double row_offset = 1.0 * a.dimension_0 () * i;

   typename ViewType::size_type j = 0;
   while (j < a.dimension_1 ()) {
     a(i,j) = row_offset + 1.0 * j;
     ++j;
   }
 }
 // Constructs the functor and runs it right away.
 //
 // v : view kept in m_v; its dimension_0() sets the number of
 //     parallel work items.
 // s : scalar kept in m_s.
 //
 // Both members are set in the initializer list, so they are already
 // populated when *this is handed to Kokkos::parallel_for in the body.
 MPVectorAtomicFunctor( const ViewType & v , const scalar_type & s )
   : m_v( v )
   , m_s( s )
 {
   Kokkos::parallel_for( m_v.dimension_0() , *this );
 }
 // Builds mean_vals from the supplied view.
 //
 // vals : source view. A new view labelled "mean-values" is created
 //        with extent vals.dimension_0(), and the contents of vals
 //        are then deep-copied into it.
 GetMeanValsFunc(const ViewType& vals)
 {
   // Step 1: replace mean_vals with a freshly constructed view.
   mean_vals = ViewType( "mean-values",
                         vals.dimension_0() );

   // Step 2: copy the data (destination first, source second).
   Kokkos::deep_copy( mean_vals,
                      vals );
 }
 // Kernel launch
 static void apply(const ViewType& v, const ScalarType& s) {
   const size_type nrow = v.dimension_0();
   Kokkos::parallel_for( nrow, ScalarAssignKernel(v,s) );
 }
예제 #5
0
 // Returns a stride value for the 2-D view A.
 //
 // The view's strides are fetched with A.stride(). If A has more than
 // one column (dimension_1() > 1), the entry at index 1 of the stride
 // array is returned; otherwise the result is A.dimension_0().
 size_t getStride2DView (ViewType A) {
   // Scratch array filled in by A.stride(); only entry 1 is read here.
   size_t strides[8];
   A.stride (strides);

   if (A.dimension_1 () > 1) {
     return strides[1];
   }
   return A.dimension_0 ();
 }