KOKKOS_INLINE_FUNCTION
    // Work item for the fixture's parallel fill.  The same index i is tested
    // against the extent of every table, so one launch sized to the largest
    // extent populates all of them; each section is guarded independently.
    void operator()( size_t i ) const
    {
        // Multiplier applied to an element's grid coordinate before adding
        // the per-node offset; selected at compile time from the element order.
        enum { elem_node_scale = Order == BoxElemPart::ElemLinear ? 1 :
                                 Order == BoxElemPart::ElemQuadratic ? 2 : 0
             };

        // --- element -> node connectivity -----------------------------------
        if ( i < m_elem_node.dimension_0() * m_elem_node.dimension_1() ) {

            // i enumerates (element, local node) pairs, ElemNode per element.
            const size_t elem_index = i / ElemNode ;
            const size_t node_slot  = i % ElemNode ;

            unsigned elem_origin[SpaceDim] ;
            m_box_part.uses_elem_coord( elem_index , elem_origin );

            unsigned grid_pos[SpaceDim] ;
            for ( unsigned d = 0 ; d < 3 ; ++d ) {
                grid_pos[d] = elem_node_scale * elem_origin[d] + m_elem_node_local[node_slot][d] ;
            }

            m_elem_node(elem_index,node_slot) = m_box_part.local_node_id( grid_pos );
        }

        // --- node grid position and mapped coordinates ----------------------
        if ( i < m_node_grid.dimension_0() ) {
            unsigned grid_pos[SpaceDim] ;
            m_box_part.local_node_coord( i , grid_pos );

            m_node_grid(i,0) = grid_pos[0] ;
            m_node_grid(i,1) = grid_pos[1] ;
            m_node_grid(i,2) = grid_pos[2] ;

            // The coordinate map receives the grid position and the three
            // coordinate entries of node i.
            m_coord_map( grid_pos[0] , grid_pos[1] , grid_pos[2] ,
                         m_node_coord(i,0) ,
                         m_node_coord(i,1) ,
                         m_node_coord(i,2) );
        }

        // --- receive message table: (rank, count) ---------------------------
        if ( i < m_recv_node.dimension_0() ) {
            m_recv_node(i,0) = m_box_part.recv_node_rank(i);
            m_recv_node(i,1) = m_box_part.recv_node_count(i);
        }

        // --- send message table: (rank, count) ------------------------------
        if ( i < m_send_node.dimension_0() ) {
            m_send_node(i,0) = m_box_part.send_node_rank(i);
            m_send_node(i,1) = m_box_part.send_node_count(i);
        }

        // --- ids of the nodes to send ---------------------------------------
        if ( i < m_send_node_id.dimension_0() ) {
            m_send_node_id(i) = m_box_part.send_node_id(i);
        }
    }
	// General matrix-matrix multiply on rank-2 LayoutRight (row-major) views,
	// forwarded to Teuchos::BLAS.
	//
	// BLAS works on column-major storage.  A row-major matrix read as
	// column-major is its transpose, so the operands are passed in swapped
	// order (B first, then A) with the result dimensions swapped (n, m); the
	// column-major result written into C's memory is then C in row-major.
	//
	// transA, transB : transpose flags for A and B.
	// alpha, beta    : scalars forwarded unchanged to BLAS GEMM.
	// A, B           : input views.
	// C              : in/out view; m and n are taken from its extents.
	//
	// The leading dimension of each operand is its stored row length
	// (dimension_1), independent of the transpose flag.  Hard-coding n and k,
	// as was done before, is only correct when neither operand is transposed.
	// NOTE(review): assumes the views are contiguous with no row padding.
	static void GEMM(Teuchos::ETransp transA, Teuchos::ETransp transB, Scalar alpha,
          Kokkos::View<Scalar**,Kokkos::LayoutRight,Kokkos::DefaultExecutionSpace> A, Kokkos::View<Scalar**,Kokkos::LayoutRight,Kokkos::DefaultExecutionSpace> B,
          Scalar beta, Kokkos::View<Scalar**,Kokkos::LayoutRight,Kokkos::DefaultExecutionSpace> C){
	    Teuchos::BLAS<int,Scalar> blas;

	    const int m = static_cast<int> (C.dimension_0 ());
	    const int n = static_cast<int> (C.dimension_1 ());
	    // Shared (inner) extent of the product.
	    const int k = static_cast<int> (transA == Teuchos::NO_TRANS ? A.dimension_1 () : A.dimension_0 ());

	    // Row length of each row-major operand == its column-major leading dimension.
	    const int lda = static_cast<int> (A.dimension_1 ());
	    const int ldb = static_cast<int> (B.dimension_1 ());
	    const int ldc = n;

	    blas.GEMM(transB, transA, n, m, k, alpha,
	              B.ptr_on_device(), ldb,
	              A.ptr_on_device(), lda,
	              beta, C.ptr_on_device(), ldc);
	}
 KOKKOS_INLINE_FUNCTION
 ~NestedView()
 {
   // An empty view has no entry 0 to update.
   if ( ! member.dimension_0() ) return ;

   // Atomically subtract one from the first entry of the held view.
   Kokkos::atomic_add( & member(0) , -1 );
 }
 KOKKOS_INLINE_FUNCTION
 NestedView & operator = ( const Kokkos::View<int*,Space> & lhs )
   {
     // Take over the given view ...
     member = lhs ;

     // ... and, when it is non-empty, atomically add one to its first entry.
     if ( 0 != member.dimension_0() ) {
       Kokkos::atomic_add( & member(0) , 1 );
     }

     return *this ;
   }
TEST_F( KokkosThreads, LambdaInitialize)
{
  // Allocate a RUN_TIME_DIMENSION x COMPILE_TIME_DIMENSION view without
  // initializing it, then fill it in parallel so that a(i,x) == i.
  Kokkos::View<unsigned*[COMPILE_TIME_DIMENSION], KOKKOS_THREAD_DEVICE> a( Kokkos::ViewAllocateWithoutInitializing("node views"), RUN_TIME_DIMENSION);

  Kokkos::parallel_for<KOKKOS_THREAD_DEVICE>(
    a.dimension_0() ,
    [=](size_t i) {
      for (size_t x=0; x < a.dimension_1(); ++x) {
        a(i,x) = i;
      }
    }
  );

  // Const view of the same data, used for the read-only check below.
  Kokkos::View<const unsigned*[COMPILE_TIME_DIMENSION], KOKKOS_THREAD_DEVICE> b = a;

  // Result of the reduction.  The name must match the one passed to
  // parallel_reduce and EXPECT_EQ below (it was previously declared as
  // `num_error`, which left `num_errors` undeclared).
  int num_errors = 0;
  // Cannot portably call a GTEST macro in parallel
  // count the errors and test that they are equal to zero
  Kokkos::parallel_reduce<KOKKOS_THREAD_DEVICE, int /*reduction value type */>(
    b.dimension_0() ,
    [](int & local_errors)                                    // init lambda
    { local_errors = 0; } ,
    [=](size_t i, int & local_errors) {                       // operator() lambda
      for (size_t x=0; x < b.dimension_1(); ++x)
        local_errors += i == b(i,x) ? 0 : 1;
    } ,
    [](volatile int & dst_err, volatile int const& src_err)   // join lambda
    { dst_err += src_err; } ,
    num_errors                                                // where to store the result
  );
  EXPECT_EQ( 0, num_errors);

}
TEST_F( KokkosThreads, SerialInitialize)
{
  // Rank-2 array: RUN_TIME_DIMENSION rows by COMPILE_TIME_DIMENSION columns.
  //
  // A View constructed from a plain label, e.g.
  //   Kokkos::View<unsigned*[COMPILE_TIME_DIMENSION], KOKKOS_THREAD_DEVICE> a("node views", RUN_TIME_DIMENSION);
  // zero fills its storage.  Wrapping the label, e.g.
  //   Kokkos::View<unsigned*[COMPILE_TIME_DIMENSION], KOKKOS_THREAD_DEVICE> a( Kokkos::ViewAllocateWithoutInitializing("node views"), RUN_TIME_DIMENSION);
  // allocates the storage but skips that fill.

  Kokkos::View<unsigned*[COMPILE_TIME_DIMENSION], KOKKOS_THREAD_DEVICE> a( Kokkos::ViewAllocateWithoutInitializing("node views"), RUN_TIME_DIMENSION);

  // Serial fill: every entry of a row holds that row's index.
  for (size_t row = 0; row < a.dimension_0(); ++row) {
    for (size_t col = 0; col < a.dimension_1(); ++col) {
      a(row,col) = row;
    }
  }

  // Read-only alias of the same allocation: b sees a's values but
  // cannot be used to change them.
  Kokkos::View<const unsigned*[COMPILE_TIME_DIMENSION], KOKKOS_THREAD_DEVICE> b = a;

  // Serial check through the const view.
  for (size_t row = 0; row < b.dimension_0(); ++row) {
    for (size_t col = 0; col < b.dimension_1(); ++col) {
      EXPECT_EQ(row, b(row,col));
    }
  }
}
  // Build a new Map holding this process' global indices, distributed over
  // newComm.  Returns Teuchos::null on processes whose newComm is null.
  Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > >
  Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
  replaceCommWithSubset (const Teuchos::RCP<const Teuchos::Comm<int> >& newComm) const
  {
    using Teuchos::ArrayView;
    using Teuchos::outArg;
    using Teuchos::RCP;
    using Teuchos::REDUCE_MIN;
    using Teuchos::reduceAll;
    typedef global_size_t GST;
    typedef LocalOrdinal LO;
    typedef GlobalOrdinal GO;
    typedef Map<LO, GO, node_type> map_type;

    // mfh 26 Mar 2013: Take the lazy route and rebuild the Map through its
    // ordinary public constructor from the original Map's data.  That costs
    // only O(1) all-reduces over the new communicator, which in the common
    // case spans just a few processes.

    // A null communicator means this process is not part of the new Map.
    if (newComm.is_null ()) {
      return Teuchos::null;
    }

    // The index base must equal the global min GID, which takes a reduction
    // over the new communicator.  Some (or even all) of its processes may
    // own zero entries -- unlike removeEmptyProcesses(), this method may
    // drop an arbitrary subset of processes.  A process with entries
    // contributes its own min GID; a process without entries contributes the
    // global max GID instead.  If nobody has entries the index base is
    // irrelevant anyway.
    const bool ownsNothing = (this->getNodeNumElements () == 0);
    const GO myMinGid = ownsNothing ?
      this->getMaxAllGlobalIndex () : this->getMinGlobalIndex ();

    GO newIndexBase = this->getInvalidGlobalIndex ();
    reduceAll<int, GO> (*newComm, REDUCE_MIN, myMinGid, outArg (newIndexBase));

    // An invalid count asks Map's constructor to compute the global number
    // of indices itself.
    const GST globalNumInds = Teuchos::OrdinalTraits<GST>::invalid ();

    if (! mapDevice_.initialized ()) {
      // Host path: hand the constructor an ArrayView over the host indices.
      Kokkos::View<const GO*, host_mirror_device_type> myGidsHostView =
        mapHost_.getMyGlobalIndices ();
      ArrayView<const GO> myGidsArrayView (myGidsHostView.ptr_on_device (),
                                           myGidsHostView.dimension_0 ());
      return rcp (new map_type (globalNumInds, myGidsArrayView, newIndexBase,
                                newComm, this->getNode ()));
    }

    // Device path: pass the device view of global indices directly.
    Kokkos::View<const GO*, DeviceType> myGIDs =
      mapDevice_.getMyGlobalIndices ();
    return rcp (new map_type (globalNumInds, myGIDs, newIndexBase,
                              newComm, this->getNode ()));
  }
Exemple #8
0
 KOKKOS_INLINE_FUNCTION
 void operator() (int i) const {
   // Accumulate row i's contribution locally, then add it to dest(i) once.
   double row_sum = 0.0;
   for (int col = 0; col < idx.dimension_1(); ++col) {
     // Gather the source value selected by the index table.
     const double v = src(idx(i,col));
     row_sum += v*v + 0.5*(idx.dimension_0()*v -idx.dimension_1()*v);
   }
   dest(i) += row_sum;
 }
   /** Copy the local IDs of the requested cells into <code>lids</code>.
     * The local ordering follows the <code>getOwnedAndSharedIndices</code>
     * method.
     *
     * \param[in]  cellIds Cells to look up; one parallel work item is
     *                     launched per entry.
     * \param[out] lids    Destination view. It is not resized or checked
     *                     here, so the caller must size it correctly.
     */
   void getElementLIDs(Kokkos::View<const int*,PHX::Device> cellIds,
                       Kokkos::View<LocalOrdinalT**,PHX::Device> lids) const
   {
     CopyCellLIDsFunctor copy_lids;
     copy_lids.global_lids = localIDs_k_;
     copy_lids.cellIds     = cellIds;
     copy_lids.local_lids  = lids; // destination is assumed to be sized correctly

     Kokkos::parallel_for(cellIds.dimension_0(), copy_lids);
   }
    // Construct the fixture: set up the box partition and coordinate map,
    // allocate the node / element / communication tables, then fill all of
    // them with a single parallel_for that uses *this as the functor.
    BoxElemFixture( const BoxElemPart::Decompose decompose ,
                    const unsigned global_size ,
                    const unsigned global_rank ,
                    const unsigned elem_nx ,
                    const unsigned elem_ny ,
                    const unsigned elem_nz ,
                    const float bubble_x = 1.1f ,
                    const float bubble_y = 1.2f ,
                    const float bubble_z = 1.3f )
        : m_box_part( Order , decompose , global_size , global_rank , elem_nx , elem_ny , elem_nz )
        , m_coord_map( m_box_part.global_coord_max(0) ,
                       m_box_part.global_coord_max(1) ,
                       m_box_part.global_coord_max(2) ,
                       bubble_x , bubble_y , bubble_z )
        , m_node_coord( "fixture_node_coord" , m_box_part.uses_node_count() )
        , m_node_grid(  "fixture_node_grid" , m_box_part.uses_node_count() )
        , m_elem_node(  "fixture_elem_node" , m_box_part.uses_elem_count() )
        , m_recv_node(  "fixture_recv_node" , m_box_part.recv_node_msg_count() )
        , m_send_node(  "fixture_send_node" , m_box_part.send_node_msg_count() )
        , m_send_node_id( "fixture_send_node_id" , m_box_part.send_node_id_count() )
    {
        // Cache the per-node grid offsets of one element; the fourth
        // component of every entry is set to zero.
        {
            const hex_data elem_data ;

            for ( unsigned node = 0 ; node < ElemNode ; ++node ) {
                for ( unsigned d = 0 ; d < 3 ; ++d ) {
                    m_elem_node_local[node][d] = elem_data.eval_map[node][d] ;
                }
                m_elem_node_local[node][3] = 0 ;
            }
        }

        // The launch must cover the largest table, since operator() checks
        // the work index against each table's extent separately.
        size_t largest_extent = m_elem_node.dimension_0() * m_elem_node.dimension_1() ;
        largest_extent = std::max<size_t>( largest_extent , m_node_grid.dimension_0() );
        largest_extent = std::max<size_t>( largest_extent , m_send_node_id.dimension_0() );
        largest_extent = std::max<size_t>( largest_extent , m_send_node.dimension_0() );
        largest_extent = std::max<size_t>( largest_extent , m_recv_node.dimension_0() );

        const size_t nwork = largest_extent ;

        Kokkos::parallel_for( nwork , *this );
    }
  // Return a non-owning ArrayView over this process' global indices as
  // stored in the host Map.
  Teuchos::ArrayView<const GlobalOrdinal>
  Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
  getNodeElementList () const
  {
    // The host Map creates the index list on demand if it does not exist yet.
    Kokkos::View<const GlobalOrdinal*, host_mirror_device_type> hostGids =
      mapHost_.getMyGlobalIndices ();

    Teuchos::ArrayView<const GlobalOrdinal> gidList (hostGids.ptr_on_device (),
                                                     hostGids.dimension_0 ());
    return gidList;
  }
Exemple #12
0
		// Batched GEMM over rank-3 LayoutRight views: for every index i of the
		// leading extent of C, one Teuchos::BLAS GEMM call is issued on the
		// slices that start at A(i,0,0), B(i,0,0) and C(i,0,0).
		//
		// As with a row-major to column-major BLAS bridge, the operands are
		// passed in swapped order (B, then A) with dimensions (n, m).
		//
		// NOTE(review): this definition is truncated in this view -- the
		// function's closing brace is not visible.
		static void GEMM(Teuchos::ETransp transA, Teuchos::ETransp transB, Scalar alpha,
           Kokkos::View<Scalar***,Kokkos::LayoutRight,Kokkos::DefaultExecutionSpace> A,  Kokkos::View<Scalar***,Kokkos::LayoutRight,Kokkos::DefaultExecutionSpace> B,
          Scalar beta, Kokkos::View<Scalar***,Kokkos::LayoutRight,Kokkos::DefaultExecutionSpace> C){
		// m, n: extents 1 and 2 of C (per-slice result shape); k: shared
		// inner extent, read from A according to transA.
		const int m = static_cast<int> (C.dimension_1()),
        n = static_cast<int> (C.dimension_2 ()),
        k = (transA == Teuchos::NO_TRANS ? A.dimension_2 () : A.dimension_1 ());
// NOTE(review): the BLAS wrapper object is captured by the lambda below and
// used inside parallel_for -- confirm this is valid for the execution space.
Teuchos::BLAS<int,Scalar>blas;
Kokkos::parallel_for(C.dimension_0(),KOKKOS_LAMBDA (const size_t i) {
        // NOTE(review): leading dimensions n (for B) and k (for A) look
        // correct only when neither operand is transposed -- confirm the
        // TRANS cases.
        blas.GEMM(transB, transA, n, m, k, alpha,
                   &B(i,0,0), n,
                   &A(i,0,0), k,
                   beta, &C(i,0,0), n);
});
Exemple #13
0
// Modified Gram-Schmidt over the columns of Q.
//
// Each column of Q is scaled by the reciprocal of its norm, then its
// projection is subtracted from every later column.  The norms (diagonal)
// and projection coefficients (above the diagonal) are recorded in a host
// mirror of R, which is copied into R at the end.
void modified_gram_schmidt(

  const Kokkos::View< ScalarQ ** ,
                           Kokkos::LayoutLeft ,
                           DeviceType ,
                           Management > & Q ,

  const Kokkos::View< ScalarR ** ,
                           Kokkos::LayoutLeft ,
                           DeviceType ,
                           Management > & R ,

  comm::Machine machine )
{
  // Unmanaged rank-1 view type used to address a single column of Q.
  typedef Kokkos::View< ScalarQ * ,
                             Kokkos::LayoutLeft ,
                             DeviceType ,
                             Kokkos::MemoryUnmanaged >
    column_view_type ;

  typedef typename
    Kokkos::View< ScalarR** ,
                       Kokkos::LayoutLeft ,
                       DeviceType >::
    HostMirror host_r_type ;

  const Kokkos::ALL ALL ;

  // Coefficients are written on the host and copied to R once at the end.
  const host_r_type hostR = Kokkos::create_mirror_view( R );

  const int num_rows = Q.dimension_0();
  const int num_cols = Q.dimension_1();

  for ( int col = 0 ; col < num_cols ; ++col ) {

    const column_view_type q_col =
      Kokkos::subview< column_view_type >( Q , ALL , col );

    // Norm of the current column.
    //   reads  += num_rows
    //   writes += 0
    //   flops  += 1 + 2 * num_rows
    const double col_norm = Kokkos::norm2( num_rows , q_col , machine );

    hostR(col,col) = col_norm ;

    // Scale the current column by 1 / norm.
    //   reads  += num_rows
    //   writes += num_rows
    //   flops  += 1 + num_rows
    Kokkos::scale( num_rows , 1.0 / col_norm , q_col );

    for ( int later = col + 1 ; later < num_cols ; ++later ) {

      const column_view_type q_later =
        Kokkos::subview< column_view_type >( Q , ALL , later );

      // Projection coefficient of the later column onto the current one.
      //   reads  += 2 * num_rows
      //   writes += 0
      //   flops  += 2 * num_rows
      const double projection =
        Kokkos::dot( num_rows , q_col , q_later , machine );

      hostR(col,later) = projection ;

      // Subtract that projection from the later column.
      //   reads  += 2 * num_rows
      //   writes += num_rows
      //   flops  += 2 * num_rows
      Kokkos::axpy( num_rows , - projection , q_col , q_later );
    }
  }

  // Publish the coefficients.
  //   reads  += 0
  //   writes += num_cols * num_cols
  Kokkos::deep_copy( R , hostR );
}
 KOKKOS_INLINE_FUNCTION
 unsigned elem_count() const {
     // Leading extent of the element-node table.
     const unsigned count = m_elem_node.dimension_0();
     return count;
 }
 KOKKOS_INLINE_FUNCTION
 unsigned node_count() const {
     // Leading extent of the node-grid table.
     const unsigned count = m_node_grid.dimension_0();
     return count;
 }
 // Store the destination vector, the source vector and the column index
 // list, and cache the number of column indices.
 GatherTranspose( multi_vector_type& xt,
                  const multi_vector_type& x,
                  const Kokkos::View<Ordinal*,device_type>& col )
   : m_xt( xt )
   , m_x( x )
   , m_col( col )
   , m_ncol( col.dimension_0() )
 {}