Example #1
0
	static void GEMM(Teuchos::ETransp transA, Teuchos::ETransp transB, Scalar alpha,
          Kokkos::View<Scalar**,Kokkos::LayoutRight,Kokkos::DefaultExecutionSpace> A, Kokkos::View<Scalar**,Kokkos::LayoutRight,Kokkos::DefaultExecutionSpace> B,
          Scalar beta, Kokkos::View<Scalar**,Kokkos::LayoutRight,Kokkos::DefaultExecutionSpace> C){
	Teuchos::BLAS<int,Scalar>blas;
	    const int m = static_cast<int> (C.dimension_0 ()),
        n = static_cast<int> (C.dimension_1 ()),
        k = (transA == Teuchos::NO_TRANS ? A.dimension_1 () : A.dimension_0 ());
   
	blas.GEMM(transB, transA, n, m, k, alpha,
                   B.ptr_on_device(), n,
                   A.ptr_on_device(), k,
                   beta, C.ptr_on_device(), n);
					   
					   
	}
  Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > >
  Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
  replaceCommWithSubset (const Teuchos::RCP<const Teuchos::Comm<int> >& newComm) const
  {
    using Teuchos::ArrayView;
    using Teuchos::outArg;
    using Teuchos::RCP;
    using Teuchos::REDUCE_MIN;
    using Teuchos::reduceAll;
    typedef global_size_t GST;
    typedef LocalOrdinal LO;
    typedef GlobalOrdinal GO;
    typedef Map<LO, GO, node_type> map_type;

    // mfh 26 Mar 2013: The lazy way to do this is simply to recreate
    // the Map by calling its ordinary public constructor, using the
    // original Map's data.  This only involves O(1) all-reduces over
    // the new communicator, which in the common case only includes a
    // small number of processes.

    // Create the Map to return.
    if (newComm.is_null ()) {
      return Teuchos::null; // my process does not participate in the new Map
    } else {
      // Map requires that the index base equal the global min GID.
      // Figuring out the global min GID requires a reduction over all
      // processes in the new communicator.  It could be that some (or
      // even all) of these processes contain zero entries.  (Recall
      // that this method, unlike removeEmptyProcesses(), may remove
      // an arbitrary subset of processes.)  We deal with this by
      // doing a min over the min GID on each process if the process
      // has more than zero entries, or the global max GID, if that
      // process has zero entries.  If no processes have any entries,
      // then the index base doesn't matter anyway.
      const GO myMinGid = (this->getNodeNumElements () == 0) ?
        this->getMaxAllGlobalIndex () : this->getMinGlobalIndex ();
      GO newIndexBase = this->getInvalidGlobalIndex ();
      reduceAll<int, GO> (*newComm, REDUCE_MIN, myMinGid, outArg (newIndexBase));

      // Make Map's constructor compute the global number of indices.
      const GST globalNumInds = Teuchos::OrdinalTraits<GST>::invalid ();

      if (mapDevice_.initialized ()) {
        Kokkos::View<const GO*, DeviceType> myGIDs =
          mapDevice_.getMyGlobalIndices ();
        return rcp (new map_type (globalNumInds, myGIDs, newIndexBase,
                                  newComm, this->getNode ()));
      }
      else {
        Kokkos::View<const GO*, host_mirror_device_type> myGidsHostView =
          mapHost_.getMyGlobalIndices ();
        ArrayView<const GO> myGidsArrayView (myGidsHostView.ptr_on_device (),
                                             myGidsHostView.dimension_0 ());
        return rcp (new map_type (globalNumInds, myGidsArrayView, newIndexBase,
                                  newComm, this->getNode ()));
      }
    }
  }
  Teuchos::ArrayView<const GlobalOrdinal>
  Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
  getNodeElementList () const
  {
    typedef GlobalOrdinal GO;
    Kokkos::View<const GO*, host_mirror_device_type> myGlobalInds =
      mapHost_.getMyGlobalIndices (); // creates it if it doesn't exist

    return Teuchos::ArrayView<const GO> (myGlobalInds.ptr_on_device (),
                                         myGlobalInds.dimension_0 ());
  }
void multiply(const CrsMatrix< float , Kokkos::OpenMP >& A,
              const Kokkos::View< float* , Kokkos::OpenMP >& x,
              Kokkos::View< float* , Kokkos::OpenMP >& y,
              MKLMultiply tag)
{
  MKL_INT n = A.graph.row_map.dimension_0() - 1 ;
  float *A_values = A.values.ptr_on_device() ;
  MKL_INT *col_indices = A.graph.entries.ptr_on_device() ;
  MKL_INT *row_beg = const_cast<MKL_INT*>(A.graph.row_map.ptr_on_device()) ;
  MKL_INT *row_end = row_beg+1;
  char matdescra[6] = { 'G', 'x', 'N', 'C', 'x', 'x' };
  char trans = 'N';
  float alpha = 1.0;
  float beta = 0.0;

  float *x_values = x.ptr_on_device() ;
  float *y_values = y.ptr_on_device() ;

  mkl_scsrmv(&trans, &n, &n, &alpha, matdescra, A_values, col_indices,
             row_beg, row_end, x_values, &beta, y_values);
}
FieldContainer_Kokkos(Kokkos::View<ScalarPoindex_typeer,Kokkos::LayoutLeft,Kokkos::OpenMP>& InContainer){
dim0=dim[0]=InContainer.dimension(0);
dim1=dim[1]=InContainer.dimension(1);
dim2=dim[2]=InContainer.dimension(2);
dim3=dim[3]=InContainer.dimension(3);
dim4=dim[4]=InContainer.dimension(4);
dim5=dim[5]=InContainer.dimension(5);
dim6=dim[6]=InContainer.dimension(6);
dim7=dim[7]=InContainer.dimension(7);
rankValue=Kokkos::View<ScalarPoindex_typeer,Kokkos::LayoutLeft,Kokkos::OpenMP>::Rank;
intepidManaged=false;
switch(rankValue){
case 1:
sizeValue=dim0;
break;

case 2:
sizeValue=dim0*dim1;
break;

case 3:
sizeValue=dim0*dim1*dim2;
break;

case 4:
sizeValue=dim0*dim1*dim2*dim3;
break;

case 5:
sizeValue=dim0*dim1*dim2*dim3*dim4;
break;

case 6:
sizeValue=dim0*dim1*dim2*dim3*dim4*dim5;
break;

case 7:
sizeValue=dim0*dim1*dim2*dim3*dim4*dim5*dim6;
break;

case 8:
sizeValue=dim0*dim1*dim2*dim3*dim4*dim5*dim6*dim7;
break;

}
containerMemory=InContainer.ptr_on_device();


}
Example #6
0
  KOKKOS_INLINE_FUNCTION
  void operator()( const typename policy_type::member_type ind , value_type & error ) const
  {
    if ( 0 == ind.league_rank() && 0 == ind.team_rank() ) {
      const long int thread_count = ind.league_size() * ind.team_size();
      total() = ( thread_count * ( thread_count + 1 ) ) / 2 ;
    }

    // Team max:
    const int long m = ind.team_reduce( (long int) ( ind.league_rank() + ind.team_rank() ) , JoinMax() );

    if ( m != ind.league_rank() + ( ind.team_size() - 1 ) ) {
#ifndef __KALMAR_ACCELERATOR__
      printf("ScanTeamFunctor[%d.%d of %d.%d] reduce_max_answer(%ld) != reduce_max(%ld)\n"
            , ind.league_rank(), ind.team_rank()
            , ind.league_size(), ind.team_size()
            , (long int)(ind.league_rank() + ( ind.team_size() - 1 )) , m );
#endif
    }

    // Scan:
    const long int answer =
      ( ind.league_rank() + 1 ) * ind.team_rank() +
      ( ind.team_rank() * ( ind.team_rank() + 1 ) ) / 2 ;

    const long int result =
      ind.team_scan( ind.league_rank() + 1 + ind.team_rank() + 1 );

    const long int result2 =
      ind.team_scan( ind.league_rank() + 1 + ind.team_rank() + 1 );

    if ( answer != result || answer != result2 ) {
#ifndef __KALMAR_ACCELERATOR__
      printf("ScanTeamFunctor[%d.%d of %d.%d] answer(%ld) != scan_first(%ld) or scan_second(%ld)\n",
             ind.league_rank(), ind.team_rank(),
             ind.league_size(), ind.team_size(),
             answer,result,result2);
#endif
      error = 1 ;
    }

    const long int thread_rank = ind.team_rank() +
                                 ind.team_size() * ind.league_rank();
    ind.team_scan( 1 + thread_rank , accum.ptr_on_device() );
  }
Example #7
0
 SortView( const Kokkos::View<ValueType*,Kokkos::Cuda> v , int begin , int end )
   {
     thrust::sort( thrust::device_ptr<ValueType>( v.ptr_on_device() + begin )
                 , thrust::device_ptr<ValueType>( v.ptr_on_device() + end ) );
   }
Example #8
0
 SortView( const Kokkos::View<ValueType*,ExecSpace> v , int begin , int end )
   {
     std::sort( v.ptr_on_device() + begin , v.ptr_on_device() + end );
   }