Exemple #1
0
  template <class T> inline
  void CUDANodeMemoryModel::copyBuffers(size_t size, const ArrayRCP<const T> &buffSrc, const ArrayRCP<T> &buffDest) {
    CHECK_COMPUTE_BUFFER(buffSrc);
    CHECK_COMPUTE_BUFFER(buffDest);

    TEUCHOS_TEST_FOR_EXCEPTION( (size_t)buffDest.size() < size, std::runtime_error,
      "CUDANodeMemoryModel::copyBuffers<" 
      << Teuchos::TypeNameTraits<T>::name () 
      << ">: invalid copy.  Device destination buffer has size " << buffDest.size () 
      << ", which is less than the requested copy size " << size << ".");
    TEUCHOS_TEST_FOR_EXCEPTION( (size_t)buffSrc.size() < size, std::runtime_error,
      "CUDANodeMemoryModel::copyBuffers<" 
      << Teuchos::TypeNameTraits<T>::name () 
      << ">: invalid copy.  Device source buffer has size " << buffSrc.size () 
      << ", which is less than the requested copy size " << size << ".");

#ifdef HAVE_KOKKOSCLASSIC_CUDA_NODE_MEMORY_PROFILING
    ++numCopiesD2D_;
    bytesCopiedD2D_ += size*sizeof(T);
#endif
#ifdef HAVE_KOKKOSCLASSIC_CUDA_NODE_MEMORY_TRACE
    std::cerr << "copyBuffers<" << Teuchos::TypeNameTraits<T>::name() << "> of size " << sizeof(T) * size << std::endl;
#endif
    cudaError_t err = cudaMemcpy( buffDest.getRawPtr(), buffSrc.getRawPtr(), size*sizeof(T), cudaMemcpyDeviceToDevice);
    TEUCHOS_TEST_FOR_EXCEPTION( cudaSuccess != err, std::runtime_error,
      "Kokkos::CUDANodeMemoryModel::copyBuffers<"
      << Teuchos::TypeNameTraits<T>::name () 
      << ">(): cudaMemcpy() returned error: " << cudaGetErrorString (err)
      );
  }
      /*! \brief Copy data between buffers.

        @param[in]     size     The size of the copy, greater than zero.
        @param[in]     buffSrc  The source buffer, with length at least as large as \c size.
        @param[in,out] buffDest The destination buffer, with length at least as large as \c size.

        \post The data is guaranteed to have been copied before any other usage of buffSrc or buffDest occurs.
      */
      template <class T> inline
      void copyBuffers(size_t size, const ArrayRCP<const T> &buffSrc, const ArrayRCP<T> &buffDest) {
        if (isHostNode == false) {
          CHECK_COMPUTE_BUFFER(buffSrc);
          CHECK_COMPUTE_BUFFER(buffDest);
        }
        ArrayView<const T> av_src = buffSrc(0,size);
        ArrayView<T>       av_dst = buffDest(0,size);
        std::copy(av_src.begin(),av_src.end(),av_dst.begin());
      }
Exemple #3
0
  inline void CUDANodeMemoryModel::readyBuffers(ArrayView<ArrayRCP<const char> > buffers, ArrayView<ArrayRCP<char> > ncBuffers) {
#ifdef HAVE_KOKKOSCLASSIC_DEBUG
    for (size_t i=0; i < (size_t)buffers.size(); ++i) {
      CHECK_COMPUTE_BUFFER(buffers[i]);
    }
    for (size_t i=0; i < (size_t)ncBuffers.size(); ++i) {
      CHECK_COMPUTE_BUFFER(ncBuffers[i]);
    }
#endif
    (void)buffers;
    (void)ncBuffers;
  }
      inline void readyBuffers(ArrayView<ArrayRCP<const char> > buffers, ArrayView<ArrayRCP<char> > ncBuffers) {
#ifdef HAVE_KOKKOSCLASSIC_DEBUG
        if (isHostNode == false) {
          for (size_t i=0; i < (size_t)buffers.size(); ++i) {
            CHECK_COMPUTE_BUFFER(buffers[i]);
          }
          for (size_t i=0; i < (size_t)ncBuffers.size(); ++i) {
            CHECK_COMPUTE_BUFFER(ncBuffers[i]);
          }
        }
#endif
        (void)buffers;
        (void)ncBuffers;
      }
 //! \brief Return a const view of a buffer for use on the host.
 template <class T> inline
 ArrayRCP<const T> viewBuffer(size_t size, ArrayRCP<const T> buff) {
   if (isHostNode == false) {
     CHECK_COMPUTE_BUFFER(buff);
   }
   return buff.persistingView(0,size);
 }
 //! \brief Return a non-const view of a buffer for use on the host.
 template <class T> inline
 ArrayRCP<T> viewBufferNonConst(ReadWriteOption rw, size_t size, const ArrayRCP<T> &buff) {
   (void) rw; // Silence "unused parameter" compiler warning
   if (isHostNode == false) {
     CHECK_COMPUTE_BUFFER(buff);
   }
   return buff.persistingView(0,size);
 }
      /*! \brief Copy data to host memory from a parallel buffer.

          @param[in]  size        The number of entries to copy from \c hostSrc to \c buffDest.
          @param[in]  hostSrc     The location in host memory from where the data is copied.
          @param[out] buffDest    The parallel buffer to which the data is copied.

          \pre  \c size is non-negative.
          \pre  \c hostSrc has length equal to \c size.
          \pre  \c buffSrc has length at least <tt>size</tt>.
          \post On return, entries in the range <tt>[0 , size)</tt> of \c hostSrc are allowed to be written to. The data is guaranteed to be present in \c buffDest before it is used in a parallel computation.
      */
      template <class T> inline
      void copyToBuffer(size_t size, const ArrayView<const T> &hostSrc, const ArrayRCP<T> &buffDest) {
        if (isHostNode == false) {
          CHECK_COMPUTE_BUFFER(buffDest);
        }
        ArrayRCP<const T> buffSrc = arcpFromArrayView(hostSrc);
        copyBuffers<T>(size,buffSrc,buffDest);
      }
Exemple #8
0
  template <class T> inline
  ArrayRCP<const T> 
  CUDANodeMemoryModel::viewBuffer(size_t size, ArrayRCP<const T> buff) {
    CHECK_COMPUTE_BUFFER(buff);
    ArrayRCP<T> hostBuff;
    if (size != 0) {
      hostBuff = arcp<T>(size);
#ifdef HAVE_KOKKOSCLASSIC_CUDA_NODE_MEMORY_TRACE
      std::cerr << "viewBuffer() -> ";
#endif
      this->template copyFromBuffer<T>(size,buff,hostBuff());
    }
    return hostBuff;
  }
Exemple #9
0
  template <class T> inline
  ArrayRCP<T> 
  CUDANodeMemoryModel::viewBufferNonConst(ReadWriteOption rw, size_t size, const ArrayRCP<T> &buff) {
    CHECK_COMPUTE_BUFFER(buff);
    // Create a copy-back deallocator that copies back to buff.
    CUDANodeCopyBackDeallocator<T> dealloc(buff.persistingView(0,size), rcpFromRef(*this));
    // It allocates a host buffer with the appropriate deallocator embedded.
    ArrayRCP<T> hostBuff = dealloc.alloc();
    if (rw == ReadWrite) {
#ifdef HAVE_KOKKOSCLASSIC_CUDA_NODE_MEMORY_TRACE
      std::cerr << "viewBufferNonConst(ReadWrite) -> ";
#endif
      this->template copyFromBuffer<T>(size, buff, hostBuff());
    }  
    else {
#ifdef HAVE_KOKKOSCLASSIC_CUDA_NODE_MEMORY_TRACE
      std::cerr << "viewBufferNonConst(WriteOnly)" << std::endl;
#endif
    }
    // else rw == WriteOnly, and we need no copy
    return hostBuff;
  }