/*! \brief Allocate a parallel buffer, returning it as a pointer encapsulated in an ArrayRCP.

          Dereferencing the returned ArrayRCP or its underlying pointer in general results in undefined
          behavior outside of parallel computations.

          The buffer will be automatically freed by the Node when no more references remain.

          @tparam T The data type of the allocated buffer. This is used to perform alignment and determine the number of bytes to allocate.
          @param[in] size The size requested for the parallel buffer, greater than zero.

          \post The method will return an ArrayRCP encapsulating a pointer. The underlying pointer may be used in parallel computation routines,
                and is guaranteed to have size large enough to reference \c size number of entries of type \c T.
      */
      template <class T> inline
      ArrayRCP<T> allocBuffer(size_t size) {
        // A zero-length request yields a default-constructed ArrayRCP;
        // otherwise arcp<T>() supplies storage for `size` entries.
        ArrayRCP<T> result = (size > 0) ? arcp<T>(size) : ArrayRCP<T>();

        // Only buffers handed out by a non-host node are tagged as compute buffers.
        if (!isHostNode) {
          MARK_COMPUTE_BUFFER(result);
        }
        return result;
      }
Beispiel #2
0
  /*! \brief Allocate a buffer of \c size entries of type \c T with cudaMalloc() and wrap it in an ArrayRCP.

      The returned ArrayRCP is built from the device pointer with ownership enabled and a
      CUDANodeDeallocator constructed from the byte count and a reference to this object.
      When \c size is zero, cudaMalloc() is not called and the pointer passed to arcp() is NULL.

      @tparam T The entry type; \c sizeof(T) determines the number of bytes requested.
      @param[in] size The number of entries of type \c T requested.

      \throw std::runtime_error if \c sizeof(T)*size does not fit in a \c size_t, or if
             cudaMalloc() returns anything other than \c cudaSuccess.
  */
  template <class T> inline
  ArrayRCP<T>
  CUDANodeMemoryModel::allocBuffer(size_t size) {
    // FINISH: if possible, check that there is room; else, boot someone
    T * devptr = NULL;
    const size_t sizeInBytes = sizeof(T)*size;
    // Unsigned multiplication wraps silently. Dividing the product back by
    // sizeof(T) (never zero) exposes a wrap, which would otherwise make
    // cudaMalloc() reserve fewer bytes than the ArrayRCP extent claims.
    TEUCHOS_TEST_FOR_EXCEPTION( sizeInBytes / sizeof(T) != size, std::runtime_error,
      "Kokkos::CUDANodeMemoryModel::allocBuffer<"
      << Teuchos::TypeNameTraits<T>::name () << ">: requested " << size
      << " entries, whose size in bytes overflows size_t."
      );
    if (size > 0) {
      cudaError_t err = cudaMalloc( (void**)&devptr, sizeInBytes );
      TEUCHOS_TEST_FOR_EXCEPTION( err != cudaSuccess, std::runtime_error,
        "Kokkos::CUDANodeMemoryModel::allocBuffer<" 
        << Teuchos::TypeNameTraits<T>::name () << ">: cudaMalloc() returned "
        "error: " << cudaGetErrorString (err) 
        );
#ifdef HAVE_KOKKOSCLASSIC_CUDA_NODE_MEMORY_PROFILING
      // Running total of bytes handed out, kept only in profiling builds.
      allocSize_ += sizeInBytes;
#endif
    }
    // The deallocator records the byte count and a non-owning reference to this object.
    CUDANodeDeallocator dealloc(sizeInBytes,rcpFromRef(*this));
    const bool OwnsMem = true;
    ArrayRCP<T> buff = arcp<T>(devptr,0,size,dealloc,OwnsMem);
    MARK_COMPUTE_BUFFER(buff);
    return buff;
  }