virtual void copy(DataSpace<DIM3> &hostCurrentSize)
        {
            cudaPitchedPtr hostPtr;
            hostPtr.pitch = this->host->getDataSpace()[0] * sizeof (TYPE);
            hostPtr.ptr = this->host->getBasePointer();
            hostPtr.xsize = this->host->getDataSpace()[0] * sizeof (TYPE);
            hostPtr.ysize = this->host->getDataSpace()[1];

            cudaMemcpy3DParms params;
            params.dstArray = NULL;
            params.dstPos = make_cudaPos(this->device->getOffset()[0] * sizeof (TYPE),
                                         this->device->getOffset()[1],
                                         this->device->getOffset()[2]);
            params.dstPtr = this->device->getCudaPitched();

            params.srcArray = NULL;
            params.srcPos = make_cudaPos(0, 0, 0);
            params.srcPtr = hostPtr;

            params.extent = make_cudaExtent(
                                            hostCurrentSize[0] * sizeof (TYPE),
                                            hostCurrentSize[1],
                                            hostCurrentSize[2]);
            params.kind = cudaMemcpyHostToDevice;

            CUDA_CHECK(cudaMemcpy3DAsync(&params, this->getCudaStream()));
        }
Exemplo n.º 2
0
        virtual void copy(DataSpace<DIM3> &devCurrentSize)
        {

            cudaMemcpy3DParms params;

            //  assert(this->source->getDataSpace().productOfComponents() <= this->destination->getDataSpace().productOfComponents());

            params.srcArray = NULL;
            params.srcPos = make_cudaPos(
                                         this->source->getOffset()[0] * sizeof (TYPE),
                                         this->source->getOffset()[1],
                                         this->source->getOffset()[2]);
            params.srcPtr = this->source->getCudaPitched();

            params.dstArray = NULL;
            params.dstPos = make_cudaPos(
                                         this->destination->getOffset()[0] * sizeof (TYPE),
                                         this->destination->getOffset()[1],
                                         this->destination->getOffset()[2]);
            ;
            params.dstPtr = this->destination->getCudaPitched();

            params.extent = make_cudaExtent(
                                            devCurrentSize[0] * sizeof (TYPE),
                                            devCurrentSize[1],
                                            devCurrentSize[2]);
            params.kind = cudaMemcpyDeviceToDevice;
            CUDA_CHECK(cudaMemcpy3DAsync(&params, this->getCudaStream()));
        }
Exemplo n.º 3
0
 void operator()(Type* dest, const math::Size_t<2u> pitchDest, 
                 Type* source, const math::Size_t<2u> pitchSource, const math::Size_t<3>& size,
                 flags::Memcopy::Direction direction)
 {
         const cudaMemcpyKind kind[] = {cudaMemcpyHostToDevice, cudaMemcpyDeviceToHost,
                                  cudaMemcpyHostToHost, cudaMemcpyDeviceToDevice};
                                  
         cudaPitchedPtr pitchedPtrDest;
         pitchedPtrDest.pitch = pitchDest.x(); pitchedPtrDest.ptr = dest;
         pitchedPtrDest.xsize = size.x(); pitchedPtrDest.ysize = size.y();
         cudaPitchedPtr pitchedPtrSource;
         pitchedPtrSource.pitch = pitchSource.x(); pitchedPtrSource.ptr = source;
         pitchedPtrSource.xsize = size.x(); pitchedPtrSource.ysize = size.y();
         
         cudaMemcpy3DParms params;
         params.srcArray = NULL;
         params.srcPos = make_cudaPos(0,0,0);
         params.srcPtr = pitchedPtrSource;
         params.dstArray = NULL;
         params.dstPos = make_cudaPos(0,0,0);
         params.dstPtr = pitchedPtrDest;
         params.extent = make_cudaExtent(size.x() * sizeof(Type), size.y(), size.z());
         params.kind = kind[direction];
         CUDA_CHECK_NO_EXCEP(cudaMemcpy3D(&params));
 }                    
bool ControlCubeCache::_readElement(NodeLinkedList<index_node_t> * element)
{
	#ifndef NDEBUG
	if ((int)element->element > _maxNumCubes) 
	{
		std::cerr<<"Control Cube CPU Cache, try to write outside reserved memory"<<std::endl;
		throw;
	}
	#endif
	index_node_t idCube = element->id;
	float * cube = (_memory + element->element*_sizeElement);

	if (!checkCubeInside(element->id))
	{
		if (cudaSuccess != cudaMemset((void*)cube, 0, _sizeElement*sizeof(float)))
		{
			std::cout<<"---> "<<idCube<<" "<<_minValue<<" "<<_maxValue<<std::endl;
			LBERROR<<"Control Cube Cache: error copying to a device: "<<cudaGetErrorString(cudaGetLastError()) <<" "<<cube<<" "<<_sizeElement<<std::endl;
			throw;
		}
		return true;
	}

	index_node_t idCubeCPU = idCube >> 3*(_levelCube - _cpuCache->getCubeLevel()); 
	float * pCube = _cpuCache->getAndBlockElement(idCubeCPU);

	if (pCube != 0)
	{
		vmml::vector<3, int> coord = getMinBoxIndex2(idCube, _levelCube, _nLevels);
		vmml::vector<3, int> coordC = getMinBoxIndex2(idCubeCPU, _cpuCache->getCubeLevel(), _nLevels);
		coord -= coordC;
		vmml::vector<3, int> realDimCPU = _cpuCache->getRealCubeDim();

		cudaMemcpy3DParms myParms = {0};
		myParms.srcPtr = make_cudaPitchedPtr((void*)pCube, realDimCPU.z()*sizeof(float), realDimCPU.x(), realDimCPU.y()); 
		//myParms.dstPtr = make_cudaPitchedPtr((void*)cube, _realcubeDim.z()*sizeof(float), _realcubeDim.x(), _realcubeDim.y()); 
		myParms.dstPtr = make_cudaPitchedPtr((void*)cube, _dimCube*sizeof(float), _dimCube, _dimCube); 
		myParms.extent = make_cudaExtent(_dimCube*sizeof(float), _dimCube, _dimCube);
		myParms.dstPos = make_cudaPos(0,0,0);
		myParms.srcPos = make_cudaPos(coord.z()*sizeof(float), coord.y(), coord.x());
		myParms.kind = cudaMemcpyHostToDevice;

		if (cudaSuccess != cudaMemcpy3DAsync(&myParms, _stream) || cudaSuccess != cudaStreamSynchronize(_stream))
		{
			std::cout<<"---> "<<idCube<<" "<<_minValue<<" "<<_maxValue<<std::endl;
			LBERROR<<"Control Cube Cache: error copying to a device: "<<cudaGetErrorString(cudaGetLastError()) <<" "<<cube<<" "<<pCube<<" "<<_sizeElement<<std::endl;
			throw;
		}

		_cpuCache->unlockElement(idCubeCPU);
		return true;
	}
	else
	{
		return false;
	}
}
Exemplo n.º 5
0
void cpyD2H() {
    // D2H“]‘—
    cudaMemcpy3DParms parms = { 0 };
    parms.srcPos = make_cudaPos(0, 0, 0);
    parms.srcPtr = make_cudaPitchedPtr(vdm.dPtr, sizeof(TYPE) * buf.pitch.x, buf.pitch.x, buf.pitch.y);
    parms.dstPos = make_cudaPos(sizeof(TYPE) * buf.lower.x, buf.lower.y, buf.lower.z);
    parms.dstPtr = make_cudaPitchedPtr(vdm.hPtr, sizeof(TYPE) * vdm.pitch.x, vdm.pitch.x, vdm.pitch.y);
    parms.extent = make_cudaExtent(sizeof(TYPE) * buf.pitch.x, buf.pitch.y, buf.pitch.z);
    parms.kind = cudaMemcpyDeviceToHost;
    cudaMemcpy3D(&parms);
}
Exemplo n.º 6
0
void CudaImagePyramidHost::copyFromHost(int width, int height, const void* source, int layer)
{
    assert(isInitialized());
    assert(_textureType == cudaTextureType2DLayered);

    cudaMemcpy3DParms myParms = {0};
    myParms.srcPtr = make_cudaPitchedPtr((void*)source,width*_typeSize,width,height);
    myParms.srcPos = make_cudaPos(0,0,0);
    myParms.dstArray = _storage;
    myParms.dstPos = make_cudaPos(0,0,layer);
    myParms.extent = make_cudaExtent(width,height,1);
    myParms.kind = cudaMemcpyHostToDevice;
    cudaMemcpy3D(&myParms);

    checkCUDAError("Memcpy error", _name);
}
Exemplo n.º 7
0
void MemManager::cpyD2H(Task task) {
    cudaStream_t stream = task.stream;
    for (auto vdm : vdms) {
        VDMBuffer buf = vdm.bufs[task.n];

        // D2H転送
        cudaMemcpy3DParms parms = { 0 };
        parms.srcPos = make_cudaPos(0, 0, 0);
        parms.srcPtr = make_cudaPitchedPtr(vdm.dPtr, sizeof(TYPE) * buf.pitchx(), buf.pitchx(), buf.pitchy());
        parms.dstPos = make_cudaPos(sizeof(TYPE) * buf.lower.x, buf.lower.y, buf.lower.z);
        parms.dstPtr = make_cudaPitchedPtr(vdm.hPtr, sizeof(TYPE) * vdm.pitch.x, vdm.pitch.x, vdm.pitch.y);
        parms.extent = make_cudaExtent(sizeof(TYPE) * buf.pitchx(), buf.pitchy(), buf.pitchz());
        parms.kind = cudaMemcpyDeviceToHost;
        cudaMemcpy3DAsync(&parms, stream);
    }
}
Exemplo n.º 8
0
void MemManager::cpyH2D(Task task) {
    cudaStream_t stream = task.stream;
    for (auto vdm : vdms) {
        VDMBuffer buf = vdm.bufs[task.n];

        // H2D転送
        cudaMemcpy3DParms parms = { 0 };
        parms.dstPos = make_cudaPos(0, 0, 0);
        parms.dstPtr = make_cudaPitchedPtr(vdm.dPtr, sizeof(TYPE) * buf.pitchx(), buf.pitchx(), buf.pitchy());
        parms.srcPos = make_cudaPos(sizeof(TYPE) * buf.lower.x, buf.lower.y, buf.lower.z);
        parms.srcPtr = make_cudaPitchedPtr(vdm.hPtr, sizeof(TYPE) * vdm.pitch.x, vdm.pitch.x, vdm.pitch.y);
        parms.extent = make_cudaExtent(sizeof(TYPE) * buf.pitchx(), buf.pitchy(), buf.pitchz());
        parms.kind = cudaMemcpyHostToDevice;
        cudaMemcpy3DAsync(&parms, stream);
    }

    cudaMemcpyAsync(confsetDevPtr, confsetHosPtrs[task.n], sizeof(MKConf)*nVdm, cudaMemcpyHostToDevice, stream);
}
Exemplo n.º 9
0
    void Buffer3D::setData( void const* srcData, dp::math::Vec3ui const& srcOffset, dp::math::Vec3ui const& srcStride, dp::math::Vec3ui const& srcExtent, dp::math::Vec3ui const& dstOffset )
    {
#if !defined(NDEBUG)
      for ( int i=0 ; i<3 ; i++ )
      {
        DP_ASSERT( srcOffset[i] + ( srcExtent[i] ? srcExtent[i] : m_extent[i] ) <= m_extent[i] );
        DP_ASSERT( dstOffset[i] + ( srcExtent[i] ? srcExtent[i] : m_extent[i] ) <= m_extent[i] );
      }
#endif
      cudaMemcpy3DParms parms = { 0 };
      parms.srcPos = make_cudaPos( m_elementSize * srcOffset[0], srcOffset[1], srcOffset[2] );
      parms.srcPtr = make_cudaPitchedPtr( const_cast<void *>(srcData), m_elementSize * ( srcStride[0] ? srcStride[0] : m_extent[0] ), srcStride[0] ? srcStride[0] : m_extent[0], srcStride[1] ? srcStride[1] : m_extent[1] );
      parms.dstPos = make_cudaPos( m_elementSize * dstOffset[0], dstOffset[1], dstOffset[2] );
      parms.dstPtr = m_pitchedPtr;
      parms.extent = make_cudaExtent( m_elementSize * ( srcExtent[0] ? srcExtent[0] : m_extent[0] ), srcExtent[1] ? srcExtent[1] : m_extent[1], srcExtent[2] ? srcExtent[2] : m_extent[2] );
      parms.kind = cudaMemcpyHostToDevice;
      CUDA_VERIFY( cudaMemcpy3D( &parms ) );
    }
Exemplo n.º 10
0
    void Buffer3D::getData( dp::cuda::BufferHostSharedPtr const& dstBuffer, dp::math::Vec3ui const& dstOffset, dp::math::Vec3ui const& dstStride, dp::math::Vec3ui const& dstExtent
                          , dp::math::Vec3ui const& srcOffset, dp::cuda::StreamSharedPtr const& stream )
    {
#if !defined(NDEBUG)
      for ( int i=0 ; i<3 ; i++ )
      {
        DP_ASSERT( dstOffset[i] + ( dstExtent[i] ? dstExtent[i] : m_extent[i] ) <= m_extent[i] );
        DP_ASSERT( srcOffset[i] + ( dstExtent[i] ? dstExtent[i] : m_extent[i] ) <= m_extent[i] );
      }
#endif
      cudaMemcpy3DParms parms = { 0 };
      parms.srcPos = make_cudaPos( m_elementSize * srcOffset[0], srcOffset[1], srcOffset[2] );
      parms.srcPtr = m_pitchedPtr;
      parms.dstPos = make_cudaPos( m_elementSize * dstOffset[0], dstOffset[1], dstOffset[2] );
      parms.dstPtr = make_cudaPitchedPtr( dstBuffer->getPointer<void>(), m_elementSize * ( dstStride[0] ? dstStride[0] : m_extent[0] ), dstStride[0] ? dstStride[0] : m_extent[0], dstStride[1] ? dstStride[1] : m_extent[1] );
      parms.extent = make_cudaExtent( m_elementSize * ( dstExtent[0] ? dstExtent[0] : m_extent[0] ), dstExtent[1] ? dstExtent[1] : m_extent[1], dstExtent[2] ? dstExtent[2] : m_extent[2] );
      parms.kind = cudaMemcpyDeviceToHost;
      CUDA_VERIFY( cudaMemcpy3DAsync( &parms ) );
    }