virtual void copy(DataSpace<DIM3> &hostCurrentSize) { cudaPitchedPtr hostPtr; hostPtr.pitch = this->host->getDataSpace()[0] * sizeof (TYPE); hostPtr.ptr = this->host->getBasePointer(); hostPtr.xsize = this->host->getDataSpace()[0] * sizeof (TYPE); hostPtr.ysize = this->host->getDataSpace()[1]; cudaMemcpy3DParms params; params.dstArray = NULL; params.dstPos = make_cudaPos(this->device->getOffset()[0] * sizeof (TYPE), this->device->getOffset()[1], this->device->getOffset()[2]); params.dstPtr = this->device->getCudaPitched(); params.srcArray = NULL; params.srcPos = make_cudaPos(0, 0, 0); params.srcPtr = hostPtr; params.extent = make_cudaExtent( hostCurrentSize[0] * sizeof (TYPE), hostCurrentSize[1], hostCurrentSize[2]); params.kind = cudaMemcpyHostToDevice; CUDA_CHECK(cudaMemcpy3DAsync(¶ms, this->getCudaStream())); }
virtual void copy(DataSpace<DIM3> &devCurrentSize) { cudaMemcpy3DParms params; // assert(this->source->getDataSpace().productOfComponents() <= this->destination->getDataSpace().productOfComponents()); params.srcArray = NULL; params.srcPos = make_cudaPos( this->source->getOffset()[0] * sizeof (TYPE), this->source->getOffset()[1], this->source->getOffset()[2]); params.srcPtr = this->source->getCudaPitched(); params.dstArray = NULL; params.dstPos = make_cudaPos( this->destination->getOffset()[0] * sizeof (TYPE), this->destination->getOffset()[1], this->destination->getOffset()[2]); ; params.dstPtr = this->destination->getCudaPitched(); params.extent = make_cudaExtent( devCurrentSize[0] * sizeof (TYPE), devCurrentSize[1], devCurrentSize[2]); params.kind = cudaMemcpyDeviceToDevice; CUDA_CHECK(cudaMemcpy3DAsync(¶ms, this->getCudaStream())); }
void operator()(Type* dest, const math::Size_t<2u> pitchDest, Type* source, const math::Size_t<2u> pitchSource, const math::Size_t<3>& size, flags::Memcopy::Direction direction) { const cudaMemcpyKind kind[] = {cudaMemcpyHostToDevice, cudaMemcpyDeviceToHost, cudaMemcpyHostToHost, cudaMemcpyDeviceToDevice}; cudaPitchedPtr pitchedPtrDest; pitchedPtrDest.pitch = pitchDest.x(); pitchedPtrDest.ptr = dest; pitchedPtrDest.xsize = size.x(); pitchedPtrDest.ysize = size.y(); cudaPitchedPtr pitchedPtrSource; pitchedPtrSource.pitch = pitchSource.x(); pitchedPtrSource.ptr = source; pitchedPtrSource.xsize = size.x(); pitchedPtrSource.ysize = size.y(); cudaMemcpy3DParms params; params.srcArray = NULL; params.srcPos = make_cudaPos(0,0,0); params.srcPtr = pitchedPtrSource; params.dstArray = NULL; params.dstPos = make_cudaPos(0,0,0); params.dstPtr = pitchedPtrDest; params.extent = make_cudaExtent(size.x() * sizeof(Type), size.y(), size.z()); params.kind = kind[direction]; CUDA_CHECK_NO_EXCEP(cudaMemcpy3D(¶ms)); }
bool ControlCubeCache::_readElement(NodeLinkedList<index_node_t> * element) { #ifndef NDEBUG if ((int)element->element > _maxNumCubes) { std::cerr<<"Control Cube CPU Cache, try to write outside reserved memory"<<std::endl; throw; } #endif index_node_t idCube = element->id; float * cube = (_memory + element->element*_sizeElement); if (!checkCubeInside(element->id)) { if (cudaSuccess != cudaMemset((void*)cube, 0, _sizeElement*sizeof(float))) { std::cout<<"---> "<<idCube<<" "<<_minValue<<" "<<_maxValue<<std::endl; LBERROR<<"Control Cube Cache: error copying to a device: "<<cudaGetErrorString(cudaGetLastError()) <<" "<<cube<<" "<<_sizeElement<<std::endl; throw; } return true; } index_node_t idCubeCPU = idCube >> 3*(_levelCube - _cpuCache->getCubeLevel()); float * pCube = _cpuCache->getAndBlockElement(idCubeCPU); if (pCube != 0) { vmml::vector<3, int> coord = getMinBoxIndex2(idCube, _levelCube, _nLevels); vmml::vector<3, int> coordC = getMinBoxIndex2(idCubeCPU, _cpuCache->getCubeLevel(), _nLevels); coord -= coordC; vmml::vector<3, int> realDimCPU = _cpuCache->getRealCubeDim(); cudaMemcpy3DParms myParms = {0}; myParms.srcPtr = make_cudaPitchedPtr((void*)pCube, realDimCPU.z()*sizeof(float), realDimCPU.x(), realDimCPU.y()); //myParms.dstPtr = make_cudaPitchedPtr((void*)cube, _realcubeDim.z()*sizeof(float), _realcubeDim.x(), _realcubeDim.y()); myParms.dstPtr = make_cudaPitchedPtr((void*)cube, _dimCube*sizeof(float), _dimCube, _dimCube); myParms.extent = make_cudaExtent(_dimCube*sizeof(float), _dimCube, _dimCube); myParms.dstPos = make_cudaPos(0,0,0); myParms.srcPos = make_cudaPos(coord.z()*sizeof(float), coord.y(), coord.x()); myParms.kind = cudaMemcpyHostToDevice; if (cudaSuccess != cudaMemcpy3DAsync(&myParms, _stream) || cudaSuccess != cudaStreamSynchronize(_stream)) { std::cout<<"---> "<<idCube<<" "<<_minValue<<" "<<_maxValue<<std::endl; LBERROR<<"Control Cube Cache: error copying to a device: "<<cudaGetErrorString(cudaGetLastError()) <<" "<<cube<<" "<<pCube<<" "<<_sizeElement<<std::endl; throw; } _cpuCache->unlockElement(idCubeCPU); return true; } else { return false; } }
void cpyD2H() { // D2H“]‘— cudaMemcpy3DParms parms = { 0 }; parms.srcPos = make_cudaPos(0, 0, 0); parms.srcPtr = make_cudaPitchedPtr(vdm.dPtr, sizeof(TYPE) * buf.pitch.x, buf.pitch.x, buf.pitch.y); parms.dstPos = make_cudaPos(sizeof(TYPE) * buf.lower.x, buf.lower.y, buf.lower.z); parms.dstPtr = make_cudaPitchedPtr(vdm.hPtr, sizeof(TYPE) * vdm.pitch.x, vdm.pitch.x, vdm.pitch.y); parms.extent = make_cudaExtent(sizeof(TYPE) * buf.pitch.x, buf.pitch.y, buf.pitch.z); parms.kind = cudaMemcpyDeviceToHost; cudaMemcpy3D(&parms); }
void CudaImagePyramidHost::copyFromHost(int width, int height, const void* source, int layer) { assert(isInitialized()); assert(_textureType == cudaTextureType2DLayered); cudaMemcpy3DParms myParms = {0}; myParms.srcPtr = make_cudaPitchedPtr((void*)source,width*_typeSize,width,height); myParms.srcPos = make_cudaPos(0,0,0); myParms.dstArray = _storage; myParms.dstPos = make_cudaPos(0,0,layer); myParms.extent = make_cudaExtent(width,height,1); myParms.kind = cudaMemcpyHostToDevice; cudaMemcpy3D(&myParms); checkCUDAError("Memcpy error", _name); }
void MemManager::cpyD2H(Task task) { cudaStream_t stream = task.stream; for (auto vdm : vdms) { VDMBuffer buf = vdm.bufs[task.n]; // D2H転送 cudaMemcpy3DParms parms = { 0 }; parms.srcPos = make_cudaPos(0, 0, 0); parms.srcPtr = make_cudaPitchedPtr(vdm.dPtr, sizeof(TYPE) * buf.pitchx(), buf.pitchx(), buf.pitchy()); parms.dstPos = make_cudaPos(sizeof(TYPE) * buf.lower.x, buf.lower.y, buf.lower.z); parms.dstPtr = make_cudaPitchedPtr(vdm.hPtr, sizeof(TYPE) * vdm.pitch.x, vdm.pitch.x, vdm.pitch.y); parms.extent = make_cudaExtent(sizeof(TYPE) * buf.pitchx(), buf.pitchy(), buf.pitchz()); parms.kind = cudaMemcpyDeviceToHost; cudaMemcpy3DAsync(&parms, stream); } }
void MemManager::cpyH2D(Task task) { cudaStream_t stream = task.stream; for (auto vdm : vdms) { VDMBuffer buf = vdm.bufs[task.n]; // H2D転送 cudaMemcpy3DParms parms = { 0 }; parms.dstPos = make_cudaPos(0, 0, 0); parms.dstPtr = make_cudaPitchedPtr(vdm.dPtr, sizeof(TYPE) * buf.pitchx(), buf.pitchx(), buf.pitchy()); parms.srcPos = make_cudaPos(sizeof(TYPE) * buf.lower.x, buf.lower.y, buf.lower.z); parms.srcPtr = make_cudaPitchedPtr(vdm.hPtr, sizeof(TYPE) * vdm.pitch.x, vdm.pitch.x, vdm.pitch.y); parms.extent = make_cudaExtent(sizeof(TYPE) * buf.pitchx(), buf.pitchy(), buf.pitchz()); parms.kind = cudaMemcpyHostToDevice; cudaMemcpy3DAsync(&parms, stream); } cudaMemcpyAsync(confsetDevPtr, confsetHosPtrs[task.n], sizeof(MKConf)*nVdm, cudaMemcpyHostToDevice, stream); }
void Buffer3D::setData( void const* srcData, dp::math::Vec3ui const& srcOffset, dp::math::Vec3ui const& srcStride, dp::math::Vec3ui const& srcExtent, dp::math::Vec3ui const& dstOffset ) { #if !defined(NDEBUG) for ( int i=0 ; i<3 ; i++ ) { DP_ASSERT( srcOffset[i] + ( srcExtent[i] ? srcExtent[i] : m_extent[i] ) <= m_extent[i] ); DP_ASSERT( dstOffset[i] + ( srcExtent[i] ? srcExtent[i] : m_extent[i] ) <= m_extent[i] ); } #endif cudaMemcpy3DParms parms = { 0 }; parms.srcPos = make_cudaPos( m_elementSize * srcOffset[0], srcOffset[1], srcOffset[2] ); parms.srcPtr = make_cudaPitchedPtr( const_cast<void *>(srcData), m_elementSize * ( srcStride[0] ? srcStride[0] : m_extent[0] ), srcStride[0] ? srcStride[0] : m_extent[0], srcStride[1] ? srcStride[1] : m_extent[1] ); parms.dstPos = make_cudaPos( m_elementSize * dstOffset[0], dstOffset[1], dstOffset[2] ); parms.dstPtr = m_pitchedPtr; parms.extent = make_cudaExtent( m_elementSize * ( srcExtent[0] ? srcExtent[0] : m_extent[0] ), srcExtent[1] ? srcExtent[1] : m_extent[1], srcExtent[2] ? srcExtent[2] : m_extent[2] ); parms.kind = cudaMemcpyHostToDevice; CUDA_VERIFY( cudaMemcpy3D( &parms ) ); }
void Buffer3D::getData( dp::cuda::BufferHostSharedPtr const& dstBuffer, dp::math::Vec3ui const& dstOffset, dp::math::Vec3ui const& dstStride, dp::math::Vec3ui const& dstExtent , dp::math::Vec3ui const& srcOffset, dp::cuda::StreamSharedPtr const& stream ) { #if !defined(NDEBUG) for ( int i=0 ; i<3 ; i++ ) { DP_ASSERT( dstOffset[i] + ( dstExtent[i] ? dstExtent[i] : m_extent[i] ) <= m_extent[i] ); DP_ASSERT( srcOffset[i] + ( dstExtent[i] ? dstExtent[i] : m_extent[i] ) <= m_extent[i] ); } #endif cudaMemcpy3DParms parms = { 0 }; parms.srcPos = make_cudaPos( m_elementSize * srcOffset[0], srcOffset[1], srcOffset[2] ); parms.srcPtr = m_pitchedPtr; parms.dstPos = make_cudaPos( m_elementSize * dstOffset[0], dstOffset[1], dstOffset[2] ); parms.dstPtr = make_cudaPitchedPtr( dstBuffer->getPointer<void>(), m_elementSize * ( dstStride[0] ? dstStride[0] : m_extent[0] ), dstStride[0] ? dstStride[0] : m_extent[0], dstStride[1] ? dstStride[1] : m_extent[1] ); parms.extent = make_cudaExtent( m_elementSize * ( dstExtent[0] ? dstExtent[0] : m_extent[0] ), dstExtent[1] ? dstExtent[1] : m_extent[1], dstExtent[2] ? dstExtent[2] : m_extent[2] ); parms.kind = cudaMemcpyDeviceToHost; CUDA_VERIFY( cudaMemcpy3DAsync( &parms ) ); }