virtual void copy(DataSpace<DIM3> &devCurrentSize) { cudaMemcpy3DParms params; // assert(this->source->getDataSpace().productOfComponents() <= this->destination->getDataSpace().productOfComponents()); params.srcArray = NULL; params.srcPos = make_cudaPos( this->source->getOffset()[0] * sizeof (TYPE), this->source->getOffset()[1], this->source->getOffset()[2]); params.srcPtr = this->source->getCudaPitched(); params.dstArray = NULL; params.dstPos = make_cudaPos( this->destination->getOffset()[0] * sizeof (TYPE), this->destination->getOffset()[1], this->destination->getOffset()[2]); ; params.dstPtr = this->destination->getCudaPitched(); params.extent = make_cudaExtent( devCurrentSize[0] * sizeof (TYPE), devCurrentSize[1], devCurrentSize[2]); params.kind = cudaMemcpyDeviceToDevice; CUDA_CHECK(cudaMemcpy3DAsync(¶ms, this->getCudaStream())); }
void operator()(Type* dest, const math::Size_t<2u> pitchDest, Type* source, const math::Size_t<2u> pitchSource, const math::Size_t<3>& size, flags::Memcopy::Direction direction) { const cudaMemcpyKind kind[] = {cudaMemcpyHostToDevice, cudaMemcpyDeviceToHost, cudaMemcpyHostToHost, cudaMemcpyDeviceToDevice}; cudaPitchedPtr pitchedPtrDest; pitchedPtrDest.pitch = pitchDest.x(); pitchedPtrDest.ptr = dest; pitchedPtrDest.xsize = size.x(); pitchedPtrDest.ysize = size.y(); cudaPitchedPtr pitchedPtrSource; pitchedPtrSource.pitch = pitchSource.x(); pitchedPtrSource.ptr = source; pitchedPtrSource.xsize = size.x(); pitchedPtrSource.ysize = size.y(); cudaMemcpy3DParms params; params.srcArray = NULL; params.srcPos = make_cudaPos(0,0,0); params.srcPtr = pitchedPtrSource; params.dstArray = NULL; params.dstPos = make_cudaPos(0,0,0); params.dstPtr = pitchedPtrDest; params.extent = make_cudaExtent(size.x() * sizeof(Type), size.y(), size.z()); params.kind = kind[direction]; CUDA_CHECK_NO_EXCEP(cudaMemcpy3D(¶ms)); }
//! @brief Copy the ND-array device array to host array. //! You must allocate the array with the appropriate size. __host__ inline void copy_to_host(T *host_data) const { if (N == 2) { SHAKTI_SAFE_CUDA_CALL(cudaMemcpy2D( host_data, _sizes[0] * sizeof(T), _data, _pitch, _sizes[0] * sizeof(T), _sizes[1], cudaMemcpyDeviceToHost)); } else if (N == 3) { cudaMemcpy3DParms params = { 0 }; params.srcPtr = make_cudaPitchedPtr(reinterpret_cast<void *>(_data), _pitch, _sizes[0], _sizes[1]); params.dstPtr = make_cudaPitchedPtr(host_data, _sizes[0] * sizeof(T), _sizes[0], _sizes[1]); params.extent = make_cudaExtent(_sizes[0]*sizeof(T), _sizes[1], _sizes[2]); params.kind = cudaMemcpyDeviceToHost; SHAKTI_SAFE_CUDA_CALL(cudaMemcpy3D(¶ms)); } else SHAKTI_SAFE_CUDA_CALL(cudaMemcpy(host_data, _data, sizeof(T) * size(), cudaMemcpyDeviceToHost)); }
virtual void copy(DataSpace<DIM3> &hostCurrentSize) { cudaPitchedPtr hostPtr; hostPtr.pitch = this->host->getDataSpace()[0] * sizeof (TYPE); hostPtr.ptr = this->host->getBasePointer(); hostPtr.xsize = this->host->getDataSpace()[0] * sizeof (TYPE); hostPtr.ysize = this->host->getDataSpace()[1]; cudaMemcpy3DParms params; params.dstArray = NULL; params.dstPos = make_cudaPos(this->device->getOffset()[0] * sizeof (TYPE), this->device->getOffset()[1], this->device->getOffset()[2]); params.dstPtr = this->device->getCudaPitched(); params.srcArray = NULL; params.srcPos = make_cudaPos(0, 0, 0); params.srcPtr = hostPtr; params.extent = make_cudaExtent( hostCurrentSize[0] * sizeof (TYPE), hostCurrentSize[1], hostCurrentSize[2]); params.kind = cudaMemcpyHostToDevice; CUDA_CHECK(cudaMemcpy3DAsync(¶ms, this->getCudaStream())); }
bool ControlCubeCache::_readElement(NodeLinkedList<index_node_t> * element) { #ifndef NDEBUG if ((int)element->element > _maxNumCubes) { std::cerr<<"Control Cube CPU Cache, try to write outside reserved memory"<<std::endl; throw; } #endif index_node_t idCube = element->id; float * cube = (_memory + element->element*_sizeElement); if (!checkCubeInside(element->id)) { if (cudaSuccess != cudaMemset((void*)cube, 0, _sizeElement*sizeof(float))) { std::cout<<"---> "<<idCube<<" "<<_minValue<<" "<<_maxValue<<std::endl; LBERROR<<"Control Cube Cache: error copying to a device: "<<cudaGetErrorString(cudaGetLastError()) <<" "<<cube<<" "<<_sizeElement<<std::endl; throw; } return true; } index_node_t idCubeCPU = idCube >> 3*(_levelCube - _cpuCache->getCubeLevel()); float * pCube = _cpuCache->getAndBlockElement(idCubeCPU); if (pCube != 0) { vmml::vector<3, int> coord = getMinBoxIndex2(idCube, _levelCube, _nLevels); vmml::vector<3, int> coordC = getMinBoxIndex2(idCubeCPU, _cpuCache->getCubeLevel(), _nLevels); coord -= coordC; vmml::vector<3, int> realDimCPU = _cpuCache->getRealCubeDim(); cudaMemcpy3DParms myParms = {0}; myParms.srcPtr = make_cudaPitchedPtr((void*)pCube, realDimCPU.z()*sizeof(float), realDimCPU.x(), realDimCPU.y()); //myParms.dstPtr = make_cudaPitchedPtr((void*)cube, _realcubeDim.z()*sizeof(float), _realcubeDim.x(), _realcubeDim.y()); myParms.dstPtr = make_cudaPitchedPtr((void*)cube, _dimCube*sizeof(float), _dimCube, _dimCube); myParms.extent = make_cudaExtent(_dimCube*sizeof(float), _dimCube, _dimCube); myParms.dstPos = make_cudaPos(0,0,0); myParms.srcPos = make_cudaPos(coord.z()*sizeof(float), coord.y(), coord.x()); myParms.kind = cudaMemcpyHostToDevice; if (cudaSuccess != cudaMemcpy3DAsync(&myParms, _stream) || cudaSuccess != cudaStreamSynchronize(_stream)) { std::cout<<"---> "<<idCube<<" "<<_minValue<<" "<<_maxValue<<std::endl; LBERROR<<"Control Cube Cache: error copying to a device: "<<cudaGetErrorString(cudaGetLastError()) <<" "<<cube<<" "<<pCube<<" "<<_sizeElement<<std::endl; throw; } _cpuCache->unlockElement(idCubeCPU); return true; } else { return false; } }
inline cudaArray* MallocArray3D< uchar4 >( VolumeDescription volumeDescription ) { cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc( 8, 8, 8, 8, cudaChannelFormatKindUnsigned ); cudaExtent volumeExtent = make_cudaExtent( volumeDescription.numVoxels.x, volumeDescription.numVoxels.y, volumeDescription.numVoxels.z ); cudaArray* cuArray; MOJO_CUDA_SAFE( cudaMalloc3DArray( &cuArray, &channelDesc, volumeExtent ) ); return cuArray; }
void SingleParticle2dx::Methods::CUDAProjectionMethod::prepareForProjections(SingleParticle2dx::DataStructures::ParticleContainer& cont) { cudaSetDevice(getMyGPU()); cudaStreamCreate(&m_stream); cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat); cudaExtent VS = make_cudaExtent(m_size, m_size, m_size); if( m_alloc_done == false ) { cudaMalloc3DArray(&m_cuArray, &channelDesc, VS); } SingleParticle2dx::real_array3d_type real_data( boost::extents[m_size][m_size][m_size] ); m_context->getRealSpaceData(real_data); unsigned int size = m_size*m_size*m_size*sizeof(float); if( m_alloc_done == false ) { res_data_h = (float*)malloc(m_size*m_size*sizeof(float)); cudaMalloc((void**)&res_data_d, m_size*m_size*sizeof(float)); m_alloc_done = true; } cudaMemcpy3DParms copyParams = {0}; copyParams.srcPtr = make_cudaPitchedPtr((void*)real_data.origin(), VS.width*sizeof(float), VS.width, VS.height); copyParams.dstArray = m_cuArray; copyParams.extent = VS; copyParams.kind = cudaMemcpyHostToDevice; // cudaMemcpy3D(©Params); cudaMemcpy3DAsync(©Params, m_stream); struct cudaResourceDesc resDesc; memset(&resDesc, 0, sizeof(resDesc)); resDesc.resType = cudaResourceTypeArray; resDesc.res.array.array = m_cuArray; struct cudaTextureDesc texDesc; memset(&texDesc, 0, sizeof(texDesc)); texDesc.addressMode[0] = cudaAddressModeClamp; texDesc.addressMode[1] = cudaAddressModeClamp; texDesc.addressMode[2] = cudaAddressModeClamp; texDesc.filterMode = cudaFilterModeLinear; texDesc.readMode = cudaReadModeElementType; texDesc.normalizedCoords = 0; if(m_alloc_done == true) { cudaDestroyTextureObject(m_texObj); } m_texObj = 0; cudaCreateTextureObject(&m_texObj, &resDesc, &texDesc, NULL); }
void cpyD2H() { // D2H“]‘— cudaMemcpy3DParms parms = { 0 }; parms.srcPos = make_cudaPos(0, 0, 0); parms.srcPtr = make_cudaPitchedPtr(vdm.dPtr, sizeof(TYPE) * buf.pitch.x, buf.pitch.x, buf.pitch.y); parms.dstPos = make_cudaPos(sizeof(TYPE) * buf.lower.x, buf.lower.y, buf.lower.z); parms.dstPtr = make_cudaPitchedPtr(vdm.hPtr, sizeof(TYPE) * vdm.pitch.x, vdm.pitch.x, vdm.pitch.y); parms.extent = make_cudaExtent(sizeof(TYPE) * buf.pitch.x, buf.pitch.y, buf.pitch.z); parms.kind = cudaMemcpyDeviceToHost; cudaMemcpy3D(&parms); }
void Buffer3D::fill( int value, dp::math::Vec3ui const& offset, dp::math::Vec3ui const& extent ) { #if !defined(NDEBUG) for ( int i=0 ; i<3 ; i++ ) { DP_ASSERT( offset[i] + ( extent[i] ? extent[i] : m_extent[i] ) <= m_extent[i] ); } #endif DP_ASSERT( ( offset[0] == 0 ) && ( offset[1] == 0 ) && ( offset[2] == 0 ) ); CUDA_VERIFY( cudaMemset3D( m_pitchedPtr, value, make_cudaExtent( m_elementSize * ( extent[0] ? extent[0] : m_extent[0] ), extent[1] ? extent[1] : m_extent[1], extent[2] ? extent[2] : m_extent[2] ) ) ); }
TEST(Malloc3DArray, NegativeChannels) { struct cudaArray * ary; struct cudaChannelFormatDesc dsc; dsc.x = dsc.y = dsc.z = 8; dsc.w = -8; dsc.f = cudaChannelFormatKindSigned; cudaError_t ret; ret = cudaMalloc3DArray(&ary, &dsc, make_cudaExtent(1, 1, 1), 0); EXPECT_EQ(cudaErrorInvalidChannelDescriptor, ret); }
void CudaImagePyramidHost::copyFromHost(int width, int height, const void* source, int layer) { assert(isInitialized()); assert(_textureType == cudaTextureType2DLayered); cudaMemcpy3DParms myParms = {0}; myParms.srcPtr = make_cudaPitchedPtr((void*)source,width*_typeSize,width,height); myParms.srcPos = make_cudaPos(0,0,0); myParms.dstArray = _storage; myParms.dstPos = make_cudaPos(0,0,layer); myParms.extent = make_cudaExtent(width,height,1); myParms.kind = cudaMemcpyHostToDevice; cudaMemcpy3D(&myParms); checkCUDAError("Memcpy error", _name); }
void MemManager::cpyD2H(Task task) { cudaStream_t stream = task.stream; for (auto vdm : vdms) { VDMBuffer buf = vdm.bufs[task.n]; // D2H転送 cudaMemcpy3DParms parms = { 0 }; parms.srcPos = make_cudaPos(0, 0, 0); parms.srcPtr = make_cudaPitchedPtr(vdm.dPtr, sizeof(TYPE) * buf.pitchx(), buf.pitchx(), buf.pitchy()); parms.dstPos = make_cudaPos(sizeof(TYPE) * buf.lower.x, buf.lower.y, buf.lower.z); parms.dstPtr = make_cudaPitchedPtr(vdm.hPtr, sizeof(TYPE) * vdm.pitch.x, vdm.pitch.x, vdm.pitch.y); parms.extent = make_cudaExtent(sizeof(TYPE) * buf.pitchx(), buf.pitchy(), buf.pitchz()); parms.kind = cudaMemcpyDeviceToHost; cudaMemcpy3DAsync(&parms, stream); } }
TEST(Malloc3DArray, Attributes) { struct cudaArray * ary; struct cudaChannelFormatDesc dsc; dsc.x = dsc.y = dsc.z = dsc.w = 8; dsc.f = cudaChannelFormatKindSigned; cudaError_t ret; ret = cudaMalloc3DArray(&ary, &dsc, make_cudaExtent(1, 1, 1), 0); ASSERT_EQ(cudaSuccess, ret); struct cudaPointerAttributes attr; ret = cudaPointerGetAttributes(&attr, ary); EXPECT_EQ(cudaErrorInvalidValue, ret); EXPECT_EQ(cudaSuccess, cudaFreeArray(ary)); }
void Buffer3D::setData( void const* srcData, dp::math::Vec3ui const& srcOffset, dp::math::Vec3ui const& srcStride, dp::math::Vec3ui const& srcExtent, dp::math::Vec3ui const& dstOffset ) { #if !defined(NDEBUG) for ( int i=0 ; i<3 ; i++ ) { DP_ASSERT( srcOffset[i] + ( srcExtent[i] ? srcExtent[i] : m_extent[i] ) <= m_extent[i] ); DP_ASSERT( dstOffset[i] + ( srcExtent[i] ? srcExtent[i] : m_extent[i] ) <= m_extent[i] ); } #endif cudaMemcpy3DParms parms = { 0 }; parms.srcPos = make_cudaPos( m_elementSize * srcOffset[0], srcOffset[1], srcOffset[2] ); parms.srcPtr = make_cudaPitchedPtr( const_cast<void *>(srcData), m_elementSize * ( srcStride[0] ? srcStride[0] : m_extent[0] ), srcStride[0] ? srcStride[0] : m_extent[0], srcStride[1] ? srcStride[1] : m_extent[1] ); parms.dstPos = make_cudaPos( m_elementSize * dstOffset[0], dstOffset[1], dstOffset[2] ); parms.dstPtr = m_pitchedPtr; parms.extent = make_cudaExtent( m_elementSize * ( srcExtent[0] ? srcExtent[0] : m_extent[0] ), srcExtent[1] ? srcExtent[1] : m_extent[1], srcExtent[2] ? srcExtent[2] : m_extent[2] ); parms.kind = cudaMemcpyHostToDevice; CUDA_VERIFY( cudaMemcpy3D( &parms ) ); }
void MemManager::cpyH2D(Task task) { cudaStream_t stream = task.stream; for (auto vdm : vdms) { VDMBuffer buf = vdm.bufs[task.n]; // H2D転送 cudaMemcpy3DParms parms = { 0 }; parms.dstPos = make_cudaPos(0, 0, 0); parms.dstPtr = make_cudaPitchedPtr(vdm.dPtr, sizeof(TYPE) * buf.pitchx(), buf.pitchx(), buf.pitchy()); parms.srcPos = make_cudaPos(sizeof(TYPE) * buf.lower.x, buf.lower.y, buf.lower.z); parms.srcPtr = make_cudaPitchedPtr(vdm.hPtr, sizeof(TYPE) * vdm.pitch.x, vdm.pitch.x, vdm.pitch.y); parms.extent = make_cudaExtent(sizeof(TYPE) * buf.pitchx(), buf.pitchy(), buf.pitchz()); parms.kind = cudaMemcpyHostToDevice; cudaMemcpy3DAsync(&parms, stream); } cudaMemcpyAsync(confsetDevPtr, confsetHosPtrs[task.n], sizeof(MKConf)*nVdm, cudaMemcpyHostToDevice, stream); }
void Buffer3D::getData( dp::cuda::BufferHostSharedPtr const& dstBuffer, dp::math::Vec3ui const& dstOffset, dp::math::Vec3ui const& dstStride, dp::math::Vec3ui const& dstExtent , dp::math::Vec3ui const& srcOffset, dp::cuda::StreamSharedPtr const& stream ) { #if !defined(NDEBUG) for ( int i=0 ; i<3 ; i++ ) { DP_ASSERT( dstOffset[i] + ( dstExtent[i] ? dstExtent[i] : m_extent[i] ) <= m_extent[i] ); DP_ASSERT( srcOffset[i] + ( dstExtent[i] ? dstExtent[i] : m_extent[i] ) <= m_extent[i] ); } #endif cudaMemcpy3DParms parms = { 0 }; parms.srcPos = make_cudaPos( m_elementSize * srcOffset[0], srcOffset[1], srcOffset[2] ); parms.srcPtr = m_pitchedPtr; parms.dstPos = make_cudaPos( m_elementSize * dstOffset[0], dstOffset[1], dstOffset[2] ); parms.dstPtr = make_cudaPitchedPtr( dstBuffer->getPointer<void>(), m_elementSize * ( dstStride[0] ? dstStride[0] : m_extent[0] ), dstStride[0] ? dstStride[0] : m_extent[0], dstStride[1] ? dstStride[1] : m_extent[1] ); parms.extent = make_cudaExtent( m_elementSize * ( dstExtent[0] ? dstExtent[0] : m_extent[0] ), dstExtent[1] ? dstExtent[1] : m_extent[1], dstExtent[2] ? dstExtent[2] : m_extent[2] ); parms.kind = cudaMemcpyDeviceToHost; CUDA_VERIFY( cudaMemcpy3DAsync( &parms ) ); }
const char *sOriginal[] = { "volume.ppm", NULL }; const char *sReference[] = { "ref_volume.ppm", NULL }; const char *sSDKsample = "CUDA 3D Volume Render"; char *volumeFilename = "Bucky.raw"; cudaExtent volumeSize = make_cudaExtent(32, 32, 32); typedef unsigned char VolumeType; //char *volumeFilename = "mrt16_angio.raw"; //cudaExtent volumeSize = make_cudaExtent(416, 512, 112); //typedef unsigned short VolumeType; uint width = 512, height = 512; dim3 blockSize(16, 16); dim3 gridSize; float3 viewRotation; float3 viewTranslation = make_float3(0.0, 0.0, -4.0f); float invViewMatrix[12]; float density = 0.05f;
Buffer3D::Buffer3D( size_t elementSize, dp::math::Vec3ui const& extent ) : m_elementSize( elementSize ) , m_extent( extent ) { CUDA_VERIFY( cudaMalloc3D( &m_pitchedPtr, make_cudaExtent( m_elementSize * m_extent[0], m_extent[1], m_extent[2] ) ) ); }
void VolSkin::init( int width, int height, TetMesh *tm ) { this->width = width; this->height = height; tetMesh = tm; // TEMP initialize volume data cudaExtent volumeSize = make_cudaExtent(128, 128, 128); //cudaExtent volumeSize = make_cudaExtent(256, 256, 256); // generate raw volume data float *h_densityData = (float*)malloc( sizeof(float)*volumeSize.width*volumeSize.height*volumeSize.depth ); math::PerlinNoise pn; pn.setDepth( 4 ); pn.setFrequency(3.0f); //pn.setInflection(true); for( int k=0;k<volumeSize.depth;++k ) for( int j=0;j<volumeSize.height;++j ) for( int i=0;i<volumeSize.width;++i ) { int index = k*volumeSize.width*volumeSize.height + j*volumeSize.width + i; math::Vec3f uvw( (float)(i)/(float)(volumeSize.width), (float)(j)/(float)(volumeSize.height), (float)(k)/(float)(volumeSize.depth)); float t = (float)(j)/(float)(volumeSize.height); //h_densityData[index] = 0.5f; //h_densityData[index] = (1.0f-t)*1.0f; h_densityData[index] = std::max( 0.0f, pn.perlinNoise_3D( uvw.x, uvw.y*2.0, uvw.z ) )*1.0f; // cylinder //h_densityData[index] = std::max( 0.0f, pn.perlinNoise_3D( uvw.x*2.0f, uvw.y*2.0f, uvw.z*2.0f ))*1.0f; // tetraeder //h_densityData[index] = (uvw.getLength() < 0.2f ? 1.0f : 0.0f)*2.0f; } // create 3D array d_densityArray = 0; cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>(); cudaMalloc3DArray(&d_densityArray, &channelDesc, volumeSize); // copy data to 3D array cudaMemcpy3DParms copyParams = {0}; copyParams.srcPtr = make_cudaPitchedPtr((void*)h_densityData, volumeSize.width*sizeof(float), volumeSize.width, volumeSize.height); copyParams.dstArray = d_densityArray; copyParams.extent = volumeSize; copyParams.kind = cudaMemcpyHostToDevice; cudaMemcpy3D(©Params); // TMP /* h_debugVec.resize( 1000.0f ); d_debugVec = h_debugVec; h_debugInfo.samples = convertToKernel(d_debugVec); h_debugInfo.numSamples = 0; cudaMemcpyToSymbol( d_debugInfo, &h_debugInfo, sizeof(DebugInfo), 0, cudaMemcpyHostToDevice ); */ // setup lighting m_light0.cam = base::CameraPtr( new base::Camera() ); m_light0.cam->m_aspectRatio = 1.0; //m_light0.cam->m_transform = math::createLookAtMatrix( math::Vec3f( -2.0f, -2.0f, 2.0f ), math::Vec3f( 0.0f, 0.0f, 0.0f ), math::Vec3f( 0.0f, 1.0f, 0.0f ), false ); //m_light0.cam->m_transform = math::Matrix44f::TranslationMatrix( 0.3f, 0.15f, 2.0f ); //m_light0.cam->m_transform = math::Matrix44f::TranslationMatrix( -3.0f, 0.0f, 0.0f ); m_light0.cam->m_transform = math::createLookAtMatrix( math::Vec3f( 4.0f, 0.0f, 0.0f ), math::Vec3f( 0.0f, 0.0f, 0.0f ), math::Vec3f( 0.0f, 1.0f, 0.0f ), false ); m_light0.cam->update(); cudaMalloc( &m_light0.d_dctCoefficients, width*height*sizeof(float)*8 );// 8 floats /6 coefficients // set defaults setTotalCrossSection( 10.0f ); setAlbedo( 1.0f ); setAbsorptionColor( math::Vec3f(0.5f,0.5f, 0.5f) ); setScatteringColor(math::Vec3f(0.5f, 0.5f, 0.5f)); setLight(0, math::Vec3f(1.0f, 1.0f, 1.0f), 0.0f); setTime( 0.0f ); setStepSize( 0.01f ); // get tetmesh onto gpu gpuUploadTetMesh(); }
TEST(Malloc3DArray, NullArguments) { struct cudaArray * ary; struct cudaChannelFormatDesc dsc; dsc.x = dsc.y = dsc.z = dsc.w = 8; dsc.f = cudaChannelFormatKindSigned; // Commented out cases segfault. cudaError_t ret; ret = cudaMalloc3DArray(NULL, NULL, make_cudaExtent(0, 0, 0), 0); EXPECT_EQ(cudaErrorInvalidValue, ret); ret = cudaMalloc3DArray(NULL, NULL, make_cudaExtent(0, 0, 8), 0); EXPECT_EQ(cudaErrorInvalidValue, ret); ret = cudaMalloc3DArray(NULL, NULL, make_cudaExtent(0, 8, 0), 0); EXPECT_EQ(cudaErrorInvalidValue, ret); ret = cudaMalloc3DArray(NULL, NULL, make_cudaExtent(0, 8, 8), 0); EXPECT_EQ(cudaErrorInvalidValue, ret); // ret = cudaMalloc3DArray(NULL, NULL, make_cudaExtent(8, 0, 0), 0); // EXPECT_EQ(cudaErrorInvalidValue, ret); ret = cudaMalloc3DArray(NULL, NULL, make_cudaExtent(8, 0, 8), 0); EXPECT_EQ(cudaErrorInvalidValue, ret); // ret = cudaMalloc3DArray(NULL, NULL, make_cudaExtent(8, 8, 0), 0); // EXPECT_EQ(cudaErrorInvalidValue, ret); // ret = cudaMalloc3DArray(NULL, NULL, make_cudaExtent(8, 8, 8), 0); // EXPECT_EQ(cudaErrorInvalidValue, ret); ret = cudaMalloc3DArray(&ary, NULL, make_cudaExtent(0, 0, 0), 0); EXPECT_EQ(cudaSuccess, ret); ret = cudaFreeArray(ary); EXPECT_EQ(cudaSuccess, ret); ret = cudaMalloc3DArray(&ary, NULL, make_cudaExtent(0, 0, 8), 0); EXPECT_EQ(cudaSuccess, ret); ret = cudaFreeArray(ary); EXPECT_EQ(cudaSuccess, ret); ret = cudaMalloc3DArray(&ary, NULL, make_cudaExtent(0, 8, 0), 0); EXPECT_EQ(cudaSuccess, ret); ret = cudaFreeArray(ary); EXPECT_EQ(cudaSuccess, ret); ret = cudaMalloc3DArray(&ary, NULL, make_cudaExtent(0, 8, 8), 0); EXPECT_EQ(cudaSuccess, ret); ret = cudaFreeArray(ary); EXPECT_EQ(cudaSuccess, ret); // ret = cudaMalloc3DArray(&ary, NULL, make_cudaExtent(8, 0, 0), 0); // EXPECT_EQ(cudaErrorInvalidValue, ret); /** * There's no reason why this should pass... ret = cudaMalloc3DArray(&ary, NULL, make_cudaExtent(8, 0, 8), 0); EXPECT_EQ(cudaSuccess, ret); ret = cudaFreeArray(ary); EXPECT_EQ(cudaSuccess, ret); */ // ret = cudaMalloc3DArray(&ary, NULL, make_cudaExtent(8, 8, 0), 0); // EXPECT_EQ(cudaErrorInvalidValue, ret); // ret = cudaMalloc3DArray(&ary, NULL, make_cudaExtent(8, 8, 8), 0); // EXPECT_EQ(cudaErrorInvalidValue, ret); ret = cudaMalloc3DArray(NULL, &dsc, make_cudaExtent(0, 0, 0), 0); EXPECT_EQ(cudaErrorInvalidValue, ret); ret = cudaMalloc3DArray(NULL, &dsc, make_cudaExtent(0, 0, 8), 0); EXPECT_EQ(cudaErrorInvalidValue, ret); ret = cudaMalloc3DArray(NULL, &dsc, make_cudaExtent(0, 8, 0), 0); EXPECT_EQ(cudaErrorInvalidValue, ret); ret = cudaMalloc3DArray(NULL, &dsc, make_cudaExtent(0, 8, 8), 0); EXPECT_EQ(cudaErrorInvalidValue, ret); // ret = cudaMalloc3DArray(NULL, &dsc, make_cudaExtent(8, 0, 0), 0); // EXPECT_EQ(cudaErrorInvalidValue, ret); ret = cudaMalloc3DArray(NULL, &dsc, make_cudaExtent(8, 0, 8), 0); EXPECT_EQ(cudaErrorInvalidValue, ret); // ret = cudaMalloc3DArray(NULL, &dsc, make_cudaExtent(8, 8, 0), 0); // EXPECT_EQ(cudaErrorInvalidValue, ret); // ret = cudaMalloc3DArray(NULL, &dsc, make_cudaExtent(8, 8, 8), 0); // EXPECT_EQ(cudaErrorInvalidValue, ret); }
TEST(Malloc3DArray, Limits) { struct cudaArray * ary; struct cudaChannelFormatDesc dsc; dsc.x = dsc.y = dsc.z = dsc.w = 8; dsc.f = cudaChannelFormatKindSigned; cudaError_t ret; ret = cudaMalloc3DArray(&ary, &dsc, make_cudaExtent(0, 0, 0), 0); EXPECT_EQ(cudaSuccess, ret); if (ret == cudaSuccess) { EXPECT_EQ(cudaSuccess, cudaFreeArray(ary)); } int device; ret = cudaGetDevice(&device); ASSERT_EQ(cudaSuccess, ret); struct cudaDeviceProp prop; ret = cudaGetDeviceProperties(&prop, device); ASSERT_EQ(cudaSuccess, ret); /* Adapt to what's available by a safe margin */ size_t targetable = prop.totalGlobalMem / 8; if ((size_t) prop.maxTexture1D < targetable) { ret = cudaMalloc3DArray(&ary, &dsc, make_cudaExtent(prop.maxTexture1D, 0, 0), 0); EXPECT_EQ(cudaSuccess, ret); if (ret == cudaSuccess) { EXPECT_EQ(cudaSuccess, cudaFreeArray(ary)); } ret = cudaMalloc3DArray(&ary, &dsc, make_cudaExtent(prop.maxTexture1D + 1, 0, 0), 0); EXPECT_EQ(cudaErrorInvalidValue, ret); if (ret == cudaSuccess) { EXPECT_EQ(cudaSuccess, cudaFreeArray(ary)); } } if ((size_t) prop.maxTexture2D[0] < targetable) { ret = cudaMalloc3DArray(&ary, &dsc, make_cudaExtent(prop.maxTexture2D[0], 1, 0), 0); EXPECT_EQ(cudaSuccess, ret); if (ret == cudaSuccess) { EXPECT_EQ(cudaSuccess, cudaFreeArray(ary)); } ret = cudaMalloc3DArray(&ary, &dsc, make_cudaExtent(prop.maxTexture2D[0] + 1, 1, 0), 0); EXPECT_EQ(cudaErrorInvalidValue, ret); if (ret == cudaSuccess) { EXPECT_EQ(cudaSuccess, cudaFreeArray(ary)); } } if ((size_t) prop.maxTexture2D[1] < targetable) { ret = cudaMalloc3DArray(&ary, &dsc, make_cudaExtent(1, prop.maxTexture2D[1], 0), 0); EXPECT_EQ(cudaSuccess, ret); if (ret == cudaSuccess) { EXPECT_EQ(cudaSuccess, cudaFreeArray(ary)); } ret = cudaMalloc3DArray(&ary, &dsc, make_cudaExtent(1, prop.maxTexture2D[1] + 1, 0), 0); EXPECT_EQ(cudaErrorInvalidValue, ret); if (ret == cudaSuccess) { EXPECT_EQ(cudaSuccess, cudaFreeArray(ary)); } } if ((size_t) prop.maxTexture2D[0] * prop.maxTexture2D[1] < targetable) { ret = cudaMalloc3DArray(&ary, &dsc, make_cudaExtent(prop.maxTexture2D[0], prop.maxTexture2D[1], 0), 0); EXPECT_EQ(cudaSuccess, ret); if (ret == cudaSuccess) { EXPECT_EQ(cudaSuccess, cudaFreeArray(ary)); } ret = cudaMalloc3DArray(&ary, &dsc, make_cudaExtent(prop.maxTexture2D[0], prop.maxTexture2D[1] + 1, 0), 0); EXPECT_EQ(cudaErrorInvalidValue, ret); if (ret == cudaSuccess) { EXPECT_EQ(cudaSuccess, cudaFreeArray(ary)); } ret = cudaMalloc3DArray(&ary, &dsc, make_cudaExtent(prop.maxTexture2D[0] + 1, prop.maxTexture2D[1], 0), 0); EXPECT_EQ(cudaErrorInvalidValue, ret); if (ret == cudaSuccess) { EXPECT_EQ(cudaSuccess, cudaFreeArray(ary)); } ret = cudaMalloc3DArray(&ary, &dsc, make_cudaExtent(prop.maxTexture2D[0] + 1, prop.maxTexture2D[1] + 1, 0), 0); EXPECT_EQ(cudaErrorInvalidValue, ret); if (ret == cudaSuccess) { EXPECT_EQ(cudaSuccess, cudaFreeArray(ary)); } } else if ((size_t) prop.maxTexture2D[0] * prop.maxTexture2D[1] > prop.totalGlobalMem) { ret = cudaMalloc3DArray(&ary, &dsc, make_cudaExtent(prop.maxTexture2D[0], prop.maxTexture2D[1], 0), 0); EXPECT_EQ(cudaErrorMemoryAllocation, ret); } ret = cudaMalloc3DArray(&ary, &dsc, make_cudaExtent(1, 1, 1), 0); EXPECT_EQ(cudaSuccess, ret); if (ret == cudaSuccess) { EXPECT_EQ(cudaSuccess, cudaFreeArray(ary)); } ret = cudaMalloc3DArray(&ary, &dsc, make_cudaExtent(64, 64, 64), 0); EXPECT_EQ(cudaSuccess, ret); if (ret == cudaSuccess) { EXPECT_EQ(cudaSuccess, cudaFreeArray(ary)); } /* TODO: More 3D tests. */ }
void CudaImagePyramidHost::initialize(int width, int height, cudaTextureFilterMode filter_mode, int depth) { qDebug() << "pyramid host initializing with params: " << width << height << filter_mode << depth; if (isInitialized() && width == _baseWidth && height == _baseHeight && filter_mode == _filterMode) { return; } clear(); qDebug() << "Clear done."; _baseWidth = width; _baseHeight = height; _filterMode = filter_mode; _numLayers = depth; // Get the texture and its channel descriptor to allocate the storage. const textureReference* constTexRefPtr=NULL; cudaGetTextureReference(&constTexRefPtr, _texture_name); qDebug() << "Texture Ref got:" << _name; if (constTexRefPtr == 0) { qDebug() << "constTexRefPtr==0"; } checkCUDAError("Can't get tex ref for init TEXTURE_PYRAMID", _name); cudaChannelFormatDesc formatDesc = constTexRefPtr->channelDesc; if(_textureType == cudaTextureType2DLayered){ cudaDeviceProp prop; qDebug() << "to get CUDA device prop"; cudaGetDeviceProperties(&prop,0); qDebug() << "CUDA Device Prop got"; if(prop.maxTexture2DLayered[0] < _baseWidth || prop.maxTexture2DLayered[1] < _baseHeight || prop.maxTexture2DLayered[2] < _numLayers){ qDebug()<< "Max layered texture size:" << prop.maxTexture2DLayered[0] << " x " << prop.maxTexture2DLayered[1] << " x " << prop.maxTexture2DLayered[2]; assert(0); } cudaExtent extent = make_cudaExtent(_baseWidth, _baseHeight, _numLayers); cudaMalloc3DArray(&_storage, &formatDesc, extent, cudaArrayLayered); }else{ cudaMallocArray(&_storage, &formatDesc, _baseWidth, _baseHeight); } checkCUDAError("Failure to allocate", _name); qDebug() << "allocate done"; // Set texture parameters. // Evil hack to get around an apparent bug in the cuda api: // cudaGetTextureReference only returns a const reference, and // there is no way to set the parameters with a reference other // than cast it to non-const. textureReference* texRefPtr=NULL; texRefPtr = const_cast<textureReference*>( constTexRefPtr ); texRefPtr->addressMode[0] = cudaAddressModeClamp; texRefPtr->addressMode[1] = cudaAddressModeClamp; texRefPtr->filterMode = filter_mode; texRefPtr->normalized = false; // Use unnormalized (pixel) coordinates for addressing. This forbids texture mode wrap. bindTexture(); qDebug() << "texture binded"; bool found = false; for (size_t i = 0; i < _instances.size(); i++) { if (_instances[i] == this) found = true; } if (!found) { qDebug() << "Not found"; _instances.push_back(this); } qDebug() << "paramid host initialized."; }