void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic) { /* determine format */ CUarray_format_enum format; size_t dsize = datatype_size(mem.data_type); size_t size = mem.memory_size(); switch(mem.data_type) { case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break; case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; break; case TYPE_INT: format = CU_AD_FORMAT_SIGNED_INT32; break; case TYPE_FLOAT: format = CU_AD_FORMAT_FLOAT; break; default: assert(0); return; } CUtexref texref = NULL; cuda_push_context(); cuda_assert(cuModuleGetTexRef(&texref, cuModule, name)) if(!texref) { cuda_pop_context(); return; } if(interpolation) { CUarray handle = NULL; CUDA_ARRAY_DESCRIPTOR desc; desc.Width = mem.data_width; desc.Height = mem.data_height; desc.Format = format; desc.NumChannels = mem.data_elements; cuda_assert(cuArrayCreate(&handle, &desc)) if(!handle) { cuda_pop_context(); return; } if(mem.data_height > 1) { CUDA_MEMCPY2D param; memset(¶m, 0, sizeof(param)); param.dstMemoryType = CU_MEMORYTYPE_ARRAY; param.dstArray = handle; param.srcMemoryType = CU_MEMORYTYPE_HOST; param.srcHost = (void*)mem.data_pointer; param.srcPitch = mem.data_width*dsize*mem.data_elements; param.WidthInBytes = param.srcPitch; param.Height = mem.data_height; cuda_assert(cuMemcpy2D(¶m)) } else
// Creates a 2D CUDA array of width x height elements with the given element
// format and channel count, wraps it in a new CuTexture tagged with this
// context, and hands the wrapper back through ppTexture.
//
// width, height  extent of the array, in elements
// format         element format stored in the descriptor and in the wrapper
// numChannels    channels per element
// ppTexture      receives the new texture (swapped in on success)
//
// Returns CUDA_SUCCESS after the swap. The result of cuArrayCreate is handled
// by HANDLE_RESULT(), a macro defined elsewhere that presumably inspects the
// local named `result` -- that name must therefore not be changed.
CUresult CuContext::CreateTex2D(int width, int height, CUarray_format format,
	int numChannels, TexturePtr *ppTexture) {

	// Describe the array; the same values are mirrored into the wrapper below.
	CUDA_ARRAY_DESCRIPTOR desc;
	desc.Width = width;
	desc.Height = height;
	desc.Format = format;
	desc.NumChannels = numChannels;

	// Book-keeping copy of the texture properties.
	TexturePtr tex(new CuTexture);
	tex->_context = this;
	tex->_dim = 2;
	tex->_width = width;
	tex->_height = height;
	tex->_depth = 0;
	tex->_format = format;
	tex->_numChannels = numChannels;

	CUresult result = cuArrayCreate(&tex->_texture, &desc);
	HANDLE_RESULT();

	// Only publish the texture once the array exists.
	ppTexture->swap(tex);
	return CUDA_SUCCESS;
}
void swanMakeTexture1DEx( const char *modname, const char *texname, size_t width, void *ptr, size_t typesize, int flags ) { int err; // get the texture CUtexref cu_texref; int mode, channels; CUarray array; CUDA_MEMCPY2D copyParam; CUDA_ARRAY_DESCRIPTOR p; // get the module CUmodule mod = swanGetModule( modname ); err = cuModuleGetTexRef(&cu_texref, mod, texname ); if( err != CUDA_SUCCESS) { error( "swanMakeTexture1D failed -- texture not found" ); } p.Width = width; p.Height= 1; mode = flags & TEXTURE_TYPE_MASK; channels = typesize / sizeof(float); switch( mode ) { case TEXTURE_FLOAT: p.Format = CU_AD_FORMAT_FLOAT; p.NumChannels = channels; break; case TEXTURE_INT: p.Format = CU_AD_FORMAT_SIGNED_INT32; p.NumChannels = channels; break; case TEXTURE_UINT: p.Format = CU_AD_FORMAT_UNSIGNED_INT32; p.NumChannels = channels; break; default: error( "swanMakeTexture1D failed -- invalid format" ); } err = cuArrayCreate( &array , &p); if( err != CUDA_SUCCESS) { error( "swanMakeTexture1D failed -- array create failed" ); } memset(©Param, 0, sizeof(copyParam)); copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY; copyParam.dstArray = array; copyParam.srcMemoryType = CU_MEMORYTYPE_HOST; copyParam.srcHost = ptr; copyParam.srcPitch = width * sizeof(float); copyParam.WidthInBytes = copyParam.srcPitch; copyParam.Height = 1; // err = cuMemcpy2D(©Param); err = cuMemcpyHtoA( array, 0, ptr, typesize * width ); if( err != CUDA_SUCCESS) { error( "swanMakeTexture1D failed -- memcpy failed" ); } err = cuTexRefSetArray ( cu_texref, array, CU_TRSA_OVERRIDE_FORMAT ); if( err != CUDA_SUCCESS) { error( "swanMakeTexture1D failed -- setarray failed" ); } if( (flags & TEXTURE_INTERPOLATE) == TEXTURE_INTERPOLATE ) { err = cuTexRefSetFilterMode( cu_texref, CU_TR_FILTER_MODE_LINEAR ); } else { err = cuTexRefSetFilterMode( cu_texref, CU_TR_FILTER_MODE_POINT ); } if( err != CUDA_SUCCESS) { error( "swanBindToTexture1D failed -- setfiltermode failed" ); } if( (flags & TEXTURE_NORMALISE ) == TEXTURE_NORMALISE ) 
{ err = cuTexRefSetFlags(cu_texref, CU_TRSF_NORMALIZED_COORDINATES); err |= cuTexRefSetAddressMode(cu_texref, 0, CU_TR_ADDRESS_MODE_CLAMP); err |= cuTexRefSetAddressMode(cu_texref, 1, CU_TR_ADDRESS_MODE_CLAMP); if( err != CUDA_SUCCESS) { error( "swanBindToTexture1D failed -- setflags 1 failed" ); } } err = cuTexRefSetFormat( cu_texref, CU_AD_FORMAT_FLOAT, channels ); if( err != CUDA_SUCCESS) { error( "swanBindToTexture1D failed -- setformat failed" ); } //printf("TEX BIND DONE\n"); }
// Copies the linear device buffer m_dptr (row pitch m_pitch) into the CUDA
// array that backs the graphics resource m_cudaResource.
//
// The resource is mapped on streamID, sub-resource 0 / mip level 0 is fetched
// as a CUarray, a 2D copy of m_xByteSz bytes x m_creationData.sz[1] rows is
// issued with cuMemcpy2D, and the resource is unmapped again.
//
// streamID  stream passed to cuGraphicsMapResources / cuGraphicsUnmapResources
//
// Returns true only when map, copy and unmap all succeed. On every failure
// after a successful map the resource is unmapped before returning false.
bool ResourceCUDA::updateResourceFromCUDA(CUstream streamID)
{
    CUresult res = cuGraphicsMapResources( 1, &m_cudaResource, streamID );
    if(res)
    {
        LOGE("Error>> CUDA failed map some target resources\n");
        return false;
    }
    //
    // Fetch the destination array and perform the copy
    //
    CUarray cuArray;
//#   define DBGDUMMYCOPY
#   ifdef DBGDUMMYCOPY
    // interop has issues... let's compare with a copy to a basic cuda array
    int www = m_xByteSz/4;
    CUDA_ARRAY_DESCRIPTOR descr;
    descr.Width       = www;
    descr.Height      = m_creationData.sz[1];
    descr.Format      = CU_AD_FORMAT_UNSIGNED_INT8;
    descr.NumChannels = 4;
    res = cuArrayCreate(&cuArray, &descr);
#   else
    res = cuGraphicsSubResourceGetMappedArray( &cuArray, m_cudaResource, 0/*arrayIndex*/, 0/*mipLevel*/);
#   endif
    if(res)
    {
        LOGE("Error>> CUDA failed to get the destination array\n");
        cuGraphicsUnmapResources( 1, &m_cudaResource, streamID );
        return false;
    }

    // Zero everything, then fill in only the fields this copy uses. Naming the
    // fields avoids depending on the member order of CUDA_MEMCPY2D.
    CUDA_MEMCPY2D copyInfo = {};
    copyInfo.srcMemoryType = CU_MEMORYTYPE_DEVICE;
    copyInfo.srcDevice     = m_dptr;
    copyInfo.srcPitch      = m_pitch;               // ignored when src is an array
    copyInfo.dstMemoryType = CU_MEMORYTYPE_ARRAY;
    copyInfo.dstArray      = cuArray;
    copyInfo.WidthInBytes  = m_xByteSz;             // width of the copy in bytes
    copyInfo.Height        = m_creationData.sz[1];  // number of rows

    //LOGI("cuMemcpy2D(): CU_MEMORYTYPE_DEVICE source=%x pitch=%d CU_MEMORYTYPE_ARRAY=%x widthBytes=%d height=%d\n",m_dptr, m_pitch, cuArray, m_xByteSz, m_creationData.sz[1]);
    res = cuMemcpy2D( &copyInfo );
    if(res)
    {
        LOGE("Error>> CUDA failed to copy linear memory to texture (array memory)\n");
#       ifdef DBGDUMMYCOPY
        // the scratch array is owned by this function: release it on failure too
        cuArrayDestroy(cuArray);
#       endif
        cuGraphicsUnmapResources( 1, &m_cudaResource, streamID );
        return false;
    }
#   ifdef DBGDUMMYCOPY
    res = cuArrayDestroy(cuArray);
#   endif

    res = cuGraphicsUnmapResources( 1, &m_cudaResource, streamID );
    if(res)
    {
        LOGE("Error>> CUDA failed unmap the resource for output result of the kernel\n");
        return false;
    }
    return true;
}
// Allocates a texture-backed memory object on the CUDA device.
//
// A CUDA array described by (dims, type) is created, a surface object is
// created on top of it, and the contents of `source` are copied in through
// memory_v::copyFrom().
//
// dim          texture dimensionality; dim == 1 gives a 1D array, any other
//              value is laid out as dims.x by dims.y
// dims         extents; x, y and z are all recorded in textureInfo
// source       data handed to copyFrom() after the array exists
// type         supplies bytes per entry, channel count and the CUDA format
// permissions  not used by this implementation
//
// Returns the newly allocated memory_v. Driver-call failures are handled by
// OCCA_CUDA_CHECK.
memory_v* device_t<CUDA>::talloc(const int dim, const occa::dim &dims,
                                 void *source,
                                 occa::formatType type, const int permissions){
  OCCA_EXTRACT_DATA(CUDA, Device);

  const bool is1D = (dim == 1);

  memory_v *texMem           = new memory_t<CUDA>;
  CUDATextureData_t *texData = new CUDATextureData_t;

  // Generic book-keeping on the memory object.
  texMem->dev       = dev;
  texMem->handle    = texData;
  texMem->isTexture = true;

  if(is1D)
    texMem->size = dims.x * type.bytes();
  else
    texMem->size = (dims.x * dims.y) * type.bytes();

  texMem->textureInfo.dim          = dim;
  texMem->textureInfo.w            = dims.x;
  texMem->textureInfo.h            = dims.y;
  texMem->textureInfo.d            = dims.z;
  texMem->textureInfo.bytesInEntry = type.bytes();

  CUarray      &cuArr   = texData->array;
  CUsurfObject &surfObj = texData->surface;

  // Array descriptor: the height is left at 0 for dim == 1 (the driver API
  // documents a zero height as a 1D array).
  CUDA_ARRAY_DESCRIPTOR arrDesc;
  memset(&arrDesc, 0, sizeof(arrDesc));

  arrDesc.Width       = dims.x;
  arrDesc.Format      = *((CUarray_format*) type.format<CUDA>());
  arrDesc.NumChannels = type.count();

  if(!is1D)
    arrDesc.Height = dims.y;

  OCCA_CUDA_CHECK("Device: Creating Array",
                  cuArrayCreate(&cuArr, &arrDesc) );

  // Surface object created over the freshly created array.
  CUDA_RESOURCE_DESC resDesc;
  memset(&resDesc, 0, sizeof(resDesc));

  resDesc.resType          = CU_RESOURCE_TYPE_ARRAY;
  resDesc.res.array.hArray = cuArr;

  OCCA_CUDA_CHECK("Device: Creating Surface Object",
                  cuSurfObjectCreate(&surfObj, &resDesc) );

  // textureInfo.arg holds a heap-allocated int set to CUDA_ADDRESS_CLAMP.
  int *addressArg = new int;
  *addressArg     = CUDA_ADDRESS_CLAMP;
  texMem->textureInfo.arg = addressArg;

  texMem->copyFrom(source);

  /*
    Disabled 3D path, kept for reference:

    if(dims == 3){
      CUDA_ARRAY3D_DESCRIPTOR arrayDesc;
      memset(&arrayDesc, 0, sizeof(arrayDesc);

      arrayDesc.Width  = size.x;
      arrayDesc.Height = size.y;
      arrayDesc.Depth  = size.z;

      arrayDesc.Format      = type.format<CUDA>();
      arrayDesc.NumChannels = type.count();

      cuArray3DCreate(&arr, (CUDA_ARRAY3D_DESCRIPTOR*) &arrayDesc);
    }
  */

  return texMem;
}