// Returns (through ppTexBind) the texture binding registered under `name`.
// A binding already present in _textures is reused; otherwise the texture
// reference is fetched from the module, appended to _textures and given a
// default sampler (wrap addressing on all axes, linear filtering, 4-component
// unsigned 8-bit format, normalized coordinates, not read as integer).
//
// Returns CUDA_SUCCESS on a cache hit or after a successful creation.
// NOTE(review): HANDLE_RESULT() is a project macro operating on `result`;
// presumably it returns early on failure -- confirm against its definition.
// NOTE(review): the returned pointer aliases an element of _textures; whether
// it survives later push_back calls depends on the container type -- confirm.
CUresult CuModule::FindTexRef(const std::string& name, CuModule::TexBind** ppTexBind) {
    // Cache lookup: linear scan over the bindings created so far.
    const size_t cachedCount = _textures.size();
    for (size_t idx = 0; idx < cachedCount; ++idx) {
        TexBind& candidate = _textures[idx];
        if (name == candidate.name) {
            *ppTexBind = &candidate;
            return CUDA_SUCCESS;
        }
    }

    // Cache miss: ask the driver for the texture reference.
    CUtexref texRef;
    CUresult result = cuModuleGetTexRef(&texRef, _module, name.c_str());
    HANDLE_RESULT();

    TexBind fresh;
    fresh.texRef = texRef;
    fresh.name = name;
    // Fill the stored sampler with 0xFF bytes before SetSampler runs.
    // Presumably this makes every field compare as "different" so that
    // SetSampler pushes all of them to the driver -- TODO confirm.
    memset(&fresh.sampler, -1, sizeof(CuTexSamplerAttr));
    _textures.push_back(fresh);
    TexBind& stored = _textures.back();

    // Default sampler state applied to every newly created binding.
    CuTexSamplerAttr defaults;
    defaults.addressX = CU_TR_ADDRESS_MODE_WRAP;
    defaults.addressY = CU_TR_ADDRESS_MODE_WRAP;
    defaults.addressZ = CU_TR_ADDRESS_MODE_WRAP;
    defaults.filter = CU_TR_FILTER_MODE_LINEAR;
    defaults.fmt = CU_AD_FORMAT_UNSIGNED_INT8;
    defaults.numPackedComponents = 4;
    defaults.normCoord = true;
    defaults.readAsInteger = false;
    SetSampler(&stored, defaults);

    *ppTexBind = &stored;
    return CUDA_SUCCESS;
}
// Returns the texture reference called `name` from this module.
// The handle is memoised in m_texrefHash: the driver is queried only while
// the cached slot is still zero, and the call result goes through checkError.
CUtexref CudaModule::getTexRef(const std::string& name) {
    CUtexref& cachedRef = m_texrefHash[name];
    if (0 != cachedRef) {
        return cachedRef;
    }

    // First request for this name: resolve it and store it in the cache slot.
    checkError("cuModuleGetTexRef",
               cuModuleGetTexRef(&cachedRef, m_module, name.c_str()));
    return cachedRef;
}
void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic) { /* determine format */ CUarray_format_enum format; size_t dsize = datatype_size(mem.data_type); size_t size = mem.memory_size(); switch(mem.data_type) { case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break; case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; break; case TYPE_INT: format = CU_AD_FORMAT_SIGNED_INT32; break; case TYPE_FLOAT: format = CU_AD_FORMAT_FLOAT; break; default: assert(0); return; } CUtexref texref = NULL; cuda_push_context(); cuda_assert(cuModuleGetTexRef(&texref, cuModule, name)) if(!texref) { cuda_pop_context(); return; } if(interpolation) { CUarray handle = NULL; CUDA_ARRAY_DESCRIPTOR desc; desc.Width = mem.data_width; desc.Height = mem.data_height; desc.Format = format; desc.NumChannels = mem.data_elements; cuda_assert(cuArrayCreate(&handle, &desc)) if(!handle) { cuda_pop_context(); return; } if(mem.data_height > 1) { CUDA_MEMCPY2D param; memset(¶m, 0, sizeof(param)); param.dstMemoryType = CU_MEMORYTYPE_ARRAY; param.dstArray = handle; param.srcMemoryType = CU_MEMORYTYPE_HOST; param.srcHost = (void*)mem.data_pointer; param.srcPitch = mem.data_width*dsize*mem.data_elements; param.WidthInBytes = param.srcPitch; param.Height = mem.data_height; cuda_assert(cuMemcpy2D(¶m)) } else
void swanBindToTexture1DEx( const char *modname, const char *texname, size_t width, void *ptr, size_t typesize, int flags ) { CUresult err; CUtexref cu_texref; int mode, channels; // get the module CUmodule mod = swanGetModule( modname ); // get the texture err = cuModuleGetTexRef(&cu_texref, mod, texname ); if( err != CUDA_SUCCESS) { error( "swanBindToTexture1D failed -- texture not found" ); } // now bind err = cuTexRefSetAddress( NULL, cu_texref, PTR_TO_CUDEVPTR(ptr), width * typesize ); if( err != CUDA_SUCCESS) { printf("EEERRR = %d\n", err ); error( "swanBindToTexture1D failed -- bind failed" ); } // does not work for linear memory /* if( (flags & TEXTURE_INTERPOLATE) == TEXTURE_INTERPOLATE ) { err = cuTexRefSetFilterMode( cu_texref, CU_TR_FILTER_MODE_LINEAR ); } else { err = cuTexRefSetFilterMode( cu_texref, CU_TR_FILTER_MODE_POINT ); } if( err != CUDA_SUCCESS) { error( "swanBindToTexture1D failed -- setfiltermode failed" ); } */ mode = flags & TEXTURE_TYPE_MASK; channels = typesize / sizeof(float); switch( mode ) { case TEXTURE_FLOAT: err = cuTexRefSetFormat( cu_texref, CU_AD_FORMAT_FLOAT, channels ); break; case TEXTURE_INT: err = cuTexRefSetFormat( cu_texref, CU_AD_FORMAT_SIGNED_INT32, channels ); break; case TEXTURE_UINT: err = cuTexRefSetFormat( cu_texref, CU_AD_FORMAT_UNSIGNED_INT32, channels ); break; default: error( "swanBinToTexture1D failed -- invalid format" ); } if( err != CUDA_SUCCESS) { error( "swanBinToTexture1D failed -- setformat failed" ); } return; }
/*
 * R entry point wrapping cuModuleGetTexRef.
 *   r_hmod  reference object from which the CUmodule handle is extracted
 *   r_name  character vector; its first element is the texture name
 * Returns a "CUtexref" reference object on success, or the value produced by
 * R_cudaErrorInfo() when the driver call reports a non-zero status.
 */
SEXP R_auto_cuModuleGetTexRef(SEXP r_hmod, SEXP r_name)
{
    CUtexref pTexRef;
    CUmodule hmod = (CUmodule) getRReference(r_hmod);
    const char *name = CHAR(STRING_ELT(r_name, 0));

    CUresult status = cuModuleGetTexRef(&pTexRef, hmod, name);
    if (status != 0) {
        return R_cudaErrorInfo(status);
    }

    return R_createRef(pTexRef, "CUtexref");
}
// Returns (through ppKernel) the kernel bundle for precision `prec`, loading
// it on first request and caching it in multiply[prec].
//
// Loading reads "<kernelPath>spmxv_<PrecNames[prec]>.cubin", resolves one
// "SpMxV_<ValuesPerThread[i]>" function per entry (NumVT of them) plus the
// "Finalize" function, and configures the "xVec_texture" texture reference.
//
// Returns SPARSE_STATUS_KERNEL_NOT_FOUND when the module file cannot be
// loaded, SPARSE_STATUS_KERNEL_ERROR when any later step fails, and
// SPARSE_STATUS_SUCCESS otherwise. On failure nothing is cached.
sparseStatus_t sparseEngine_d::LoadKernel(sparsePrec_t prec, sparseEngine_d::Kernel** ppKernel) {
    const int slot = (int)prec;

    // Already loaded for this precision: hand back the cached bundle.
    if (multiply[slot].get()) {
        *ppKernel = multiply[slot].get();
        return SPARSE_STATUS_SUCCESS;
    }

    std::auto_ptr<Kernel> bundle(new Kernel);

    const std::string cubinPath = kernelPath + "spmxv_" + PrecNames[slot] + ".cubin";
    CUresult status = context->LoadModuleFilename(cubinPath, &bundle->module);
    if (status != CUDA_SUCCESS)
        return SPARSE_STATUS_KERNEL_NOT_FOUND;

    // One SpMxV entry point per supported values-per-thread count.
    for (int vt = 0; vt < NumVT; ++vt) {
        std::ostringstream kernelName;
        kernelName << "SpMxV_" << ValuesPerThread[vt];
        status = bundle->module->GetFunction(kernelName.str(),
            make_int3(BlockSize, 1, 1), &bundle->func[vt]);
        if (status != CUDA_SUCCESS)
            return SPARSE_STATUS_KERNEL_ERROR;
    }

    // The finalize pass lives in the same module.
    status = bundle->module->GetFunction("Finalize",
        make_int3(BlockSize, 1, 1), &bundle->finalize);
    if (status != CUDA_SUCCESS)
        return SPARSE_STATUS_KERNEL_ERROR;

    // Cache the texture reference and set its flags and format for this
    // precision (format and channel count come from PrecTerms).
    status = cuModuleGetTexRef(&bundle->xVec_texture,
        bundle->module->Handle(), "xVec_texture");
    if (status != CUDA_SUCCESS)
        return SPARSE_STATUS_KERNEL_ERROR;

    status = cuTexRefSetFlags(bundle->xVec_texture, CU_TRSF_READ_AS_INTEGER);
    if (status != CUDA_SUCCESS)
        return SPARSE_STATUS_KERNEL_ERROR;

    status = cuTexRefSetFormat(bundle->xVec_texture,
        PrecTerms[slot].vecFormat, PrecTerms[slot].vecChannels);
    if (status != CUDA_SUCCESS)
        return SPARSE_STATUS_KERNEL_ERROR;

    // Publish the fully initialised bundle into the per-precision cache.
    multiply[slot] = bundle;

    *ppKernel = multiply[slot].get();
    return SPARSE_STATUS_SUCCESS;
}
// Initialises `texRef` from the first textureTable entry whose name matches
// texRef.getName(): fetches the driver texture reference from the owning
// module, derives the driver array format, channel count and flags from the
// registered textureReference data, and forwards them to texRef.init().
// Does nothing when no table entry matches.
void CudaModuleScene::initCudaObj(ApexCudaTexRef& texRef)
{
	const char* texRefName = texRef.getName();

	for (int j = 0; j < numRegisteredTextures; j++)
	{
		if (nvidia::strcmp(textureTable[j].texRefName, texRefName) != 0)
		{
			continue;
		}

		ApexCudaModule* cudaModule = getCudaModule(textureTable[j].modIndex);
		PX_ASSERT(cudaModule->isValid());

		CUtexref cuTexRef;
		CUT_SAFE_CALL(cuModuleGetTexRef(&cuTexRef, cudaModule->getCuModule(), texRefName));

		const struct textureReference* texRefData = textureTable[j].texRefData;
		const int bitsX = texRefData->channelDesc.x;
		PX_ASSERT(bitsX > 0);

		// Channel count: x always counts; y, z and w count when they have a
		// non-zero width, in which case they are asserted to match x.
		const int otherBits[3] =
		{
			texRefData->channelDesc.y,
			texRefData->channelDesc.z,
			texRefData->channelDesc.w
		};
		int numChannels = 1;
		for (int c = 0; c < 3; ++c)
		{
			if (otherBits[c] > 0)
			{
				PX_ASSERT(otherBits[c] == bitsX);
				++numChannels;
			}
		}

		// Map (format kind, bits per channel) to a driver array format.
		// Unsupported widths leave cuFormat at 0, caught by the assert below.
		CUarray_format cuFormat = CUarray_format(0);
		switch (texRefData->channelDesc.f)
		{
		case cudaChannelFormatKindSigned:
			if (bitsX == 8)
			{
				cuFormat = CU_AD_FORMAT_SIGNED_INT8;
			}
			else if (bitsX == 16)
			{
				cuFormat = CU_AD_FORMAT_SIGNED_INT16;
			}
			else if (bitsX == 32)
			{
				cuFormat = CU_AD_FORMAT_SIGNED_INT32;
			}
			break;
		case cudaChannelFormatKindUnsigned:
			if (bitsX == 8)
			{
				cuFormat = CU_AD_FORMAT_UNSIGNED_INT8;
			}
			else if (bitsX == 16)
			{
				cuFormat = CU_AD_FORMAT_UNSIGNED_INT16;
			}
			else if (bitsX == 32)
			{
				cuFormat = CU_AD_FORMAT_UNSIGNED_INT32;
			}
			break;
		case cudaChannelFormatKindFloat:
			// Float kind maps to CU_AD_FORMAT_FLOAT regardless of width.
			cuFormat = CU_AD_FORMAT_FLOAT;
			break;
		default:
			PX_ASSERT(0);
		};
		PX_ASSERT(cuFormat != 0);

		// Texture reference flags derived from the registration data.
		int cuFlags = 0;
		if (textureTable[j].read_normalized_float == 0)
		{
			cuFlags |= CU_TRSF_READ_AS_INTEGER;
		}
		if (textureTable[j].texRefData->normalized != 0)
		{
			cuFlags |= CU_TRSF_NORMALIZED_COORDINATES;
		}

		texRef.init(this, cuTexRef, cudaModule, cuFormat, numChannels, textureTable[j].dim, cuFlags);

		// Only the first matching entry is used.
		return;
	}
}
void swanMakeTexture1DEx( const char *modname, const char *texname, size_t width, void *ptr, size_t typesize, int flags ) { int err; // get the texture CUtexref cu_texref; int mode, channels; CUarray array; CUDA_MEMCPY2D copyParam; CUDA_ARRAY_DESCRIPTOR p; // get the module CUmodule mod = swanGetModule( modname ); err = cuModuleGetTexRef(&cu_texref, mod, texname ); if( err != CUDA_SUCCESS) { error( "swanMakeTexture1D failed -- texture not found" ); } p.Width = width; p.Height= 1; mode = flags & TEXTURE_TYPE_MASK; channels = typesize / sizeof(float); switch( mode ) { case TEXTURE_FLOAT: p.Format = CU_AD_FORMAT_FLOAT; p.NumChannels = channels; break; case TEXTURE_INT: p.Format = CU_AD_FORMAT_SIGNED_INT32; p.NumChannels = channels; break; case TEXTURE_UINT: p.Format = CU_AD_FORMAT_UNSIGNED_INT32; p.NumChannels = channels; break; default: error( "swanMakeTexture1D failed -- invalid format" ); } err = cuArrayCreate( &array , &p); if( err != CUDA_SUCCESS) { error( "swanMakeTexture1D failed -- array create failed" ); } memset(©Param, 0, sizeof(copyParam)); copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY; copyParam.dstArray = array; copyParam.srcMemoryType = CU_MEMORYTYPE_HOST; copyParam.srcHost = ptr; copyParam.srcPitch = width * sizeof(float); copyParam.WidthInBytes = copyParam.srcPitch; copyParam.Height = 1; // err = cuMemcpy2D(©Param); err = cuMemcpyHtoA( array, 0, ptr, typesize * width ); if( err != CUDA_SUCCESS) { error( "swanMakeTexture1D failed -- memcpy failed" ); } err = cuTexRefSetArray ( cu_texref, array, CU_TRSA_OVERRIDE_FORMAT ); if( err != CUDA_SUCCESS) { error( "swanMakeTexture1D failed -- setarray failed" ); } if( (flags & TEXTURE_INTERPOLATE) == TEXTURE_INTERPOLATE ) { err = cuTexRefSetFilterMode( cu_texref, CU_TR_FILTER_MODE_LINEAR ); } else { err = cuTexRefSetFilterMode( cu_texref, CU_TR_FILTER_MODE_POINT ); } if( err != CUDA_SUCCESS) { error( "swanBindToTexture1D failed -- setfiltermode failed" ); } if( (flags & TEXTURE_NORMALISE ) == TEXTURE_NORMALISE ) 
{ err = cuTexRefSetFlags(cu_texref, CU_TRSF_NORMALIZED_COORDINATES); err |= cuTexRefSetAddressMode(cu_texref, 0, CU_TR_ADDRESS_MODE_CLAMP); err |= cuTexRefSetAddressMode(cu_texref, 1, CU_TR_ADDRESS_MODE_CLAMP); if( err != CUDA_SUCCESS) { error( "swanBindToTexture1D failed -- setflags 1 failed" ); } } err = cuTexRefSetFormat( cu_texref, CU_AD_FORMAT_FLOAT, channels ); if( err != CUDA_SUCCESS) { error( "swanBindToTexture1D failed -- setformat failed" ); } //printf("TEX BIND DONE\n"); }
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { CUresult cudastatus = CUDA_SUCCESS; if (nrhs != 2) mexErrMsgTxt("Wrong number of arguments"); if (init == 0) { // Initialize function //mexLock(); // load GPUmat gm = gmGetGPUmat(); // load module CUmodule *drvmod = gmGetModule("examples_texture"); // load float GPU function CUresult status = cuModuleGetFunction(&drvfunf, *drvmod, "LININTERF"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } // load double GPU function status = cuModuleGetFunction(&drvfund, *drvmod, "LININTERD"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } // load textures defined in module status = cuModuleGetTexRef(&texf, *drvmod, "texref_f1_a"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load texture."); } status = cuModuleGetTexRef(&texd, *drvmod, "texref_d1_a"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load texture."); } // no complex function support init = 1; } // mex parameters are: // 1. IN1. Input array // 2. IN2. 
Input indexes array //IN1 is the input GPU array GPUtype IN1 = gm->gputype.getGPUtype(prhs[0]); //IN2 is the input GPU array GPUtype IN2 = gm->gputype.getGPUtype(prhs[1]); //OUT is the output GPU array (result) // Create of the same size of IN1 gpuTYPE_t in1_t = gm->gputype.getType(IN1); int in1_d = gm->gputype.getNdims(IN1); const int * in1_s = gm->gputype.getSize(IN1); int in1_n = gm->gputype.getNumel(IN1); int in1_b = gm->gputype.getDataSize(IN1); gpuTYPE_t in2_t = gm->gputype.getType(IN2); int in2_d = gm->gputype.getNdims(IN2); const int * in2_s = gm->gputype.getSize(IN2); int in2_n = gm->gputype.getNumel(IN2); if ((in1_t==gpuCFLOAT) || (in1_t==gpuCDOUBLE)) { mexErrMsgTxt("Complex TYPE not supported"); } if (in1_t != in2_t) { mexErrMsgTxt("Input arguments must be of the same type"); } if (in1_n != in2_n) { mexErrMsgTxt("Input arguments must have the same number of elements"); } //OUT is the output GPU array (result) // Create of the same size of IN1 GPUtype OUT = gm->gputype.create(in1_t, in1_d, in1_s, NULL); // I need the pointers to GPU memory CUdeviceptr d_IN1 = (CUdeviceptr) (UINTPTR gm->gputype.getGPUptr(IN1)); CUdeviceptr d_IN2 = (CUdeviceptr) (UINTPTR gm->gputype.getGPUptr(IN2)); CUdeviceptr d_OUT = (CUdeviceptr) (UINTPTR gm->gputype.getGPUptr(OUT)); // The GPU kernel depends on the type of input/output CUfunction drvfun; CUtexref drvtex; CUarray_format_enum drvtexformat; int drvtexnum; if (in1_t == gpuFLOAT) { drvfun = drvfunf; drvtex = texf; drvtexformat = CU_AD_FORMAT_FLOAT; drvtexnum = 1; } else if (in1_t == gpuDOUBLE) { drvfun = drvfund; drvtex = texd; drvtexformat = CU_AD_FORMAT_SIGNED_INT32; drvtexnum = 2; } if (CUDA_SUCCESS != cuTexRefSetFormat(drvtex, drvtexformat, drvtexnum)) { mexErrMsgTxt("Execution error (texture)."); } if (CUDA_SUCCESS != cuTexRefSetAddress(NULL, drvtex, UINTPTR d_IN1, in1_n*in1_b)) { mexErrMsgTxt("Execution error (texture)."); } if (CUDA_SUCCESS != cuParamSetTexRef(drvfun, CU_PARAM_TR_DEFAULT, drvtex)) { 
mexErrMsgTxt("Execution error (texture1)."); } hostdrv_pars_t gpuprhs[2]; int gpunrhs = 2; gpuprhs[0] = hostdrv_pars(&d_IN2,sizeof(d_IN2),__alignof(d_IN2)); gpuprhs[1] = hostdrv_pars(&d_OUT,sizeof(d_OUT),__alignof(d_OUT)); int N = in1_n; hostGPUDRV(drvfun, N, gpunrhs, gpuprhs); // return result plhs[0] = gm->gputype.createMxArray(OUT); }
/**
 * Configure the output link of the CUDA scale filter.
 *
 * Loads the embedded PTX module into the device context, resolves the six
 * bilinear subsample kernels and their texture references, and configures
 * each texture as read-as-integer with linear filtering. It then evaluates
 * the output dimensions, initialises the processing chain and derives the
 * output sample aspect ratio.
 *
 * Every driver call made while the CUDA context is pushed is checked; on
 * failure the context is popped again before the error is returned.
 *
 * @return 0 on success, a negative error code otherwise
 */
static av_cold int cudascale_config_props(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    AVFilterLink *inlink = outlink->src->inputs[0];
    CUDAScaleContext *s  = ctx->priv;
    AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
    AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
    CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
    int w, h;
    int ret;
    size_t i;

    extern char vf_scale_cuda_ptx[];

    /* Kernel / texture pairs exported by the PTX module. */
    const struct {
        CUfunction *func;
        const char *func_name;
        CUtexref   *tex;
        const char *tex_name;
    } variants[] = {
        { &s->cu_func_uchar,   "Subsample_Bilinear_uchar",   &s->cu_tex_uchar,   "uchar_tex"   },
        { &s->cu_func_uchar2,  "Subsample_Bilinear_uchar2",  &s->cu_tex_uchar2,  "uchar2_tex"  },
        { &s->cu_func_uchar4,  "Subsample_Bilinear_uchar4",  &s->cu_tex_uchar4,  "uchar4_tex"  },
        { &s->cu_func_ushort,  "Subsample_Bilinear_ushort",  &s->cu_tex_ushort,  "ushort_tex"  },
        { &s->cu_func_ushort2, "Subsample_Bilinear_ushort2", &s->cu_tex_ushort2, "ushort2_tex" },
        { &s->cu_func_ushort4, "Subsample_Bilinear_ushort4", &s->cu_tex_ushort4, "ushort4_tex" },
    };

    ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
    if (ret < 0)
        goto fail;

    ret = CHECK_CU(cuModuleLoadData(&s->cu_module, vf_scale_cuda_ptx));
    if (ret < 0)
        goto fail_pop;

    for (i = 0; i < sizeof(variants) / sizeof(variants[0]); i++) {
        ret = CHECK_CU(cuModuleGetFunction(variants[i].func, s->cu_module,
                                           variants[i].func_name));
        if (ret < 0)
            goto fail_pop;

        ret = CHECK_CU(cuModuleGetTexRef(variants[i].tex, s->cu_module,
                                         variants[i].tex_name));
        if (ret < 0)
            goto fail_pop;

        ret = CHECK_CU(cuTexRefSetFlags(*variants[i].tex, CU_TRSF_READ_AS_INTEGER));
        if (ret < 0)
            goto fail_pop;

        ret = CHECK_CU(cuTexRefSetFilterMode(*variants[i].tex, CU_TR_FILTER_MODE_LINEAR));
        if (ret < 0)
            goto fail_pop;
    }

    CHECK_CU(cuCtxPopCurrent(&dummy));

    if ((ret = ff_scale_eval_dimensions(s,
                                        s->w_expr, s->h_expr,
                                        inlink, outlink,
                                        &w, &h)) < 0)
        goto fail;

    /* Logged only: as before, an oversized result does not abort configuration. */
    if (((int64_t)h * inlink->w) > INT_MAX  ||
        ((int64_t)w * inlink->h) > INT_MAX)
        av_log(ctx, AV_LOG_ERROR, "Rescaled value for width or height is too big.\n");

    outlink->w = w;
    outlink->h = h;

    ret = init_processing_chain(ctx, inlink->w, inlink->h, w, h);
    if (ret < 0)
        return ret;

    av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d -> w:%d h:%d\n",
           inlink->w, inlink->h, outlink->w, outlink->h);

    if (inlink->sample_aspect_ratio.num) {
        outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h*inlink->w,
                                                             outlink->w*inlink->h},
                                                inlink->sample_aspect_ratio);
    } else {
        outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
    }

    return 0;

fail_pop:
    /* Balance the earlier push without clobbering the original error code. */
    CHECK_CU(cuCtxPopCurrent(&dummy));
fail:
    return ret;
}