// Brings the driver-side state of a texture binding in line with `sampler`.
//
// The texel format is forwarded to SetFormat() first. After that each sampler
// attribute (address mode per axis, filter mode, read/coordinate flags) is
// compared against the copy cached in `texBind->sampler`; only attributes that
// differ are sent to the driver, and the cache is updated to match.
//
// texBind  binding whose `texRef` is reconfigured and whose cached `sampler`
//          is updated in place.
// sampler  the requested sampler state.
//
// The return codes of the cuTexRef* calls are not inspected.
void CuModule::SetSampler(CuModule::TexBind* texBind, const CuTexSamplerAttr& sampler) {
    SetFormat(texBind, sampler.fmt, sampler.numPackedComponents);

    // Address modes: the value handed to the driver must be the *requested*
    // mode. Passing the cached one would re-apply the old mode while the cache
    // records the new one, leaving driver and cache permanently out of sync.
    if(texBind->sampler.addressX != sampler.addressX) {
        cuTexRefSetAddressMode(texBind->texRef, 0, sampler.addressX);
        texBind->sampler.addressX = sampler.addressX;
    }
    if(texBind->sampler.addressY != sampler.addressY) {
        cuTexRefSetAddressMode(texBind->texRef, 1, sampler.addressY);
        texBind->sampler.addressY = sampler.addressY;
    }
    if(texBind->sampler.addressZ != sampler.addressZ) {
        cuTexRefSetAddressMode(texBind->texRef, 2, sampler.addressZ);
        texBind->sampler.addressZ = sampler.addressZ;
    }

    if(texBind->sampler.filter != sampler.filter) {
        cuTexRefSetFilterMode(texBind->texRef, sampler.filter);
        texBind->sampler.filter = sampler.filter;
    }

    // Both booleans are packed into one flags word, so a change to either one
    // requires rebuilding and re-sending the whole word.
    if((texBind->sampler.readAsInteger != sampler.readAsInteger) ||
        (texBind->sampler.normCoord != sampler.normCoord)) {
        uint flags = (sampler.readAsInteger ? CU_TRSF_READ_AS_INTEGER : 0) |
            (sampler.normCoord ? CU_TRSF_NORMALIZED_COORDINATES : 0);
        texBind->sampler.readAsInteger = sampler.readAsInteger;
        texBind->sampler.normCoord = sampler.normCoord;
        cuTexRefSetFlags(texBind->texRef, flags);
    }
}
void CudaModule::setTexRef( const std::string& name, CUarray cudaArray, bool wrap, bool bilinear, bool normalizedCoords, bool readAsInt) { U32 flags = 0; if (normalizedCoords) { flags |= CU_TRSF_NORMALIZED_COORDINATES; } if (readAsInt) { flags |= CU_TRSF_READ_AS_INTEGER; } CUaddress_mode addressMode; CUfilter_mode filterMode; addressMode = (wrap) ? CU_TR_ADDRESS_MODE_WRAP : CU_TR_ADDRESS_MODE_CLAMP; filterMode = (bilinear) ? CU_TR_FILTER_MODE_LINEAR : CU_TR_FILTER_MODE_POINT; CUtexref texRef = getTexRef(name); for (int dim=0; dim<3; ++dim) { checkError( "cuTexRefSetAddressMode", cuTexRefSetAddressMode(texRef, dim, addressMode)); } checkError("cuTexRefSetFilterMode", cuTexRefSetFilterMode(texRef, filterMode)); checkError("cuTexRefSetFlags", cuTexRefSetFlags(texRef, flags)); checkError("cuTexRefSetArray", cuTexRefSetArray(texRef, cudaArray, CU_TRSA_OVERRIDE_FORMAT)); }
// Hands back the kernel set for precision `prec`, loading it on first request.
//
// Loaded kernel sets are cached in `multiply[]`, indexed by the integer value
// of `prec`. On a cache miss the function loads "<kernelPath>spmxv_<name>.cubin",
// resolves one "SpMxV_<n>" function per ValuesPerThread entry plus "Finalize",
// looks up the "xVec_texture" texture reference and sets its flags and format
// from PrecTerms, then stores the result in the cache.
//
// prec      precision selecting the kernel set.
// ppKernel  receives a pointer to the cached kernel set; only written on
//           success. The cache keeps ownership.
//
// Returns SPARSE_STATUS_SUCCESS, SPARSE_STATUS_KERNEL_NOT_FOUND when the
// module file cannot be loaded, or SPARSE_STATUS_KERNEL_ERROR when any
// function / texture lookup or texture setup call fails. Nothing is cached
// on failure.
sparseStatus_t sparseEngine_d::LoadKernel(sparsePrec_t prec, sparseEngine_d::Kernel** ppKernel) {
    const int slot = static_cast<int>(prec);

    // Already loaded: serve it straight from the cache.
    if(multiply[slot].get()) {
        *ppKernel = multiply[slot].get();
        return SPARSE_STATUS_SUCCESS;
    }

    // Build into a temporary owner so that every early return below discards
    // the half-initialised kernel set.
    std::auto_ptr<Kernel> fresh(new Kernel);

    std::string cubinPath = kernelPath + "spmxv_" + PrecNames[slot] + ".cubin";
    CUresult status = context->LoadModuleFilename(cubinPath, &fresh->module);
    if(status != CUDA_SUCCESS)
        return SPARSE_STATUS_KERNEL_NOT_FOUND;

    // One SpMxV entry point per supported values-per-thread count.
    int vt = 0;
    while(vt < NumVT) {
        std::ostringstream entryName;
        entryName<< "SpMxV_"<< ValuesPerThread[vt];
        status = fresh->module->GetFunction(entryName.str(),
            make_int3(BlockSize, 1, 1), &fresh->func[vt]);
        if(status != CUDA_SUCCESS)
            return SPARSE_STATUS_KERNEL_ERROR;
        ++vt;
    }

    // The finalize entry point.
    status = fresh->module->GetFunction("Finalize", make_int3(BlockSize, 1, 1),
        &fresh->finalize);
    if(status != CUDA_SUCCESS)
        return SPARSE_STATUS_KERNEL_ERROR;

    // Look up the texture reference once and configure it.
    status = cuModuleGetTexRef(&fresh->xVec_texture, fresh->module->Handle(),
        "xVec_texture");
    if(status != CUDA_SUCCESS)
        return SPARSE_STATUS_KERNEL_ERROR;

    status = cuTexRefSetFlags(fresh->xVec_texture, CU_TRSF_READ_AS_INTEGER);
    if(status != CUDA_SUCCESS)
        return SPARSE_STATUS_KERNEL_ERROR;

    status = cuTexRefSetFormat(fresh->xVec_texture, PrecTerms[slot].vecFormat,
        PrecTerms[slot].vecChannels);
    if(status != CUDA_SUCCESS)
        return SPARSE_STATUS_KERNEL_ERROR;

    // Fully initialised: hand ownership to the cache.
    multiply[slot] = fresh;

    *ppKernel = multiply[slot].get();
    return SPARSE_STATUS_SUCCESS;
}
void swanMakeTexture1DEx( const char *modname, const char *texname, size_t width, void *ptr, size_t typesize, int flags ) { int err; // get the texture CUtexref cu_texref; int mode, channels; CUarray array; CUDA_MEMCPY2D copyParam; CUDA_ARRAY_DESCRIPTOR p; // get the module CUmodule mod = swanGetModule( modname ); err = cuModuleGetTexRef(&cu_texref, mod, texname ); if( err != CUDA_SUCCESS) { error( "swanMakeTexture1D failed -- texture not found" ); } p.Width = width; p.Height= 1; mode = flags & TEXTURE_TYPE_MASK; channels = typesize / sizeof(float); switch( mode ) { case TEXTURE_FLOAT: p.Format = CU_AD_FORMAT_FLOAT; p.NumChannels = channels; break; case TEXTURE_INT: p.Format = CU_AD_FORMAT_SIGNED_INT32; p.NumChannels = channels; break; case TEXTURE_UINT: p.Format = CU_AD_FORMAT_UNSIGNED_INT32; p.NumChannels = channels; break; default: error( "swanMakeTexture1D failed -- invalid format" ); } err = cuArrayCreate( &array , &p); if( err != CUDA_SUCCESS) { error( "swanMakeTexture1D failed -- array create failed" ); } memset(©Param, 0, sizeof(copyParam)); copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY; copyParam.dstArray = array; copyParam.srcMemoryType = CU_MEMORYTYPE_HOST; copyParam.srcHost = ptr; copyParam.srcPitch = width * sizeof(float); copyParam.WidthInBytes = copyParam.srcPitch; copyParam.Height = 1; // err = cuMemcpy2D(©Param); err = cuMemcpyHtoA( array, 0, ptr, typesize * width ); if( err != CUDA_SUCCESS) { error( "swanMakeTexture1D failed -- memcpy failed" ); } err = cuTexRefSetArray ( cu_texref, array, CU_TRSA_OVERRIDE_FORMAT ); if( err != CUDA_SUCCESS) { error( "swanMakeTexture1D failed -- setarray failed" ); } if( (flags & TEXTURE_INTERPOLATE) == TEXTURE_INTERPOLATE ) { err = cuTexRefSetFilterMode( cu_texref, CU_TR_FILTER_MODE_LINEAR ); } else { err = cuTexRefSetFilterMode( cu_texref, CU_TR_FILTER_MODE_POINT ); } if( err != CUDA_SUCCESS) { error( "swanBindToTexture1D failed -- setfiltermode failed" ); } if( (flags & TEXTURE_NORMALISE ) == TEXTURE_NORMALISE ) 
{ err = cuTexRefSetFlags(cu_texref, CU_TRSF_NORMALIZED_COORDINATES); err |= cuTexRefSetAddressMode(cu_texref, 0, CU_TR_ADDRESS_MODE_CLAMP); err |= cuTexRefSetAddressMode(cu_texref, 1, CU_TR_ADDRESS_MODE_CLAMP); if( err != CUDA_SUCCESS) { error( "swanBindToTexture1D failed -- setflags 1 failed" ); } } err = cuTexRefSetFormat( cu_texref, CU_AD_FORMAT_FLOAT, channels ); if( err != CUDA_SUCCESS) { error( "swanBindToTexture1D failed -- setformat failed" ); } //printf("TEX BIND DONE\n"); }
/**
 * Configure the output link of the CUDA scale filter.
 *
 * With the device's CUDA context pushed, loads the vf_scale_cuda_ptx module,
 * resolves the six Subsample_Bilinear_* kernels together with their texture
 * references, and sets every texture to integer reads with linear filtering.
 * The context is popped again on both the success and the failure path.
 * Afterwards the output dimensions are evaluated, the processing chain is
 * initialised and the output sample aspect ratio is derived from the input.
 *
 * @param outlink output link to configure
 * @return 0 on success, a negative value on failure
 */
static av_cold int cudascale_config_props(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    AVFilterLink *inlink = outlink->src->inputs[0];
    CUDAScaleContext *s  = ctx->priv;
    AVHWFramesContext     *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
    AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
    CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
    /* One entry per pixel layout: kernel name, texture name, and where the
     * resolved handles are stored in the filter context. */
    const struct {
        const char *func_name;
        const char *tex_name;
        CUfunction *func;
        CUtexref   *tex;
    } kernels[] = {
        { "Subsample_Bilinear_uchar",   "uchar_tex",   &s->cu_func_uchar,   &s->cu_tex_uchar   },
        { "Subsample_Bilinear_uchar2",  "uchar2_tex",  &s->cu_func_uchar2,  &s->cu_tex_uchar2  },
        { "Subsample_Bilinear_uchar4",  "uchar4_tex",  &s->cu_func_uchar4,  &s->cu_tex_uchar4  },
        { "Subsample_Bilinear_ushort",  "ushort_tex",  &s->cu_func_ushort,  &s->cu_tex_ushort  },
        { "Subsample_Bilinear_ushort2", "ushort2_tex", &s->cu_func_ushort2, &s->cu_tex_ushort2 },
        { "Subsample_Bilinear_ushort4", "ushort4_tex", &s->cu_func_ushort4, &s->cu_tex_ushort4 },
    };
    unsigned i;
    int w, h;
    int ret;

    extern char vf_scale_cuda_ptx[];

    ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
    if (ret < 0)
        goto fail;

    /* From here on the context is pushed: every failure must leave through
     * fail_pop so the context stack stays balanced. */
    ret = CHECK_CU(cuModuleLoadData(&s->cu_module, vf_scale_cuda_ptx));
    if (ret < 0)
        goto fail_pop;

    for (i = 0; i < sizeof(kernels) / sizeof(kernels[0]); i++) {
        ret = CHECK_CU(cuModuleGetFunction(kernels[i].func, s->cu_module, kernels[i].func_name));
        if (ret < 0)
            goto fail_pop;

        ret = CHECK_CU(cuModuleGetTexRef(kernels[i].tex, s->cu_module, kernels[i].tex_name));
        if (ret < 0)
            goto fail_pop;

        ret = CHECK_CU(cuTexRefSetFlags(*kernels[i].tex, CU_TRSF_READ_AS_INTEGER));
        if (ret < 0)
            goto fail_pop;

        ret = CHECK_CU(cuTexRefSetFilterMode(*kernels[i].tex, CU_TR_FILTER_MODE_LINEAR));
        if (ret < 0)
            goto fail_pop;
    }

    CHECK_CU(cuCtxPopCurrent(&dummy));

    if ((ret = ff_scale_eval_dimensions(s,
                                        s->w_expr, s->h_expr,
                                        inlink, outlink,
                                        &w, &h)) < 0)
        goto fail;

    /* Only reported, not treated as a failure: configuration continues. */
    if (((int64_t)h * inlink->w) > INT_MAX  ||
        ((int64_t)w * inlink->h) > INT_MAX)
        av_log(ctx, AV_LOG_ERROR, "Rescaled value for width or height is too big.\n");

    outlink->w = w;
    outlink->h = h;

    ret = init_processing_chain(ctx, inlink->w, inlink->h, w, h);
    if (ret < 0)
        return ret;

    av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d -> w:%d h:%d\n",
           inlink->w, inlink->h, outlink->w, outlink->h);

    if (inlink->sample_aspect_ratio.num) {
        outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h*inlink->w,
                                                             outlink->w*inlink->h},
                                                inlink->sample_aspect_ratio);
    } else {
        outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
    }

    return 0;

fail_pop:
    /* The pop result is deliberately not stored so the original error code
     * in ret is what the caller sees. */
    CHECK_CU(cuCtxPopCurrent(&dummy));
fail:
    return ret;
}