C++ (Cpp) cuModuleGetTexRef 예제들

예제 #1

0

파일 보기

파일: cucpp.cpp 프로젝트: CindyYang85/mgpu

// Looks up the texture binding called `name` in this module.
//
// Bindings that were already resolved are served from _textures. Otherwise
// the texture reference is fetched from the module with cuModuleGetTexRef,
// appended to _textures, given a default sampler, and returned.
//
//   name       - symbol name of the texture reference inside the module.
//   ppTexBind  - receives a pointer to the binding stored in _textures.
//
// Returns CUDA_SUCCESS when *ppTexBind has been set. A failing driver lookup
// is handled by HANDLE_RESULT() (macro defined elsewhere; presumably it
// returns `result` to the caller on failure - confirm).
CUresult CuModule::FindTexRef(const std::string& name, 
	CuModule::TexBind** ppTexBind) {

	// Linear scan of the bindings resolved so far.
	size_t slot = 0;
	while(slot < _textures.size()) {
		if(name == _textures[slot].name) {
			*ppTexBind = &_textures[slot];
			return CUDA_SUCCESS;
		}
		++slot;
	}

	// Not cached yet: ask the driver for the reference.
	// The status variable must be called `result` for HANDLE_RESULT().
	CUtexref texRef;
	CUresult result = cuModuleGetTexRef(&texRef, _module, name.c_str());
	HANDLE_RESULT();

	// Record the new binding. Every byte of its sampler is set to 0xFF before
	// the defaults are applied (presumably so SetSampler treats each
	// attribute as changed - confirm against SetSampler).
	TexBind fresh;
	fresh.texRef = texRef;
	fresh.name = name;
	memset(&fresh.sampler, -1, sizeof(CuTexSamplerAttr));
	_textures.push_back(fresh);
	TexBind* stored = &_textures.back();

	// Default sampler: wrap addressing on all axes, linear filtering,
	// four packed 8-bit unsigned components, normalized coordinates,
	// not read as integer.
	CuTexSamplerAttr defaults;
	defaults.addressX = CU_TR_ADDRESS_MODE_WRAP;
	defaults.addressY = CU_TR_ADDRESS_MODE_WRAP;
	defaults.addressZ = CU_TR_ADDRESS_MODE_WRAP;
	defaults.filter = CU_TR_FILTER_MODE_LINEAR;
	defaults.fmt = CU_AD_FORMAT_UNSIGNED_INT8;
	defaults.numPackedComponents = 4;
	defaults.normCoord = true;
	defaults.readAsInteger = false;
	SetSampler(stored, defaults);

	*ppTexBind = stored;
	return CUDA_SUCCESS;
}

예제 #2

0

파일 보기

파일: CudaModule.cpp 프로젝트: tcoppex/cudaraster-linux

// Returns the texture reference called `name`, resolving it through
// cuModuleGetTexRef the first time it is requested and reusing the handle
// stored in m_texrefHash afterwards.
CUtexref CudaModule::getTexRef(const std::string& name)
{
  // Reference into the cache, so a successful lookup below is stored in place.
  // (Presumably operator[] yields a null handle for unseen names - confirm
  // against the type of m_texrefHash.)
  CUtexref &cached = m_texrefHash[name];

  if (0 != cached) {
    return cached;
  }

  // Failures are reported through checkError together with the API name.
  checkError("cuModuleGetTexRef", cuModuleGetTexRef( &cached, m_module, name.c_str()));
  return cached;
}

예제 #3

0

파일 보기

	void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
	{
		/* determine format */
		CUarray_format_enum format;
		size_t dsize = datatype_size(mem.data_type);
		size_t size = mem.memory_size();

		switch(mem.data_type) {
			case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break;
			case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; break;
			case TYPE_INT: format = CU_AD_FORMAT_SIGNED_INT32; break;
			case TYPE_FLOAT: format = CU_AD_FORMAT_FLOAT; break;
			default: assert(0); return;
		}

		CUtexref texref = NULL;

		cuda_push_context();
		cuda_assert(cuModuleGetTexRef(&texref, cuModule, name))

		if(!texref) {
			cuda_pop_context();
			return;
		}

		if(interpolation) {
			CUarray handle = NULL;
			CUDA_ARRAY_DESCRIPTOR desc;

			desc.Width = mem.data_width;
			desc.Height = mem.data_height;
			desc.Format = format;
			desc.NumChannels = mem.data_elements;

			cuda_assert(cuArrayCreate(&handle, &desc))

			if(!handle) {
				cuda_pop_context();
				return;
			}

			if(mem.data_height > 1) {
				CUDA_MEMCPY2D param;
				memset(&param, 0, sizeof(param));
				param.dstMemoryType = CU_MEMORYTYPE_ARRAY;
				param.dstArray = handle;
				param.srcMemoryType = CU_MEMORYTYPE_HOST;
				param.srcHost = (void*)mem.data_pointer;
				param.srcPitch = mem.data_width*dsize*mem.data_elements;
				param.WidthInBytes = param.srcPitch;
				param.Height = mem.data_height;

				cuda_assert(cuMemcpy2D(&param))
			}
			else

예제 #4

0

파일 보기

파일: swanlib_nv.c 프로젝트: liyuming1978/swan

void swanBindToTexture1DEx(  const char *modname, const char *texname, size_t width, void *ptr, size_t typesize, int flags ) {
	CUresult err;
    CUtexref cu_texref;
	int mode, channels;

		// get the module
		CUmodule mod  = swanGetModule( modname );

		// get the texture
    err = cuModuleGetTexRef(&cu_texref, mod, texname );
		if( err != CUDA_SUCCESS) { error( "swanBindToTexture1D failed -- texture not found" ); }


		// now bind
	 err = cuTexRefSetAddress( NULL, cu_texref,  PTR_TO_CUDEVPTR(ptr), width * typesize );
	if( err != CUDA_SUCCESS) { 
			printf("EEERRR = %d\n", err );
		error( "swanBindToTexture1D failed -- bind failed" ); 
	}

// does not work for linear memory
/*
	if( (flags & TEXTURE_INTERPOLATE) == TEXTURE_INTERPOLATE ) {
		err = cuTexRefSetFilterMode( cu_texref, CU_TR_FILTER_MODE_LINEAR );
	}
	else {
		err = cuTexRefSetFilterMode( cu_texref, CU_TR_FILTER_MODE_POINT );
	}
		if( err != CUDA_SUCCESS) { error( "swanBindToTexture1D failed -- setfiltermode failed" ); }
*/

	mode = flags & TEXTURE_TYPE_MASK;
	channels = typesize / sizeof(float);
	switch( mode ) {
		case TEXTURE_FLOAT:
		err = cuTexRefSetFormat( cu_texref, CU_AD_FORMAT_FLOAT, channels );
		break;
		case TEXTURE_INT:
		err = cuTexRefSetFormat( cu_texref, CU_AD_FORMAT_SIGNED_INT32, channels );
		break;
		case TEXTURE_UINT:
		err = cuTexRefSetFormat( cu_texref, CU_AD_FORMAT_UNSIGNED_INT32, channels );
		break;
		default:
			error( "swanBinToTexture1D failed -- invalid format" );
	}

	if( err != CUDA_SUCCESS) {	
			error( "swanBinToTexture1D failed -- setformat failed" );
	}


	return;

}

예제 #5

0

파일 보기

파일: autoModule.c 프로젝트: PachoAlvarez/RCUDA

/*
 * R entry point wrapping cuModuleGetTexRef.
 *
 *   r_hmod - R reference object holding the CUmodule handle.
 *   r_name - character vector; its first element is the texture name.
 *
 * Returns an R reference tagged "CUtexref" on success, or the object built by
 * R_cudaErrorInfo() for the failing status code otherwise.
 */
SEXP
R_auto_cuModuleGetTexRef(SEXP r_hmod, SEXP r_name)
{
    CUmodule module = (CUmodule) getRReference(r_hmod);
    const char *texName = CHAR(STRING_ELT(r_name, 0));
    CUtexref texRef;
    CUresult status = cuModuleGetTexRef(&texRef, module, texName);

    if(status != CUDA_SUCCESS)
        return R_cudaErrorInfo(status);

    return R_createRef(texRef, "CUtexref");
}

예제 #6

0

파일 보기

파일: engine.cpp 프로젝트: CindyYang85/mgpu

// Returns the SpMxV kernel bundle for precision `prec`, loading it on first
// use and caching it in multiply[prec].
//
// Loading consists of: opening "<kernelPath>spmxv_<PrecNames[prec]>.cubin",
// resolving one "SpMxV_<n>" entry point per ValuesPerThread value plus the
// "Finalize" function, and configuring the "xVec_texture" texture reference.
//
// Returns SPARSE_STATUS_KERNEL_NOT_FOUND if the module cannot be loaded,
// SPARSE_STATUS_KERNEL_ERROR if any later step fails, and
// SPARSE_STATUS_SUCCESS with *ppKernel set otherwise.
sparseStatus_t sparseEngine_d::LoadKernel(sparsePrec_t prec,
	sparseEngine_d::Kernel** ppKernel) {

	const int slot = (int)prec;

	// Cache hit: hand back the bundle loaded earlier.
	if(multiply[slot].get()) {
		*ppKernel = multiply[slot].get();
		return SPARSE_STATUS_SUCCESS;
	}

	// Held by auto_ptr so every early return below releases the
	// half-initialized bundle.
	std::auto_ptr<Kernel> bundle(new Kernel);

	std::string cubinPath = kernelPath + "spmxv_" + PrecNames[slot] +
		".cubin";
	CUresult status = context->LoadModuleFilename(cubinPath, &bundle->module);
	if(CUDA_SUCCESS != status)
		return SPARSE_STATUS_KERNEL_NOT_FOUND;

	// One SpMxV entry point per valuesPerThread count.
	for(int vt(0); vt < NumVT; ++vt) {
		std::ostringstream entryName;
		entryName<< "SpMxV_"<< ValuesPerThread[vt];
		status = bundle->module->GetFunction(entryName.str(),
			make_int3(BlockSize, 1, 1), &bundle->func[vt]);
		if(CUDA_SUCCESS != status)
			return SPARSE_STATUS_KERNEL_ERROR;
	}

	// The finalize function.
	status = bundle->module->GetFunction("Finalize",
		make_int3(BlockSize, 1, 1), &bundle->finalize);
	if(CUDA_SUCCESS != status)
		return SPARSE_STATUS_KERNEL_ERROR;

	// Cache the texture reference, then set its flags and its per-precision
	// format and channel count.
	status = cuModuleGetTexRef(&bundle->xVec_texture,
		bundle->module->Handle(), "xVec_texture");
	if(CUDA_SUCCESS != status)
		return SPARSE_STATUS_KERNEL_ERROR;

	status = cuTexRefSetFlags(bundle->xVec_texture, CU_TRSF_READ_AS_INTEGER);
	if(CUDA_SUCCESS != status)
		return SPARSE_STATUS_KERNEL_ERROR;

	status = cuTexRefSetFormat(bundle->xVec_texture, PrecTerms[slot].vecFormat,
		PrecTerms[slot].vecChannels);
	if(CUDA_SUCCESS != status)
		return SPARSE_STATUS_KERNEL_ERROR;

	// Publish the fully-initialized bundle in the cache.
	multiply[slot] = bundle;

	*ppKernel = multiply[slot].get();
	return SPARSE_STATUS_SUCCESS;
}

예제 #7

0

파일 보기

파일: CudaModuleScene.cpp 프로젝트: zhaoyizheng0930/UnrealEngine

// Initializes `texRef` from the registered texture table.
//
// The first table entry whose name equals texRef.getName() is used: its
// driver texture reference is fetched from the owning module, the channel
// count, array format and texture flags are derived from the entry's
// textureReference data, and everything is passed to texRef.init().
// If no entry matches, texRef is left untouched.
void CudaModuleScene::initCudaObj(ApexCudaTexRef& texRef)
{
	const char* texRefName = texRef.getName();

	for (int j = 0 ; j < numRegisteredTextures ; j++)
	{
		// Skip entries registered under a different name.
		if (nvidia::strcmp(textureTable[j].texRefName, texRefName) != 0)
		{
			continue;
		}

		ApexCudaModule* cudaModule = getCudaModule(textureTable[j].modIndex);
		PX_ASSERT(cudaModule->isValid());

		CUtexref cuTexRef;
		CUT_SAFE_CALL(cuModuleGetTexRef(&cuTexRef, cudaModule->getCuModule(), texRefName));

		const struct textureReference* texRefData = textureTable[j].texRefData;

		// Channel count: x must be present; each of y, z, w that is present
		// adds one channel and is asserted to have the same width as x.
		PX_ASSERT(texRefData->channelDesc.x > 0);
		const int bitsPerChannel = texRefData->channelDesc.x;
		const int extraChannelBits[3] =
		{
			texRefData->channelDesc.y,
			texRefData->channelDesc.z,
			texRefData->channelDesc.w
		};
		int numChannels = 1;
		for (int c = 0; c < 3; ++c)
		{
			if (extraChannelBits[c] > 0)
			{
				PX_ASSERT(extraChannelBits[c] == texRefData->channelDesc.x);
				++numChannels;
			}
		}

		// Map (format kind, bits per channel) onto a driver array format.
		// Combinations without a mapping leave cuFormat at 0, which trips
		// the assertion after the chain.
		CUarray_format cuFormat = CUarray_format(0);
		if (texRefData->channelDesc.f == cudaChannelFormatKindSigned)
		{
			if (bitsPerChannel == 8)
			{
				cuFormat = CU_AD_FORMAT_SIGNED_INT8;
			}
			else if (bitsPerChannel == 16)
			{
				cuFormat = CU_AD_FORMAT_SIGNED_INT16;
			}
			else if (bitsPerChannel == 32)
			{
				cuFormat = CU_AD_FORMAT_SIGNED_INT32;
			}
		}
		else if (texRefData->channelDesc.f == cudaChannelFormatKindUnsigned)
		{
			if (bitsPerChannel == 8)
			{
				cuFormat = CU_AD_FORMAT_UNSIGNED_INT8;
			}
			else if (bitsPerChannel == 16)
			{
				cuFormat = CU_AD_FORMAT_UNSIGNED_INT16;
			}
			else if (bitsPerChannel == 32)
			{
				cuFormat = CU_AD_FORMAT_UNSIGNED_INT32;
			}
		}
		else if (texRefData->channelDesc.f == cudaChannelFormatKindFloat)
		{
			cuFormat = CU_AD_FORMAT_FLOAT;
		}
		else
		{
			PX_ASSERT(0);
		}
		PX_ASSERT(cuFormat != 0);

		// Texture flags taken from the table entry and the reference data.
		int cuFlags = 0;
		if (textureTable[j].read_normalized_float == 0)
		{
			cuFlags |= CU_TRSF_READ_AS_INTEGER;
		}
		if (texRefData->normalized != 0)
		{
			cuFlags |= CU_TRSF_NORMALIZED_COORDINATES;
		}

		texRef.init(this, cuTexRef, cudaModule, cuFormat, numChannels, textureTable[j].dim, cuFlags);

		// Only the first matching entry is used.
		return;
	}
}

예제 #8

0

파일 보기

파일: swanlib_nv.c 프로젝트: liyuming1978/swan

void swanMakeTexture1DEx(  const char *modname, const char *texname, size_t width,  void *ptr, size_t typesize, int flags ) {
	int err;
		// get the texture
    CUtexref cu_texref;
	int mode, channels;
		CUarray array;
  CUDA_MEMCPY2D copyParam;
   CUDA_ARRAY_DESCRIPTOR p;

		// get the module
		CUmodule mod  = swanGetModule( modname );

    err = cuModuleGetTexRef(&cu_texref, mod, texname );
		if( err != CUDA_SUCCESS) { error( "swanMakeTexture1D failed -- texture not found" ); }

		p.Width = width;
		p.Height= 1;
	mode = flags & TEXTURE_TYPE_MASK;
	channels = typesize / sizeof(float);
	switch( mode ) {
		case TEXTURE_FLOAT:
		p.Format = CU_AD_FORMAT_FLOAT;
		p.NumChannels = channels;
		break;
		case TEXTURE_INT:
		p.Format = CU_AD_FORMAT_SIGNED_INT32;
		p.NumChannels = channels;
		break;
		case TEXTURE_UINT:
		p.Format = CU_AD_FORMAT_UNSIGNED_INT32;
		p.NumChannels = channels;
		break;
		default:
			error( "swanMakeTexture1D failed -- invalid format" );
	}


	  err = cuArrayCreate(  &array	, &p);
		if( err != CUDA_SUCCESS) { error( "swanMakeTexture1D failed -- array create failed" ); }

  memset(&copyParam, 0, sizeof(copyParam));
  copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY;
  copyParam.dstArray = array;
  copyParam.srcMemoryType = CU_MEMORYTYPE_HOST;
  copyParam.srcHost = ptr;
  copyParam.srcPitch = width * sizeof(float);
  copyParam.WidthInBytes = copyParam.srcPitch;
  copyParam.Height = 1;
  // err = cuMemcpy2D(&copyParam);


	err = cuMemcpyHtoA( array, 0, ptr,  typesize  * width );
	if( err != CUDA_SUCCESS) { error( "swanMakeTexture1D failed -- memcpy failed" ); }
 
	err = cuTexRefSetArray ( cu_texref, array, CU_TRSA_OVERRIDE_FORMAT );
	if( err != CUDA_SUCCESS) { error( "swanMakeTexture1D failed -- setarray failed" ); }


	if( (flags & TEXTURE_INTERPOLATE) == TEXTURE_INTERPOLATE ) {
		err = cuTexRefSetFilterMode( cu_texref, CU_TR_FILTER_MODE_LINEAR );
	}
	else {
		err = cuTexRefSetFilterMode( cu_texref, CU_TR_FILTER_MODE_POINT );
	}
		if( err != CUDA_SUCCESS) { error( "swanBindToTexture1D failed -- setfiltermode failed" ); }

	if(  (flags & TEXTURE_NORMALISE ) == TEXTURE_NORMALISE ) {
		err  = cuTexRefSetFlags(cu_texref, CU_TRSF_NORMALIZED_COORDINATES);
    err |= cuTexRefSetAddressMode(cu_texref, 0, CU_TR_ADDRESS_MODE_CLAMP);
    err |= cuTexRefSetAddressMode(cu_texref, 1, CU_TR_ADDRESS_MODE_CLAMP);
		if( err != CUDA_SUCCESS) { error( "swanBindToTexture1D failed -- setflags 1 failed" ); }
	}

		err = cuTexRefSetFormat( cu_texref, CU_AD_FORMAT_FLOAT, channels );
		if( err != CUDA_SUCCESS) { error( "swanBindToTexture1D failed -- setformat failed" ); }

//printf("TEX BIND DONE\n");
}

예제 #9

0

파일 보기

파일: texture_lininterp.cpp 프로젝트: xavigibert/ShearCuda

void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {

  CUresult cudastatus = CUDA_SUCCESS;

  if (nrhs != 2)
    mexErrMsgTxt("Wrong number of arguments");

  if (init == 0) {
    // Initialize function
    //mexLock();

    // load GPUmat
		gm = gmGetGPUmat();

		// load module
		CUmodule *drvmod = gmGetModule("examples_texture");


    // load float GPU function
    CUresult status = cuModuleGetFunction(&drvfunf, *drvmod, "LININTERF");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }

    // load double GPU function
    status = cuModuleGetFunction(&drvfund, *drvmod, "LININTERD");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }

    // load textures defined in module
    status = cuModuleGetTexRef(&texf, *drvmod, "texref_f1_a");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load texture.");
    }

    status = cuModuleGetTexRef(&texd, *drvmod, "texref_d1_a");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load texture.");
    }

    // no complex function support
    init = 1;
  }

  // mex parameters are:

  // 1. IN1. Input array
  // 2. IN2. Input indexes array

  //IN1 is the input GPU array
  GPUtype IN1 = gm->gputype.getGPUtype(prhs[0]);

  //IN2 is the input GPU array
  GPUtype IN2 = gm->gputype.getGPUtype(prhs[1]);

  //OUT is the output GPU array (result)
  // Create of the same size of IN1
  gpuTYPE_t in1_t = gm->gputype.getType(IN1);
  int in1_d = gm->gputype.getNdims(IN1);
  const int * in1_s = gm->gputype.getSize(IN1);
  int in1_n = gm->gputype.getNumel(IN1);
  int in1_b = gm->gputype.getDataSize(IN1);

  gpuTYPE_t in2_t = gm->gputype.getType(IN2);
  int in2_d = gm->gputype.getNdims(IN2);
  const int * in2_s = gm->gputype.getSize(IN2);
  int in2_n = gm->gputype.getNumel(IN2);

  if ((in1_t==gpuCFLOAT) || (in1_t==gpuCDOUBLE)) {
    mexErrMsgTxt("Complex TYPE not supported");
  }

  if (in1_t != in2_t) {
    mexErrMsgTxt("Input arguments must be of the same type");
  }

  if (in1_n != in2_n) {
    mexErrMsgTxt("Input arguments must have the same number of elements");
  }

  //OUT is the output GPU array (result)
  // Create of the same size of IN1
  GPUtype OUT = gm->gputype.create(in1_t, in1_d, in1_s, NULL);



  // I need the pointers to GPU memory
  CUdeviceptr d_IN1  = (CUdeviceptr) (UINTPTR gm->gputype.getGPUptr(IN1));
  CUdeviceptr d_IN2  = (CUdeviceptr) (UINTPTR gm->gputype.getGPUptr(IN2));
  CUdeviceptr d_OUT = (CUdeviceptr) (UINTPTR gm->gputype.getGPUptr(OUT));

  // The GPU kernel depends on the type of input/output
  CUfunction drvfun;
  CUtexref   drvtex;
  CUarray_format_enum drvtexformat;
  int drvtexnum;

  if (in1_t == gpuFLOAT) {
    drvfun = drvfunf;
    drvtex = texf;
    drvtexformat = CU_AD_FORMAT_FLOAT;
    drvtexnum = 1;
  } else if (in1_t == gpuDOUBLE) {
    drvfun = drvfund;
    drvtex = texd;
    drvtexformat = CU_AD_FORMAT_SIGNED_INT32;
    drvtexnum = 2;
  }

  if (CUDA_SUCCESS != cuTexRefSetFormat(drvtex, drvtexformat, drvtexnum)) {
    mexErrMsgTxt("Execution error (texture).");
  }
  if (CUDA_SUCCESS != cuTexRefSetAddress(NULL, drvtex, UINTPTR d_IN1, in1_n*in1_b)) {
    mexErrMsgTxt("Execution error (texture).");
  }

  if (CUDA_SUCCESS != cuParamSetTexRef(drvfun, CU_PARAM_TR_DEFAULT, drvtex)) {
    mexErrMsgTxt("Execution error (texture1).");
  }


  hostdrv_pars_t gpuprhs[2];
	int gpunrhs = 2;
	gpuprhs[0] = hostdrv_pars(&d_IN2,sizeof(d_IN2),__alignof(d_IN2));
	gpuprhs[1] = hostdrv_pars(&d_OUT,sizeof(d_OUT),__alignof(d_OUT));

	int N = in1_n;

	hostGPUDRV(drvfun, N, gpunrhs, gpuprhs);


	// return result
	plhs[0] = gm->gputype.createMxArray(OUT);


}

예제 #10

0

파일 보기

파일: vf_scale_cuda.c 프로젝트: bradleysepos/ffmpeg

/**
 * Configure the output link of the CUDA scale filter.
 *
 * Loads the embedded PTX module into the device's CUDA context, resolves the
 * six bilinear subsample kernels and their texture references, configures
 * each texture (read-as-integer flag, linear filtering), evaluates the output
 * dimensions, initializes the processing chain and derives the output sample
 * aspect ratio.
 *
 * The CUDA context pushed at the start is popped again on every path,
 * including failures. Any failing module, kernel or texture call aborts the
 * configuration with its error code instead of being ignored.
 *
 * @param outlink output link being configured
 * @return 0 on success, a negative error code on failure
 */
static av_cold int cudascale_config_props(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    AVFilterLink *inlink = outlink->src->inputs[0];
    CUDAScaleContext *s  = ctx->priv;
    AVHWFramesContext     *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
    AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
    CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
    int w, h;
    int ret;
    size_t i;

    extern char vf_scale_cuda_ptx[];

    /* Kernel entry points and where their handles are stored. */
    const struct { CUfunction *func; const char *name; } kernels[] = {
        { &s->cu_func_uchar,   "Subsample_Bilinear_uchar"   },
        { &s->cu_func_uchar2,  "Subsample_Bilinear_uchar2"  },
        { &s->cu_func_uchar4,  "Subsample_Bilinear_uchar4"  },
        { &s->cu_func_ushort,  "Subsample_Bilinear_ushort"  },
        { &s->cu_func_ushort2, "Subsample_Bilinear_ushort2" },
        { &s->cu_func_ushort4, "Subsample_Bilinear_ushort4" },
    };
    /* Texture references and where their handles are stored. */
    const struct { CUtexref *tex; const char *name; } textures[] = {
        { &s->cu_tex_uchar,   "uchar_tex"   },
        { &s->cu_tex_uchar2,  "uchar2_tex"  },
        { &s->cu_tex_uchar4,  "uchar4_tex"  },
        { &s->cu_tex_ushort,  "ushort_tex"  },
        { &s->cu_tex_ushort2, "ushort2_tex" },
        { &s->cu_tex_ushort4, "ushort4_tex" },
    };

    ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
    if (ret < 0)
        return ret;

    ret = CHECK_CU(cuModuleLoadData(&s->cu_module, vf_scale_cuda_ptx));
    if (ret < 0)
        goto pop_ctx;

    for (i = 0; i < sizeof(kernels) / sizeof(kernels[0]); i++) {
        ret = CHECK_CU(cuModuleGetFunction(kernels[i].func, s->cu_module, kernels[i].name));
        if (ret < 0) {
            av_log(ctx, AV_LOG_ERROR, "Failed to resolve kernel %s\n", kernels[i].name);
            goto pop_ctx;
        }
    }

    for (i = 0; i < sizeof(textures) / sizeof(textures[0]); i++) {
        ret = CHECK_CU(cuModuleGetTexRef(textures[i].tex, s->cu_module, textures[i].name));
        if (ret < 0) {
            av_log(ctx, AV_LOG_ERROR, "Failed to resolve texture %s\n", textures[i].name);
            goto pop_ctx;
        }

        ret = CHECK_CU(cuTexRefSetFlags(*textures[i].tex, CU_TRSF_READ_AS_INTEGER));
        if (ret < 0)
            goto pop_ctx;

        ret = CHECK_CU(cuTexRefSetFilterMode(*textures[i].tex, CU_TR_FILTER_MODE_LINEAR));
        if (ret < 0)
            goto pop_ctx;
    }

pop_ctx:
    /* Undo the push above whether or not the setup succeeded. */
    CHECK_CU(cuCtxPopCurrent(&dummy));
    if (ret < 0)
        return ret;

    if ((ret = ff_scale_eval_dimensions(s,
                                        s->w_expr, s->h_expr,
                                        inlink, outlink,
                                        &w, &h)) < 0)
        return ret;

    /* These products are computed in int for the aspect ratio below, so
     * dimensions that would overflow are rejected instead of only logged. */
    if (((int64_t)h * inlink->w) > INT_MAX  ||
        ((int64_t)w * inlink->h) > INT_MAX) {
        av_log(ctx, AV_LOG_ERROR, "Rescaled value for width or height is too big.\n");
        return AVERROR(EINVAL);
    }

    outlink->w = w;
    outlink->h = h;

    ret = init_processing_chain(ctx, inlink->w, inlink->h, w, h);
    if (ret < 0)
        return ret;

    av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d -> w:%d h:%d\n",
           inlink->w, inlink->h, outlink->w, outlink->h);

    if (inlink->sample_aspect_ratio.num) {
        outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h*inlink->w,
                                                             outlink->w*inlink->h},
                                                inlink->sample_aspect_ratio);
    } else {
        outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
    }

    return 0;
}