示例#1
0
// Brings the texture reference held by texBind in line with the requested
// sampler attributes.
//
// texBind->sampler acts as a cache of the state last pushed to the driver:
// a driver call is issued only for attributes that differ from the cached
// value, and the cache is refreshed only when that driver call succeeds, so
// a failed update is retried on the next call instead of being masked.
//
// texBind  binding whose texRef and cached sampler state are updated.
// sampler  desired sampler attributes (format, address modes, filter, flags).
void CuModule::SetSampler(CuModule::TexBind* texBind,
	const CuTexSamplerAttr& sampler) {

	SetFormat(texBind, sampler.fmt, sampler.numPackedComponents);

	// Address modes, one per dimension. The *requested* mode is what must be
	// handed to the driver, not the cached one.
	if(texBind->sampler.addressX != sampler.addressX) {
		if(CUDA_SUCCESS ==
			cuTexRefSetAddressMode(texBind->texRef, 0, sampler.addressX))
			texBind->sampler.addressX = sampler.addressX;
	}
	if(texBind->sampler.addressY != sampler.addressY) {
		if(CUDA_SUCCESS ==
			cuTexRefSetAddressMode(texBind->texRef, 1, sampler.addressY))
			texBind->sampler.addressY = sampler.addressY;
	}
	if(texBind->sampler.addressZ != sampler.addressZ) {
		if(CUDA_SUCCESS ==
			cuTexRefSetAddressMode(texBind->texRef, 2, sampler.addressZ))
			texBind->sampler.addressZ = sampler.addressZ;
	}

	if(texBind->sampler.filter != sampler.filter) {
		if(CUDA_SUCCESS ==
			cuTexRefSetFilterMode(texBind->texRef, sampler.filter))
			texBind->sampler.filter = sampler.filter;
	}

	// readAsInteger and normCoord are packed into one flags word, so a change
	// in either one requires re-sending both.
	if((texBind->sampler.readAsInteger != sampler.readAsInteger) ||
		(texBind->sampler.normCoord != sampler.normCoord)) {

		uint flags = (sampler.readAsInteger ? CU_TRSF_READ_AS_INTEGER : 0) |
			(sampler.normCoord ? CU_TRSF_NORMALIZED_COORDINATES : 0);
		if(CUDA_SUCCESS == cuTexRefSetFlags(texBind->texRef, flags)) {
			texBind->sampler.readAsInteger = sampler.readAsInteger;
			texBind->sampler.normCoord = sampler.normCoord;
		}
	}
}
示例#2
0
void CudaModule::setTexRef( const std::string& name, 
                            CUarray cudaArray, 
                            bool wrap, 
                            bool bilinear, 
                            bool normalizedCoords, 
                            bool readAsInt)
{
  U32 flags = 0;
  if (normalizedCoords) {
    flags |= CU_TRSF_NORMALIZED_COORDINATES;
  }
  if (readAsInt) {
    flags |= CU_TRSF_READ_AS_INTEGER;
  }

  CUaddress_mode addressMode;
  CUfilter_mode filterMode;
  
  addressMode = (wrap) ? CU_TR_ADDRESS_MODE_WRAP : CU_TR_ADDRESS_MODE_CLAMP;
  filterMode = (bilinear) ? CU_TR_FILTER_MODE_LINEAR : CU_TR_FILTER_MODE_POINT;
  
  CUtexref texRef = getTexRef(name);
  for (int dim=0; dim<3; ++dim) 
  {
    checkError( "cuTexRefSetAddressMode", 
                 cuTexRefSetAddressMode(texRef, dim, addressMode));
  }
  
  checkError("cuTexRefSetFilterMode", cuTexRefSetFilterMode(texRef, filterMode));
  checkError("cuTexRefSetFlags", cuTexRefSetFlags(texRef, flags));
  checkError("cuTexRefSetArray", cuTexRefSetArray(texRef, cudaArray, CU_TRSA_OVERRIDE_FORMAT));
}
// Launches the scaling kernel (m_cuScaleNV12Function) that resamples the
// image at dInput into dOutput.
//
// The input is bound to two 8-bit texture references with linear filtering:
// a 1-channel luma texture starting at dInput, and a 2-channel chroma texture
// starting maxHeight rows (of srcPitch bytes) after dInput with half the
// source height.
//
// Parameters:
//   dInput / dOutput                 device addresses of source / destination.
//   srcWidth, srcPitch, srcHeight    source size in pixels and row pitch in bytes.
//   dstWidth, dstPitch, dstHeight    destination size and row pitch.
//   maxWidth                         not used by this function.
//   maxHeight                        row count between the start of the luma
//                                    plane and the start of the chroma plane.
//
// Returns CUDA_SUCCESS when the kernel was launched and the default stream
// reports no error (the work may still be in flight), CUDA_ERROR_INVALID_VALUE
// for a null surface or a non-positive destination size, or the failing driver
// call's error code.
CUresult CNvEncoderLowLatency::ScaleNV12Image(CUdeviceptr dInput, CUdeviceptr dOutput,
                                                int srcWidth, int srcPitch, int srcHeight,
                                                int dstWidth, int dstPitch, int dstHeight,
                                                int maxWidth, int maxHeight)

{
    CCudaAutoLock cuLock(m_cuContext);
    CUDA_ARRAY_DESCRIPTOR desc;
    CUresult result;
    float left, right;
    float xOffset, yOffset, xScale, yScale;

    // The whole source and destination rectangles are used.
    const int srcLeft = 0;
    const int srcTop = 0;
    const int srcRight = srcWidth;
    const int srcBottom = srcHeight;

    const int dstLeft = 0;
    const int dstTop = 0;
    const int dstRight = dstWidth;
    const int dstBottom = dstHeight;

    if ((!dInput) || (!dOutput))
    {
        PRINTERR("NULL surface pointer!\n");
        return CUDA_ERROR_INVALID_VALUE;
    }
    // A non-positive destination size would divide by zero in the scale
    // factors below and yield an empty launch grid.
    if ((dstWidth <= 0) || (dstHeight <= 0))
    {
        PRINTERR("Invalid destination size!\n");
        return CUDA_ERROR_INVALID_VALUE;
    }

    // Source-pixels-per-destination-pixel ratio and the sampling offset
    // derived from it, capped at half a pixel.
    xScale = (float)(srcRight - srcLeft) / (float)(dstRight - dstLeft);
    xOffset = 0.5f*xScale - 0.5f;
    if (xOffset > 0.5f)
        xOffset = 0.5f;
    yScale = (float)(srcBottom - srcTop) / (float)(dstBottom - dstTop);
    yOffset = 0.5f*yScale - 0.5f;
    if (yOffset > 0.5f)
        yOffset = 0.5f;
    left = (float)srcLeft;
    right = (float)(srcRight - 1);
    xOffset += left;

    // Luma: one 8-bit channel per texel, full source height.
    desc.NumChannels = 1;
    desc.Width = srcPitch / desc.NumChannels;
    desc.Height = srcBottom - srcTop;
    desc.Format = CU_AD_FORMAT_UNSIGNED_INT8;
    result = cuTexRefSetFilterMode(m_texLuma2D, CU_TR_FILTER_MODE_LINEAR);
    if (result != CUDA_SUCCESS)
    {
        PRINTERR("cuTexRefSetFilterMode: %d\n", result);
        return result;
    }
    result = cuTexRefSetAddress2D(m_texLuma2D, &desc, dInput + srcTop*srcPitch, srcPitch);
    if (result != CUDA_SUCCESS)
    {
        PRINTERR("BindTexture2D(luma): %d\n", result);
        return result;
    }

    // Chroma: two 8-bit channels per texel, half the source height.
    desc.NumChannels = 2;
    desc.Width = srcPitch / desc.NumChannels;
    desc.Height = (srcBottom - srcTop) >> 1;
    desc.Format = CU_AD_FORMAT_UNSIGNED_INT8;
    result = cuTexRefSetFilterMode(m_texChroma2D, CU_TR_FILTER_MODE_LINEAR);
    if (result != CUDA_SUCCESS)
    {
        PRINTERR("cuTexRefSetFilterMode: %d\n", result);
        return result;
    }

    result = cuTexRefSetAddress2D(m_texChroma2D, &desc, dInput + (maxHeight + srcTop/2)*srcPitch, srcPitch);
    if (result != CUDA_SUCCESS)
    {
        PRINTERR("BindTexture2D(chroma): %d\n", result);
        return result;
    }

    // NOTE(review): the destination chroma offset is computed from srcPitch,
    // not dstPitch - confirm this is intended when the two pitches differ.
    int dstUVOffset = maxHeight * srcPitch;
    float x_Offset = xOffset - dstLeft*xScale;
    float y_Offset = yOffset + 0.5f - dstTop*yScale;
    float xc_offset = xOffset - dstLeft*xScale*0.5f;
    float yc_offset = yOffset + 0.5f - dstTop*yScale*0.5f;

    void *args[13] = { &dOutput, &dstUVOffset, &dstWidth, &dstHeight, &dstPitch,
        &left, &right, &x_Offset, &y_Offset,
        &xc_offset, &yc_offset, &xScale, &yScale };

    // 256 threads per block along x; grid.y spans half the destination rows
    // (rounded up).
    dim3 block(256, 1, 1);
    dim3 grid((dstRight + 255) >> 8, (dstBottom + 1) >> 1, 1);

    result = cuLaunchKernel(m_cuScaleNV12Function, grid.x, grid.y, grid.z,
        block.x, block.y, block.z,
        0,
        NULL, args, NULL);
    if (result != CUDA_SUCCESS)
    {
        PRINTERR("cuLaunchKernel: %d\n", result);
        return result;
    }

    // The launch is asynchronous, so CUDA_ERROR_NOT_READY simply means the
    // kernel is still running and is not a failure. Anything other than
    // success / not-ready is a real error and is passed on to the caller.
    result = cuStreamQuery(NULL);
    if ((result == CUDA_SUCCESS) || (result == CUDA_ERROR_NOT_READY))
    {
        return CUDA_SUCCESS;
    }
    PRINTERR("cuStreamQuery: %d\n", result);
    return result;
}
示例#4
0
void swanMakeTexture1DEx(  const char *modname, const char *texname, size_t width,  void *ptr, size_t typesize, int flags ) {
	int err;
		// get the texture
    CUtexref cu_texref;
	int mode, channels;
		CUarray array;
  CUDA_MEMCPY2D copyParam;
   CUDA_ARRAY_DESCRIPTOR p;

		// get the module
		CUmodule mod  = swanGetModule( modname );

    err = cuModuleGetTexRef(&cu_texref, mod, texname );
		if( err != CUDA_SUCCESS) { error( "swanMakeTexture1D failed -- texture not found" ); }

		p.Width = width;
		p.Height= 1;
	mode = flags & TEXTURE_TYPE_MASK;
	channels = typesize / sizeof(float);
	switch( mode ) {
		case TEXTURE_FLOAT:
		p.Format = CU_AD_FORMAT_FLOAT;
		p.NumChannels = channels;
		break;
		case TEXTURE_INT:
		p.Format = CU_AD_FORMAT_SIGNED_INT32;
		p.NumChannels = channels;
		break;
		case TEXTURE_UINT:
		p.Format = CU_AD_FORMAT_UNSIGNED_INT32;
		p.NumChannels = channels;
		break;
		default:
			error( "swanMakeTexture1D failed -- invalid format" );
	}


	  err = cuArrayCreate(  &array	, &p);
		if( err != CUDA_SUCCESS) { error( "swanMakeTexture1D failed -- array create failed" ); }

  memset(&copyParam, 0, sizeof(copyParam));
  copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY;
  copyParam.dstArray = array;
  copyParam.srcMemoryType = CU_MEMORYTYPE_HOST;
  copyParam.srcHost = ptr;
  copyParam.srcPitch = width * sizeof(float);
  copyParam.WidthInBytes = copyParam.srcPitch;
  copyParam.Height = 1;
  // err = cuMemcpy2D(&copyParam);


	err = cuMemcpyHtoA( array, 0, ptr,  typesize  * width );
	if( err != CUDA_SUCCESS) { error( "swanMakeTexture1D failed -- memcpy failed" ); }
 
	err = cuTexRefSetArray ( cu_texref, array, CU_TRSA_OVERRIDE_FORMAT );
	if( err != CUDA_SUCCESS) { error( "swanMakeTexture1D failed -- setarray failed" ); }


	if( (flags & TEXTURE_INTERPOLATE) == TEXTURE_INTERPOLATE ) {
		err = cuTexRefSetFilterMode( cu_texref, CU_TR_FILTER_MODE_LINEAR );
	}
	else {
		err = cuTexRefSetFilterMode( cu_texref, CU_TR_FILTER_MODE_POINT );
	}
		if( err != CUDA_SUCCESS) { error( "swanBindToTexture1D failed -- setfiltermode failed" ); }

	if(  (flags & TEXTURE_NORMALISE ) == TEXTURE_NORMALISE ) {
		err  = cuTexRefSetFlags(cu_texref, CU_TRSF_NORMALIZED_COORDINATES);
    err |= cuTexRefSetAddressMode(cu_texref, 0, CU_TR_ADDRESS_MODE_CLAMP);
    err |= cuTexRefSetAddressMode(cu_texref, 1, CU_TR_ADDRESS_MODE_CLAMP);
		if( err != CUDA_SUCCESS) { error( "swanBindToTexture1D failed -- setflags 1 failed" ); }
	}

		err = cuTexRefSetFormat( cu_texref, CU_AD_FORMAT_FLOAT, channels );
		if( err != CUDA_SUCCESS) { error( "swanBindToTexture1D failed -- setformat failed" ); }

//printf("TEX BIND DONE\n");
}
示例#5
0
/**
 * Configure the output link of the filter.
 *
 * Loads the PTX module, resolves the six Subsample_Bilinear_* kernels and
 * their texture references, sets each texture reference to read-as-integer
 * with linear filtering, evaluates the output dimensions, initialises the
 * processing chain and derives the output sample aspect ratio.
 *
 * @param outlink output link being configured
 * @return 0 on success, a negative error code otherwise
 */
static av_cold int cudascale_config_props(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    AVFilterLink *inlink = outlink->src->inputs[0];
    CUDAScaleContext *s  = ctx->priv;
    AVHWFramesContext     *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
    AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
    CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
    int w, h;
    int ret;
    int i;

    extern char vf_scale_cuda_ptx[];

    /* One entry per pixel layout: where to store the kernel and texture
     * handles, and the symbol names to look up in the module. */
    const struct {
        CUfunction *func;
        CUtexref   *tex;
        const char *func_name;
        const char *tex_name;
    } kernels[] = {
        { &s->cu_func_uchar,   &s->cu_tex_uchar,   "Subsample_Bilinear_uchar",   "uchar_tex"   },
        { &s->cu_func_uchar2,  &s->cu_tex_uchar2,  "Subsample_Bilinear_uchar2",  "uchar2_tex"  },
        { &s->cu_func_uchar4,  &s->cu_tex_uchar4,  "Subsample_Bilinear_uchar4",  "uchar4_tex"  },
        { &s->cu_func_ushort,  &s->cu_tex_ushort,  "Subsample_Bilinear_ushort",  "ushort_tex"  },
        { &s->cu_func_ushort2, &s->cu_tex_ushort2, "Subsample_Bilinear_ushort2", "ushort2_tex" },
        { &s->cu_func_ushort4, &s->cu_tex_ushort4, "Subsample_Bilinear_ushort4", "ushort4_tex" },
    };

    ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx));
    if (ret < 0)
        goto fail;

    /* From here on the context is pushed: every failure must pop it again. */
    ret = CHECK_CU(cuModuleLoadData(&s->cu_module, vf_scale_cuda_ptx));
    if (ret < 0)
        goto fail_pop;

    for (i = 0; i < (int)(sizeof(kernels) / sizeof(kernels[0])); i++) {
        ret = CHECK_CU(cuModuleGetFunction(kernels[i].func, s->cu_module,
                                           kernels[i].func_name));
        if (ret < 0)
            goto fail_pop;

        ret = CHECK_CU(cuModuleGetTexRef(kernels[i].tex, s->cu_module,
                                         kernels[i].tex_name));
        if (ret < 0)
            goto fail_pop;

        ret = CHECK_CU(cuTexRefSetFlags(*kernels[i].tex, CU_TRSF_READ_AS_INTEGER));
        if (ret < 0)
            goto fail_pop;

        ret = CHECK_CU(cuTexRefSetFilterMode(*kernels[i].tex, CU_TR_FILTER_MODE_LINEAR));
        if (ret < 0)
            goto fail_pop;
    }

    CHECK_CU(cuCtxPopCurrent(&dummy));

    if ((ret = ff_scale_eval_dimensions(s,
                                        s->w_expr, s->h_expr,
                                        inlink, outlink,
                                        &w, &h)) < 0)
        goto fail;

    /* NOTE(review): an oversized result is only logged; configuration still
     * proceeds with these dimensions - confirm that is intended. */
    if (((int64_t)h * inlink->w) > INT_MAX  ||
        ((int64_t)w * inlink->h) > INT_MAX)
        av_log(ctx, AV_LOG_ERROR, "Rescaled value for width or height is too big.\n");

    outlink->w = w;
    outlink->h = h;

    ret = init_processing_chain(ctx, inlink->w, inlink->h, w, h);
    if (ret < 0)
        return ret;

    av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d -> w:%d h:%d\n",
           inlink->w, inlink->h, outlink->w, outlink->h);

    /* Scale the input sample aspect ratio by the change in frame shape; an
     * input ratio with a zero numerator is passed through unchanged. */
    if (inlink->sample_aspect_ratio.num) {
        outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h*inlink->w,
                                                             outlink->w*inlink->h},
                                                inlink->sample_aspect_ratio);
    } else {
        outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
    }

    return 0;

fail_pop:
    /* Undo cuCtxPushCurrent() while keeping the original error in ret. */
    CHECK_CU(cuCtxPopCurrent(&dummy));
fail:
    return ret;
}