Example #1
0
void swanBindToTexture1DEx(  const char *modname, const char *texname, size_t width, void *ptr, size_t typesize, int flags ) {
	CUresult err;
    CUtexref cu_texref;
	int mode, channels;

		// get the module
		CUmodule mod  = swanGetModule( modname );

		// get the texture
    err = cuModuleGetTexRef(&cu_texref, mod, texname );
		if( err != CUDA_SUCCESS) { error( "swanBindToTexture1D failed -- texture not found" ); }


		// now bind
	 err = cuTexRefSetAddress( NULL, cu_texref,  PTR_TO_CUDEVPTR(ptr), width * typesize );
	if( err != CUDA_SUCCESS) { 
			printf("EEERRR = %d\n", err );
		error( "swanBindToTexture1D failed -- bind failed" ); 
	}

// does not work for linear memory
/*
	if( (flags & TEXTURE_INTERPOLATE) == TEXTURE_INTERPOLATE ) {
		err = cuTexRefSetFilterMode( cu_texref, CU_TR_FILTER_MODE_LINEAR );
	}
	else {
		err = cuTexRefSetFilterMode( cu_texref, CU_TR_FILTER_MODE_POINT );
	}
		if( err != CUDA_SUCCESS) { error( "swanBindToTexture1D failed -- setfiltermode failed" ); }
*/

	mode = flags & TEXTURE_TYPE_MASK;
	channels = typesize / sizeof(float);
	switch( mode ) {
		case TEXTURE_FLOAT:
		err = cuTexRefSetFormat( cu_texref, CU_AD_FORMAT_FLOAT, channels );
		break;
		case TEXTURE_INT:
		err = cuTexRefSetFormat( cu_texref, CU_AD_FORMAT_SIGNED_INT32, channels );
		break;
		case TEXTURE_UINT:
		err = cuTexRefSetFormat( cu_texref, CU_AD_FORMAT_UNSIGNED_INT32, channels );
		break;
		default:
			error( "swanBinToTexture1D failed -- invalid format" );
	}

	if( err != CUDA_SUCCESS) {	
			error( "swanBinToTexture1D failed -- setformat failed" );
	}


	return;

}
Example #2
0
// Configures the texture reference `name` for a range of linear device memory.
//
//   name          - passed to getTexRef() to obtain the CUtexref
//   ptr           - device address to bind
//   size          - length of the bound range in bytes
//   format        - channel format forwarded to cuTexRefSetFormat
//   numComponents - component count forwarded to cuTexRefSetFormat
//
// The format is set first, then the address is bound. Each driver result is
// handed to checkError() together with the name of the API that produced it.
void CudaModule::setTexRef( const std::string& name,
                            CUdeviceptr ptr,
                            S64 size,
                            CUarray_format format,
                            int numComponents)
{
  CUtexref texRef = getTexRef(name);

  checkError("cuTexRefSetFormat", cuTexRefSetFormat(texRef, format, numComponents));

  // Forward the byte count as size_t. Narrowing it to 32 bits would wrap any
  // range of 4 GiB or more (exactly 4 GiB would silently become 0 bytes).
  checkError("cuTexRefSetAddress", cuTexRefSetAddress(NULL, texRef, ptr, (size_t)size));
}
Example #3
0
// Binds the device memory object `mem` to the texture reference registered
// under `name`.
//
// Steps: look up the TexBind record with FindTexRef, apply `format` and
// `numChannels` through SetFormat, bind mem->Handle() over mem->Size() with
// cuTexRefSetAddress, then reset the record's `texture` member and store
// `mem` in it.
//
// Returns CUDA_SUCCESS when every step completes.
// NOTE(review): HANDLE_RESULT() is defined elsewhere; it presumably returns
// early from this function when `result` is not CUDA_SUCCESS -- confirm.
CUresult CuModule::BindLinearTexture(const std::string& name, CuDeviceMem* mem,
	CUarray_format format, int numChannels) {

	// Locate the bookkeeping record for this texture name.
	TexBind* binding;
	CUresult result = FindTexRef(name, &binding);
	HANDLE_RESULT();

	SetFormat(binding, format, numChannels);

	// The driver writes a byte offset here; it is not used afterwards.
	size_t byteOffset;
	result = cuTexRefSetAddress(
		&byteOffset,
		binding->texRef,
		mem->Handle(),
		mem->Size());
	HANDLE_RESULT();

	// The record now refers to linear memory instead of `texture`.
	binding->texture.reset();
	binding->mem = mem;

	return CUDA_SUCCESS;
}
Example #4
0
// Launches the two kernels that make up one multiply call on the sparse
// matrix behind `mat`.
//
//   mat          - handle that is cast to sparseMatrix*
//   alpha, beta  - scalars pushed to the finalize kernel
//   xVec         - device memory bound to the kernel's xVec_texture
//   yVec         - device memory pushed to the finalize kernel
//
// Returns:
//   the status of LoadKernel          when the kernel set cannot be loaded
//   SPARSE_STATUS_KERNEL_ERROR        when binding xVec to the texture fails
//   SPARSE_STATUS_LAUNCH_ERROR        when the first launch fails
//   SPARSE_STATUS_KERNEL_ERROR        when the finalize launch fails
//   SPARSE_STATUS_SUCCESS             otherwise
sparseStatus_t sparseEngine_d::Multiply(sparseMat_t mat, T alpha, T beta,
	CUdeviceptr xVec, CUdeviceptr yVec) {

	sparseMatrix* matrix = static_cast<sparseMatrix*>(mat);

	// Fetch the kernel set matching the matrix precision.
	Kernel* kernel;
	sparseStatus_t loadStatus = LoadKernel(matrix->prec, &kernel);
	if(loadStatus != SPARSE_STATUS_SUCCESS) {
		return loadStatus;
	}

	// Arguments of the first kernel.
	CuCallStack callStack;
	callStack.Push(matrix->outputIndices, matrix->colIndices,
		matrix->sparseValues, matrix->tempOutput, matrix->numGroups);

	// Bind xVec as a texture; its byte length is width * per-element size.
	const PrecTerm& terms = PrecTerms[matrix->prec];
	size_t texOffset;
	CUresult result = cuTexRefSetAddress(&texOffset, kernel->xVec_texture,
		xVec, matrix->width * terms.vecSize);
	if(result != CUDA_SUCCESS) {
		return SPARSE_STATUS_KERNEL_ERROR;
	}

	// First launch: the function is picked by values-per-thread.
	uint numBlocks = DivUp(matrix->numGroups, WarpsPerBlock);
	result = kernel->func[IndexFromVT(matrix->valuesPerThread)]->Launch(
		numBlocks, 1, callStack);
	if(result != CUDA_SUCCESS) {
		return SPARSE_STATUS_LAUNCH_ERROR;
	}

	// Second launch: finalize into yVec. useBeta is 0 when beta is zero.
	int numFinalizeBlocks = DivUp(matrix->numGroups, WarpsPerBlock);
	int useBeta = !IsZero(beta);

	callStack.Reset();
	callStack.Push(matrix->tempOutput, matrix->rowIndices, matrix->height,
		yVec, alpha, beta, useBeta);

	result = kernel->finalize->Launch(numFinalizeBlocks, 1, callStack);
	if(result != CUDA_SUCCESS) {
		return SPARSE_STATUS_KERNEL_ERROR;
	}

	return SPARSE_STATUS_SUCCESS;
}
Example #5
0
void CudaModule::unsetTexRef(const std::string& name)
{
  CUtexref texRef = getTexRef(name);
  checkError("cuTexRefSetAddress", cuTexRefSetAddress( 0, texRef, 0, 0));
}
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {

  CUresult cudastatus = CUDA_SUCCESS;

  if (nrhs != 2)
    mexErrMsgTxt("Wrong number of arguments");

  if (init == 0) {
    // Initialize function
    //mexLock();

    // load GPUmat
		gm = gmGetGPUmat();

		// load module
		CUmodule *drvmod = gmGetModule("examples_texture");


    // load float GPU function
    CUresult status = cuModuleGetFunction(&drvfunf, *drvmod, "LININTERF");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }

    // load double GPU function
    status = cuModuleGetFunction(&drvfund, *drvmod, "LININTERD");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load user function.");
    }

    // load textures defined in module
    status = cuModuleGetTexRef(&texf, *drvmod, "texref_f1_a");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load texture.");
    }

    status = cuModuleGetTexRef(&texd, *drvmod, "texref_d1_a");
    if (CUDA_SUCCESS != status) {
      mexErrMsgTxt("Unable to load texture.");
    }

    // no complex function support
    init = 1;
  }

  // mex parameters are:

  // 1. IN1. Input array
  // 2. IN2. Input indexes array

  //IN1 is the input GPU array
  GPUtype IN1 = gm->gputype.getGPUtype(prhs[0]);

  //IN2 is the input GPU array
  GPUtype IN2 = gm->gputype.getGPUtype(prhs[1]);

  //OUT is the output GPU array (result)
  // Create of the same size of IN1
  gpuTYPE_t in1_t = gm->gputype.getType(IN1);
  int in1_d = gm->gputype.getNdims(IN1);
  const int * in1_s = gm->gputype.getSize(IN1);
  int in1_n = gm->gputype.getNumel(IN1);
  int in1_b = gm->gputype.getDataSize(IN1);

  gpuTYPE_t in2_t = gm->gputype.getType(IN2);
  int in2_d = gm->gputype.getNdims(IN2);
  const int * in2_s = gm->gputype.getSize(IN2);
  int in2_n = gm->gputype.getNumel(IN2);

  if ((in1_t==gpuCFLOAT) || (in1_t==gpuCDOUBLE)) {
    mexErrMsgTxt("Complex TYPE not supported");
  }

  if (in1_t != in2_t) {
    mexErrMsgTxt("Input arguments must be of the same type");
  }

  if (in1_n != in2_n) {
    mexErrMsgTxt("Input arguments must have the same number of elements");
  }

  //OUT is the output GPU array (result)
  // Create of the same size of IN1
  GPUtype OUT = gm->gputype.create(in1_t, in1_d, in1_s, NULL);



  // I need the pointers to GPU memory
  CUdeviceptr d_IN1  = (CUdeviceptr) (UINTPTR gm->gputype.getGPUptr(IN1));
  CUdeviceptr d_IN2  = (CUdeviceptr) (UINTPTR gm->gputype.getGPUptr(IN2));
  CUdeviceptr d_OUT = (CUdeviceptr) (UINTPTR gm->gputype.getGPUptr(OUT));

  // The GPU kernel depends on the type of input/output
  CUfunction drvfun;
  CUtexref   drvtex;
  CUarray_format_enum drvtexformat;
  int drvtexnum;

  if (in1_t == gpuFLOAT) {
    drvfun = drvfunf;
    drvtex = texf;
    drvtexformat = CU_AD_FORMAT_FLOAT;
    drvtexnum = 1;
  } else if (in1_t == gpuDOUBLE) {
    drvfun = drvfund;
    drvtex = texd;
    drvtexformat = CU_AD_FORMAT_SIGNED_INT32;
    drvtexnum = 2;
  }

  if (CUDA_SUCCESS != cuTexRefSetFormat(drvtex, drvtexformat, drvtexnum)) {
    mexErrMsgTxt("Execution error (texture).");
  }
  if (CUDA_SUCCESS != cuTexRefSetAddress(NULL, drvtex, UINTPTR d_IN1, in1_n*in1_b)) {
    mexErrMsgTxt("Execution error (texture).");
  }

  if (CUDA_SUCCESS != cuParamSetTexRef(drvfun, CU_PARAM_TR_DEFAULT, drvtex)) {
    mexErrMsgTxt("Execution error (texture1).");
  }


  hostdrv_pars_t gpuprhs[2];
	int gpunrhs = 2;
	gpuprhs[0] = hostdrv_pars(&d_IN2,sizeof(d_IN2),__alignof(d_IN2));
	gpuprhs[1] = hostdrv_pars(&d_OUT,sizeof(d_OUT),__alignof(d_OUT));

	int N = in1_n;

	hostGPUDRV(drvfun, N, gpunrhs, gpuprhs);


	// return result
	plhs[0] = gm->gputype.createMxArray(OUT);


}