void swanBindToTexture1DEx( const char *modname, const char *texname, size_t width, void *ptr, size_t typesize, int flags ) { CUresult err; CUtexref cu_texref; int mode, channels; // get the module CUmodule mod = swanGetModule( modname ); // get the texture err = cuModuleGetTexRef(&cu_texref, mod, texname ); if( err != CUDA_SUCCESS) { error( "swanBindToTexture1D failed -- texture not found" ); } // now bind err = cuTexRefSetAddress( NULL, cu_texref, PTR_TO_CUDEVPTR(ptr), width * typesize ); if( err != CUDA_SUCCESS) { printf("EEERRR = %d\n", err ); error( "swanBindToTexture1D failed -- bind failed" ); } // does not work for linear memory /* if( (flags & TEXTURE_INTERPOLATE) == TEXTURE_INTERPOLATE ) { err = cuTexRefSetFilterMode( cu_texref, CU_TR_FILTER_MODE_LINEAR ); } else { err = cuTexRefSetFilterMode( cu_texref, CU_TR_FILTER_MODE_POINT ); } if( err != CUDA_SUCCESS) { error( "swanBindToTexture1D failed -- setfiltermode failed" ); } */ mode = flags & TEXTURE_TYPE_MASK; channels = typesize / sizeof(float); switch( mode ) { case TEXTURE_FLOAT: err = cuTexRefSetFormat( cu_texref, CU_AD_FORMAT_FLOAT, channels ); break; case TEXTURE_INT: err = cuTexRefSetFormat( cu_texref, CU_AD_FORMAT_SIGNED_INT32, channels ); break; case TEXTURE_UINT: err = cuTexRefSetFormat( cu_texref, CU_AD_FORMAT_UNSIGNED_INT32, channels ); break; default: error( "swanBinToTexture1D failed -- invalid format" ); } if( err != CUDA_SUCCESS) { error( "swanBinToTexture1D failed -- setformat failed" ); } return; }
/**
 * Sets the format of the texture reference called `name` and binds it to
 * linear device memory.
 *
 * @param name           Name used to look the reference up via getTexRef().
 * @param ptr            Device address to bind.
 * @param size           Number of bytes to bind.
 * @param format         Channel format passed to cuTexRefSetFormat.
 * @param numComponents  Number of components per texel.
 *
 * Both driver results are passed to checkError() together with the name of
 * the driver call.
 */
void CudaModule::setTexRef( const std::string& name, CUdeviceptr ptr, S64 size, CUarray_format format, int numComponents)
{
    CUtexref texRef = getTexRef(name);
    checkError("cuTexRefSetFormat", cuTexRefSetFormat(texRef, format, numComponents));

    // Hand the driver the full byte count. Narrowing it to 32 bits first
    // would wrap a size of 4 GiB or more into a small, valid-looking value
    // instead of letting the driver reject the request.
    checkError("cuTexRefSetAddress", cuTexRefSetAddress(NULL, texRef, ptr, static_cast<size_t>(size)));
}
// Binds the device memory block `mem` to the texture reference registered
// under `name`, after applying `format` / `numChannels` to it.
//
// Returns CUDA_SUCCESS when both the lookup and the bind succeed. Failures
// are handled by HANDLE_RESULT(), a macro defined elsewhere that presumably
// returns `result` when it is not CUDA_SUCCESS -- confirm against its
// definition.
CUresult CuModule::BindLinearTexture(const std::string& name, CuDeviceMem* mem, CUarray_format format, int numChannels) {
    // Find the bookkeeping entry for this texture name.
    TexBind* binding;
    CUresult result = FindTexRef(name, &binding);
    HANDLE_RESULT();

    SetFormat(binding, format, numChannels);

    // The driver reports a byte offset for the binding; it is not used here.
    size_t byteOffset;
    result = cuTexRefSetAddress(&byteOffset, binding->texRef, mem->Handle(), mem->Size());
    HANDLE_RESULT();

    // Clear the entry's `texture` member and record the memory now bound.
    binding->texture.reset();
    binding->mem = mem;

    return CUDA_SUCCESS;
}
sparseStatus_t sparseEngine_d::Multiply(sparseMat_t mat, T alpha, T beta, CUdeviceptr xVec, CUdeviceptr yVec) {
    // Runs the two-stage multiply for `mat`: the main kernel writes into the
    // matrix's tempOutput buffer, then the finalize kernel consumes
    // tempOutput together with alpha, beta and yVec.
    //
    // Returns the LoadKernel() status if the kernel cannot be loaded,
    // SPARSE_STATUS_KERNEL_ERROR if the texture bind or the finalize launch
    // fails, SPARSE_STATUS_LAUNCH_ERROR if the main launch fails, and
    // SPARSE_STATUS_SUCCESS otherwise.
    sparseMatrix* matrix = static_cast<sparseMatrix*>(mat);

    Kernel* kernel;
    const sparseStatus_t loadStatus = LoadKernel(matrix->prec, &kernel);
    if(loadStatus != SPARSE_STATUS_SUCCESS)
        return loadStatus;

    // Arguments of the main kernel.
    CuCallStack args;
    args.Push(matrix->outputIndices, matrix->colIndices, matrix->sparseValues,
        matrix->tempOutput, matrix->numGroups);

    // xVec is read through a texture: bind width * (size of one xVec element)
    // bytes. The byte offset reported by the driver is not used.
    const PrecTerm elementInfo = PrecTerms[matrix->prec];
    size_t texOffset;
    CUresult cuStatus = cuTexRefSetAddress(&texOffset, kernel->xVec_texture, xVec,
        matrix->width * elementInfo.vecSize);
    if(cuStatus != CUDA_SUCCESS)
        return SPARSE_STATUS_KERNEL_ERROR;

    // Main launch.
    uint mainBlocks = DivUp(matrix->numGroups, WarpsPerBlock);
    cuStatus = kernel->func[IndexFromVT(matrix->valuesPerThread)]->Launch(mainBlocks, 1, args);
    if(cuStatus != CUDA_SUCCESS)
        return SPARSE_STATUS_LAUNCH_ERROR;

    // Finalize launch; the call stack is reused for its arguments.
    int finalizeBlocks = DivUp(matrix->numGroups, WarpsPerBlock);
    int applyBeta = !IsZero(beta);

    args.Reset();
    args.Push(matrix->tempOutput, matrix->rowIndices, matrix->height, yVec,
        alpha, beta, applyBeta);

    cuStatus = kernel->finalize->Launch(finalizeBlocks, 1, args);
    if(cuStatus != CUDA_SUCCESS)
        return SPARSE_STATUS_KERNEL_ERROR;

    return SPARSE_STATUS_SUCCESS;
}
void CudaModule::unsetTexRef(const std::string& name) { CUtexref texRef = getTexRef(name); checkError("cuTexRefSetAddress", cuTexRefSetAddress( 0, texRef, 0, 0)); }
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { CUresult cudastatus = CUDA_SUCCESS; if (nrhs != 2) mexErrMsgTxt("Wrong number of arguments"); if (init == 0) { // Initialize function //mexLock(); // load GPUmat gm = gmGetGPUmat(); // load module CUmodule *drvmod = gmGetModule("examples_texture"); // load float GPU function CUresult status = cuModuleGetFunction(&drvfunf, *drvmod, "LININTERF"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } // load double GPU function status = cuModuleGetFunction(&drvfund, *drvmod, "LININTERD"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load user function."); } // load textures defined in module status = cuModuleGetTexRef(&texf, *drvmod, "texref_f1_a"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load texture."); } status = cuModuleGetTexRef(&texd, *drvmod, "texref_d1_a"); if (CUDA_SUCCESS != status) { mexErrMsgTxt("Unable to load texture."); } // no complex function support init = 1; } // mex parameters are: // 1. IN1. Input array // 2. IN2. 
Input indexes array //IN1 is the input GPU array GPUtype IN1 = gm->gputype.getGPUtype(prhs[0]); //IN2 is the input GPU array GPUtype IN2 = gm->gputype.getGPUtype(prhs[1]); //OUT is the output GPU array (result) // Create of the same size of IN1 gpuTYPE_t in1_t = gm->gputype.getType(IN1); int in1_d = gm->gputype.getNdims(IN1); const int * in1_s = gm->gputype.getSize(IN1); int in1_n = gm->gputype.getNumel(IN1); int in1_b = gm->gputype.getDataSize(IN1); gpuTYPE_t in2_t = gm->gputype.getType(IN2); int in2_d = gm->gputype.getNdims(IN2); const int * in2_s = gm->gputype.getSize(IN2); int in2_n = gm->gputype.getNumel(IN2); if ((in1_t==gpuCFLOAT) || (in1_t==gpuCDOUBLE)) { mexErrMsgTxt("Complex TYPE not supported"); } if (in1_t != in2_t) { mexErrMsgTxt("Input arguments must be of the same type"); } if (in1_n != in2_n) { mexErrMsgTxt("Input arguments must have the same number of elements"); } //OUT is the output GPU array (result) // Create of the same size of IN1 GPUtype OUT = gm->gputype.create(in1_t, in1_d, in1_s, NULL); // I need the pointers to GPU memory CUdeviceptr d_IN1 = (CUdeviceptr) (UINTPTR gm->gputype.getGPUptr(IN1)); CUdeviceptr d_IN2 = (CUdeviceptr) (UINTPTR gm->gputype.getGPUptr(IN2)); CUdeviceptr d_OUT = (CUdeviceptr) (UINTPTR gm->gputype.getGPUptr(OUT)); // The GPU kernel depends on the type of input/output CUfunction drvfun; CUtexref drvtex; CUarray_format_enum drvtexformat; int drvtexnum; if (in1_t == gpuFLOAT) { drvfun = drvfunf; drvtex = texf; drvtexformat = CU_AD_FORMAT_FLOAT; drvtexnum = 1; } else if (in1_t == gpuDOUBLE) { drvfun = drvfund; drvtex = texd; drvtexformat = CU_AD_FORMAT_SIGNED_INT32; drvtexnum = 2; } if (CUDA_SUCCESS != cuTexRefSetFormat(drvtex, drvtexformat, drvtexnum)) { mexErrMsgTxt("Execution error (texture)."); } if (CUDA_SUCCESS != cuTexRefSetAddress(NULL, drvtex, UINTPTR d_IN1, in1_n*in1_b)) { mexErrMsgTxt("Execution error (texture)."); } if (CUDA_SUCCESS != cuParamSetTexRef(drvfun, CU_PARAM_TR_DEFAULT, drvtex)) { 
mexErrMsgTxt("Execution error (texture1)."); } hostdrv_pars_t gpuprhs[2]; int gpunrhs = 2; gpuprhs[0] = hostdrv_pars(&d_IN2,sizeof(d_IN2),__alignof(d_IN2)); gpuprhs[1] = hostdrv_pars(&d_OUT,sizeof(d_OUT),__alignof(d_OUT)); int N = in1_n; hostGPUDRV(drvfun, N, gpunrhs, gpuprhs); // return result plhs[0] = gm->gputype.createMxArray(OUT); }