// Weighted-neighbourhood sum written to one output element per thread.
//
// The thread's position comes from thread_pos2d(); threads whose position
// fails out.has() exit without writing anything. For each of the
// `kernelsize` taps, a weight is fetched from tex_weights and an offset from
// tex_dpoints. The offset is added to the thread position, and when that
// neighbour passes out.has() the input texel at the neighbour (read through
// conv_input_tex<I>::tex(), converted to O) is scaled by the weight and added
// to the accumulator. Neighbours that fail the test contribute nothing.
//
// Parameters:
//   (unnamed I) - carries only the input type used to select the texture.
//   out         - destination image; also the domain used for both the
//                 thread-position and the neighbour tests.
//   kernelsize  - number of (weight, offset) taps to read.
//
// NOTE(review): I and O are presumably template parameters declared outside
// this view - confirm against the surrounding file.
__global__ void convolve_kernel(I, kernel_image2d<O> out, unsigned kernelsize)
{
  i_int2 pos = thread_pos2d();
  if (!out.has(pos))
    return;

  // Accumulator type is derived from O's value type multiplied by float.
  bt_change_vtype(O, type_mult(bt_vtype(O), float)) acc = zero();

  for (int tap = 0; tap < kernelsize; tap++)
  {
    float weight = tex1Dfetch(tex_weights, tap);
    point2d<int> neighbour = i_int2(tex1Dfetch(tex_dpoints, tap)) + pos;

    // Skip taps whose neighbour lies outside the domain of `out`.
    if (!out.has(neighbour))
      continue;

    acc += O(tex2D(conv_input_tex<I>::tex(), neighbour)) * weight;
  }

  out(pos) = acc;
}
/**
 * Dot product of one sparse-matrix row with the vector read through the
 * `mainVecTexRef` texture (ELLPACK-style storage, per the function name).
 *
 * Layout implied by the indexing: the i-th stored entry of `row` sits at
 * position numRows * i + row in both `vals` and `colIdx`.
 *
 * @param vals      Values of the stored matrix entries.
 * @param colIdx    Column index of each stored entry, parallel to `vals`.
 * @param rowLength Number of stored entries per row; only rowLength[row] is read.
 * @param row       Row to process. It is not range-checked here, so the caller
 *                  must pass a valid row.
 * @param numRows   Stride, in elements, between consecutive entries of a row.
 * @return Sum over the row's stored entries of value * vector[column];
 *         0 when the row has no stored entries.
 */
__device__ float SpMV_Ellpack_device(const float * vals, const int * colIdx, const int * rowLength, const int row, const int numRows)
{
    const int entries = rowLength[row];

    // Keep the element offset in 64 bits: numRows * i + row can exceed the
    // range of a 32-bit int for large matrices, which would be signed overflow.
    long long offset = row;

    float dot = 0.0f;
    for (int i = 0; i < entries; ++i, offset += numRows)
    {
        dot += vals[offset] * tex1Dfetch(mainVecTexRef, colIdx[offset]);
    }
    return dot;
}